2 Commits
yx ... mps

Author SHA1 Message Date
80d9c1de5a benchmark测试,发现瓶颈:路径搜索
Some checks are pending
Build wheels / build (ubuntu-latest, 3.11) (push) Waiting to run
Build wheels / build (ubuntu-latest, 3.12) (push) Waiting to run
Build wheels / build (ubuntu-latest, 3.13) (push) Waiting to run
Tests / check (push) Waiting to run
Tests / build (ubuntu-latest, 3.11) (push) Blocked by required conditions
Tests / build (ubuntu-latest, 3.12) (push) Blocked by required conditions
Tests / build (ubuntu-latest, 3.13) (push) Blocked by required conditions
2026-04-27 18:59:54 +08:00
2c54840e7b 1.完成mps态脚本,与原始qibojit结果比对确定bond demension和cut off值;2.更新了官方库;3.新大陆
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
2026-04-27 11:03:57 +08:00
18 changed files with 208 additions and 576 deletions

2
.gitignore vendored
View File

@@ -2,7 +2,7 @@
__pycache__/
*.py[cod]
*$py.class
data/
# C extensions
*.so

114
benchmark_mps.py Normal file
View File

@@ -0,0 +1,114 @@
"""Benchmark: qibojit (reference) vs qibotn/quimb MPS, with error comparison."""
import time
import argparse
import os
import numpy as np
import qibo
from qibo import Circuit, gates
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
def make_circuit(circuit_type, nqubits, nlayers=1):
c = Circuit(nqubits)
if circuit_type == "qft":
from qibo.models import QFT
return QFT(nqubits)
elif circuit_type == "variational":
for layer in range(nlayers):
for q in range(nqubits):
c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi)))
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CZ(q, q + 1))
elif circuit_type == "ghz":
c.add(gates.H(0))
for q in range(nqubits - 1):
c.add(gates.CNOT(q, q + 1))
else:
raise ValueError(f"Unknown circuit: {circuit_type}")
return c
def run_qibojit(circuit):
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
sv = result.state()
return sv, elapsed
def run_quimb_mps(circuit, max_bond, svd_cutoff, optimizer):
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="mps", max_bond_dimension=max_bond, svd_cutoff=svd_cutoff)
b.contractions_optimizer = optimizer
t0 = time.time()
result = b.execute_circuit(circuit, return_array=True)
elapsed = time.time() - t0
sv = result.state()
return sv, elapsed
def compare(sv_ref, sv_mps):
sv_ref = np.array(sv_ref, dtype=complex).flatten()
sv_mps = np.array(sv_mps, dtype=complex).flatten()
fidelity = abs(np.dot(sv_ref.conj(), sv_mps)) ** 2
l2_err = np.linalg.norm(sv_ref - sv_mps)
return fidelity, l2_err
def jit_cache_path(circuit_type, nqubits, nlayers):
os.makedirs(DATA_DIR, exist_ok=True)
return os.path.join(DATA_DIR, f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--nqubits", type=int, default=10)
parser.add_argument("--circuit", type=str, default="ghz",
choices=["qft", "variational", "ghz"])
parser.add_argument("--nlayers", type=int, default=3)
parser.add_argument("--max-bond", type=int, default=None,
help="Max bond dimension for MPS (None = unlimited)")
parser.add_argument("--svd-cutoff", type=float, default=1e-6)
parser.add_argument("--optimizer", type=str, default="auto-hq")
parser.add_argument("--skip-jit", action="store_true",
help="Skip qibojit run, load cached statevector if available")
args = parser.parse_args()
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}")
print(f"MPS config: max_bond={args.max_bond}, svd_cutoff={args.svd_cutoff}, optimizer={args.optimizer}")
cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers)
t_ref = None
if args.skip_jit and os.path.exists(cache_path):
sv_ref = np.load(cache_path)
print(f"\n[qibojit] loaded from cache: {cache_path}")
else:
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
sv_ref, t_ref = run_qibojit(circuit_ref)
np.save(cache_path, sv_ref)
print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})")
np.random.seed(42)
circuit_mps = make_circuit(args.circuit, args.nqubits, args.nlayers)
try:
sv_mps, t_mps = run_quimb_mps(circuit_mps, args.max_bond, args.svd_cutoff, args.optimizer)
fidelity, l2_err = compare(sv_ref, sv_mps)
print(f"[quimb MPS] time={t_mps:.4f}s")
print(f"\nFidelity : {fidelity:.8f} (1=perfect)")
print(f"L2 error : {l2_err:.2e}")
if t_ref is not None and t_mps > 0:
print(f"Speedup : {t_ref/t_mps:.2f}x")
except Exception as e:
print(f"[quimb MPS] FAILED: {e}")
raise
if __name__ == "__main__":
main()

11
log Normal file
View File

@@ -0,0 +1,11 @@
[qibojit] loaded from cache: /home/yx/qibotn/data/jit_variational_n32_l5.npy
bond time(s) fidelity l2_err
----------------------------------------------
1 157.4587 0.00000280 9.99e-01
8 61.9126 0.99999014 2.22e-03
16 63.4902 0.99999014 2.22e-03
32 58.3594 0.99999014 2.22e-03
64 59.7043 0.99999014 2.22e-03
128 64.6368 0.99999014 2.22e-03
256 64.9058 0.99999014 2.22e-03

6
poetry.lock generated
View File

@@ -1733,14 +1733,14 @@ files = [
[[package]]
name = "mako"
version = "1.3.10"
version = "1.3.11"
description = "A super-fast templating language that borrows the best ideas from the existing templating languages."
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59"},
{file = "mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28"},
{file = "mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77"},
{file = "mako-1.3.11.tar.gz", hash = "sha256:071eb4ab4c5010443152255d77db7faa6ce5916f35226eb02dc34479b6858069"},
]
[package.dependencies]

View File

@@ -167,7 +167,7 @@ def execute_circuit(
raise_error(ValueError, "Initial state not None supported only for MPS ansatz.")
circ_quimb = self.circuit_ansatz.from_openqasm2_str(
circuit.to_qasm(), psi0=initial_state
circuit.to_qasm(), psi0=initial_state, gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff}
)
if nshots:

View File

@@ -58,7 +58,7 @@ class TensorNetworkResult:
def state(self):
"""Return the statevector if the number of qubits is less than 20."""
if self.nqubits < 20:
if self.nqubits < 35:
return self.statevector
raise_error(
NotImplementedError,

39
sweep_bond_32q.py Normal file
View File

@@ -0,0 +1,39 @@
"""Bond dimension sweep for 32-qubit variational circuit."""
import os
import sys
import numpy as np
sys.path.insert(0, os.path.dirname(__file__))
from benchmark_mps import make_circuit, run_qibojit, run_quimb_mps, compare, jit_cache_path, DATA_DIR
NQUBITS = 32
NLAYERS = 5
BOND_VALUES = [1, 8, 16, 32, 64, 128, 256]
SVD_CUTOFF = 1e-6
OPTIMIZER = "auto-hq"
if __name__ == "__main__":
cache_path = jit_cache_path("variational", NQUBITS, NLAYERS)
if os.path.exists(cache_path):
sv_ref = np.load(cache_path)
print(f"[qibojit] loaded from cache: {cache_path}\n")
else:
np.random.seed(42)
circuit_ref = make_circuit("variational", NQUBITS, NLAYERS)
sv_ref, t_ref = run_qibojit(circuit_ref)
np.save(cache_path, sv_ref)
print(f"[qibojit] time={t_ref:.4f}s (saved to {cache_path})\n")
print(f"{'bond':>6} {'time(s)':>10} {'fidelity':>12} {'l2_err':>10}")
print("-" * 46)
for bond in BOND_VALUES:
np.random.seed(42)
circuit_mps = make_circuit("variational", NQUBITS, NLAYERS)
try:
sv_mps, t_mps = run_quimb_mps(circuit_mps, bond, SVD_CUTOFF, OPTIMIZER)
fidelity, l2_err = compare(sv_ref, sv_mps)
print(f"{bond:>6} {t_mps:>10.4f} {fidelity:>12.8f} {l2_err:>10.2e}")
except Exception as e:
print(f"{bond:>6} FAILED: {e}")

View File

@@ -1,27 +0,0 @@
import time
import pickle
def run(input="tree.pkl"):
with open(input, "rb") as f:
data = pickle.load(f)
sliced_tree = data["sliced_tree"]
arrays = data["arrays"]
n_slices = sliced_tree.nslices
print(f"Total slices: {n_slices}")
t0 = time.perf_counter()
total = sum(sliced_tree.contract_slice(arrays, i, backend='numpy',implementation='cotengra') for i in range(n_slices))
t1 = time.perf_counter()
print(f"Contract: {t1 - t0:.4f} s")
#print(f"Result: {total:.10f}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--input", type=str, default="tree.pkl.bak")
args = parser.parse_args()
run(args.input)

View File

@@ -1,60 +0,0 @@
"""生成比赛常用测试电路的 QASM 文件。"""
import argparse
import qibo
from qibo.models import QFT, Circuit
from qibo import gates
import numpy as np
qibo.set_backend("numpy")
def gen_qft(n_qubits):
return QFT(n_qubits, with_swaps=True).to_qasm()
def gen_random(n_qubits, depth, seed):
rng = np.random.default_rng(seed)
c = Circuit(n_qubits)
for _ in range(depth):
for q in range(n_qubits):
c.add(gates.H(q))
for q in range(0, n_qubits - 1, 2):
c.add(gates.CZ(q, q + 1))
return c.to_qasm()
def gen_supremacy(n_qubits, depth, seed):
"""Google supremacy 风格:随机单比特门 + CZ"""
rng = np.random.default_rng(seed)
single = [gates.X, gates.Y, gates.H]
c = Circuit(n_qubits)
for _ in range(depth):
for q in range(n_qubits):
g = single[rng.integers(3)]
c.add(g(q))
for q in range(0, n_qubits - 1, 2):
c.add(gates.CZ(q, q + 1))
for q in range(1, n_qubits - 1, 2):
c.add(gates.CZ(q, q + 1))
return c.to_qasm()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--circuit", default="qft", choices=["qft", "random", "supremacy"])
parser.add_argument("--n_qubits", type=int, default=20)
parser.add_argument("--depth", type=int, default=10)
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--out", default="circuit.qasm")
args = parser.parse_args()
if args.circuit == "qft":
qasm = gen_qft(args.n_qubits)
elif args.circuit == "random":
qasm = gen_random(args.n_qubits, args.depth, args.seed)
else:
qasm = gen_supremacy(args.n_qubits, args.depth, args.seed)
with open(args.out, "w") as f:
f.write(qasm)
print(f"Written: {args.out} ({args.n_qubits} qubits, {args.circuit})")

View File

@@ -1,2 +0,0 @@
192.168.20.102
192.168.20.101

View File

@@ -1,126 +0,0 @@
"""
MPI + ThreadPoolExecutor 混合并行张量网络收缩。
每个 MPI rank 负责一部分 slicestride 分配),
rank 内用 ThreadPoolExecutor 并行执行各 slice每线程一个 slice
用法:
mpirun -n <N> python mpi_v.py --qasm circuit.qasm --target-slices 16 --threads 8
"""
import os
import time
import argparse
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
import quimb.tensor as qtn
import cotengra as ctg
def _contract_slice(sliced_tree, arrays, idx):
return sliced_tree.contract_slice(arrays, idx, backend="numpy")
def run(qasm_path, target_slices, n_threads, max_repeats):
# ── 构建张量网络rank 0broadcast arrays──
if rank == 0:
with open(qasm_path) as f:
qasm_str = f.read()
# 不用 full_simplify保持 outer_inds 完整
psi = qtn.Circuit.from_openqasm2_str(qasm_str).psi
n_qubits = len([i for i in psi.outer_inds() if i.startswith("k")])
output_inds = [f"k{i}" for i in range(n_qubits)]
arrays = [t.data for t in psi.tensors]
else:
psi = None
n_qubits = None
arrays = None
output_inds = None
n_qubits = comm.bcast(n_qubits, root=0)
arrays = comm.bcast(arrays, root=0)
output_inds = comm.bcast(output_inds, root=0)
# ── 路径搜索rank 0+ broadcast ──
t0 = time.perf_counter()
if rank == 0:
opt = ctg.HyperOptimizer(
methods=["kahypar", "greedy"],
max_repeats=max_repeats,
minimize="flops",
parallel=min(96, os.cpu_count()),
)
tree = psi.contraction_tree(optimize=opt, output_inds=output_inds)
n = target_slices
sliced_tree = None
while n >= 1:
try:
sliced_tree = tree.slice(target_size=n, allow_outer=False)
break
except RuntimeError:
n //= 2
if sliced_tree is None:
sliced_tree = tree.slice(target_slices=1, allow_outer=True)
print(f"[rank 0] path search: {time.perf_counter()-t0:.2f}s slices: {sliced_tree.nslices}", flush=True)
else:
sliced_tree = None
sliced_tree = comm.bcast(sliced_tree, root=0)
n_slices = sliced_tree.nslices
# ── 分布式收缩MPI stride + ThreadPoolExecutor──
my_indices = list(range(rank, n_slices, size))
local_result = np.zeros(2**n_qubits, dtype=np.complex128)
comm.Barrier()
t1 = time.perf_counter()
with ThreadPoolExecutor(max_workers=n_threads) as pool:
for batch_start in range(0, len(my_indices), n_threads):
batch = my_indices[batch_start:batch_start + n_threads]
futures = {pool.submit(_contract_slice, sliced_tree, arrays, i): i for i in batch}
for fut in as_completed(futures):
local_result += np.array(fut.result()).flatten()
t2 = time.perf_counter()
if rank == 0:
print(f"[rank 0] contract: {t2-t1:.2f}s", flush=True)
# ── MPI reduce ──
total = comm.reduce(local_result, op=MPI.SUM, root=0)
if rank == 0:
print(f"result norm: {np.linalg.norm(total):.10f}", flush=True)
print(f"total time: {t2-t0:.2f}s", flush=True)
return total
return None
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--qasm", required=True, help="QASM 文件路径")
parser.add_argument("--target-slices", type=int, default=None,
help="目标切片数量(优先于 target-size")
parser.add_argument("--target-size", type=int, default=28,
help="切片目标大小指数2^N默认 28")
parser.add_argument("--threads", type=int, default=max(1, os.cpu_count() // size),
help="每个 rank 的线程数,默认 cpu_count/size")
parser.add_argument("--max-repeats", type=int, default=256,
help="cotengra 路径搜索重复次数")
args = parser.parse_args()
target = args.target_slices if args.target_slices else 2**args.target_size
mode = "slices" if args.target_slices else f"size=2^{args.target_size}"
if rank == 0:
print(f"ranks={size} threads/rank={args.threads} target_{mode}", flush=True)
run(args.qasm, target, args.threads, args.max_repeats)
if __name__ == "__main__":
main()

View File

@@ -1,68 +0,0 @@
import os
import time
import numpy as np
import quimb.tensor as qtn
import cotengra as ctg
'''
# --- 1. 关键:在导入 numpy/quimb 之前设置环境变量 ---
# 告诉底层 BLAS 库 (MKL/OpenBLAS) 使用 96 个线程
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
# 优化线程亲和性,避免线程在不同 CPU 核心间跳变,提升缓存命中率
os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"
os.environ["KMP_BLOCKTIME"] = "0"
'''
# 现在导入库
import psutil
def run_baseline(n_qubits=50, depth=20):
print(f"🚀 {n_qubits} Qubits, Depth {depth}")
print(f"💻 Detected Logical Cores: {os.cpu_count()}")
# 1. 构建电路 (必须 complex128 保证精度)
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
for d in range(depth):
for i in range(n_qubits):
circ.apply_gate('H', i)
for i in range(0, n_qubits - 1, 2):
circ.apply_gate('CZ', i, i + 1)
psi = circ.psi
# 2. 构建闭合网络 <psi|psi>
net = psi.conj() & psi
# 3. 路径搜索参数 (Kahypar)
print("🔍 Searching path with Kahypar...")
opt = ctg.HyperOptimizer(
methods=['kahypar'],
max_repeats=128,
parallel=96,
minimize='flops',
on_trial_error='ignore'
)
# 4. 阶段1路径搜索
t0 = time.perf_counter()
tree = net.contraction_tree(optimize=opt)
t1 = time.perf_counter()
print(f"🔍 Path search done: {t1 - t0:.4f} s")
# 5. 阶段2张量收缩
result = net.contract(optimize=tree, backend='numpy')
t2 = time.perf_counter()
peak_mem = psutil.Process().memory_info().rss / 1024**3
print(f"✅ Done!")
print(f"⏱️ Contract: {t2 - t1:.4f} s | Total: {t2 - t0:.4f} s")
print(f"💾 Peak Memory: {peak_mem:.2f} GB")
print(f"🔢 Result: {result:.10f}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--n_qubits", type=int, default=50)
parser.add_argument("--depth", type=int, default=20)
args = parser.parse_args()
run_baseline(n_qubits=args.n_qubits, depth=args.depth)

View File

@@ -1,90 +0,0 @@
import time
import numpy as np
import quimb.tensor as qtn
import cotengra as ctg
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
def build_qft(n_qubits):
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
for i in range(n_qubits):
circ.apply_gate('H', i)
for j in range(i + 1, n_qubits):
circ.apply_gate('CPHASE', np.pi / 2 ** (j - i), i, j)
return circ
def run_mpi(n_qubits, depth=None):
if rank == 0:
print(f"MPI size: {size} ranks")
print(f"Circuit: QFT {n_qubits} qubits")
circ = build_qft(n_qubits)
psi = circ.psi
# 期望值网络:<psi|Z_0|psi>
Z = np.array([[1, 0], [0, -1]], dtype=np.complex128)
bra = psi.conj().reindex({f'k{i}': f'b{i}' for i in range(n_qubits)})
obs = qtn.Tensor(Z, inds=(f'k0', f'b0'))
net = psi & obs & bra
# 2. 所有 rank 并行搜索路径rank 0 选全局最优
t0 = time.perf_counter()
repeats_per_rank = max(1, 128 // size)
opt = ctg.HyperOptimizer(
methods=['kahypar'],
#methods=['greedy'],
#max_repeats=repeats_per_rank,
max_repeats=repeats_per_rank,
minimize='flops',
parallel=max(1, 96 // size),
)
local_tree = net.contraction_tree(optimize=opt)
all_trees = comm.gather(local_tree, root=0)
if rank == 0:
tree = min(all_trees, key=lambda t: t.contraction_cost())
t1 = time.perf_counter()
print(f"[rank 0] Path search: {t1 - t0:.4f} s")
else:
tree = None
tree = comm.bcast(tree, root=0)
# 3. rank 0 切片broadcast sliced_tree
if rank == 0:
sliced_tree = tree.slice(target_size=2**27)
else:
sliced_tree = None
sliced_tree = comm.bcast(sliced_tree, root=0)
n_slices = sliced_tree.nslices
if rank == 0:
print(f"Total slices: {n_slices}, each rank handles ~{n_slices // size}")
arrays = [t.data for t in net.tensors]
# 每个 rank 处理自己负责的切片
t2 = time.perf_counter()
local_result = 0.0 + 0.0j
for i in range(rank, n_slices, size):
local_result += sliced_tree.contract_slice(arrays, i, backend='numpy')
t3 = time.perf_counter()
# 4. reduce 汇总到 rank 0
total = comm.reduce(local_result, op=MPI.SUM, root=0)
if rank == 0:
print(f"[rank 0] Contract: {t3 - t2:.4f} s")
print(f"Result: {total:.10f}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--n_qubits", type=int, default=20)
parser.add_argument("--depth", type=int, default=30)
args = parser.parse_args()
run_mpi(args.n_qubits, args.depth)

View File

@@ -1,103 +0,0 @@
import time
import numpy as np
import quimb.tensor as qtn
import cotengra as ctg
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
def build_qft_circuit(n_qubits):
"""构建标准 QFT 电路"""
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
for i in range(n_qubits):
# 1. 施加 H 门
circ.apply_gate('H', i)
# 2. 施加受控相位旋转
for j in range(i + 1, n_qubits):
theta = np.pi / (2**(j - i))
circ.apply_gate('CPHASE', theta, i, j)
return circ
def run_mpi(n_qubits):
if rank == 0:
print(f"MPI size: {size} ranks")
print(f"Circuit: QFT {n_qubits} qubits")
# 1. 所有 rank 独立构建 QFT 电路
circ = build_qft_circuit(n_qubits)
# 物理观测:计算 <psi|psi>,结果应为 1.0
# 注意QFT 是幺正变换,末态模长平方必为 1
psi = circ.psi
net = psi.conj() & psi
# 2. 路径搜索优化
t0 = time.perf_counter()
# 每个 rank 尝试不同的种子,增加找到全局最优路径的概率
repeats_per_rank = max(1, 256 // size)
opt = ctg.HyperOptimizer(
methods=['kahypar'],
max_repeats=repeats_per_rank,
minimize='flops',
parallel=max(1, 96 // size),
)
# 搜索收缩树
local_tree = net.contraction_tree(optimize=opt)
# 汇总所有 rank 找到的树,在 rank 0 选出 FLOPs 最低的那棵
all_trees = comm.gather(local_tree, root=0)
if rank == 0:
tree = min(all_trees, key=lambda t: t.contraction_cost())
t1 = time.perf_counter()
print(f"[rank 0] Path search: {t1 - t0:.4f} s")
print(f"[rank 0] Best path FLOPs: {tree.contraction_cost():.2e}")
else:
tree = None
# 将最优路径广播给所有进程
tree = comm.bcast(tree, root=0)
# 3. 切片处理(性能控制核心)
if rank == 0:
# 比赛建议:将 target_size 设为能填满单进程内存的 50%-70%
# 或者改用 target_slices=size * 4 以确保负载绝对平衡
sliced_tree = tree.slice(target_size=2**27)
else:
sliced_tree = None
sliced_tree = comm.bcast(sliced_tree, root=0)
n_slices = sliced_tree.nslices
if rank == 0:
print(f"Total slices: {n_slices}, each rank handles ~{n_slices // size + 1}")
# 获取原始张量数据
arrays = [t.data for t in net.tensors]
# 4. 执行收缩计算
t2 = time.perf_counter()
local_result = 0.0 + 0.0j
# 简单的静态负载均衡:每个 rank 跳步处理切片
for i in range(rank, n_slices, size):
local_result += sliced_tree.contract_slice(arrays, i, backend='numpy')
t3 = time.perf_counter()
# 5. 结果汇总
total = comm.reduce(local_result, op=MPI.SUM, root=0)
if rank == 0:
duration = t3 - t2
print(f"[rank 0] Contract: {duration:.4f} s")
# 对于 <psi|psi>QFT 的正确结果应无限接近 1.0
print(f"Result (Norm): {total.real:.10f} + {total.imag:.10f}j")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--n_qubits", type=int, default=20)
# QFT 的深度由比特数自动决定,所以删除了 --depth 参数
args = parser.parse_args()
run_mpi(args.n_qubits)

View File

@@ -1,56 +0,0 @@
import time
import pickle
import numpy as np
import quimb.tensor as qtn
import cotengra as ctg
def build_qft(n_qubits):
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
for i in range(n_qubits):
circ.apply_gate('H', i)
for j in range(i + 1, n_qubits):
circ.apply_gate('CPHASE', np.pi / 2 ** (j - i), i, j)
return circ
def run(n_qubits, output="tree.pkl"):
print(f"Circuit: QFT {n_qubits} qubits")
circ = build_qft(n_qubits)
psi = circ.psi
Z = np.array([[1, 0], [0, -1]], dtype=np.complex128)
bra = psi.conj().reindex({f'k{i}': f'b{i}' for i in range(n_qubits)})
obs = qtn.Tensor(Z, inds=(f'k0', f'b0'))
net = psi & obs & bra
t0 = time.perf_counter()
opt = ctg.HyperOptimizer(
methods=['kahypar'],
max_repeats=32,
minimize='combo',
parallel=8,
)
tree = net.contraction_tree(optimize=opt)
t1 = time.perf_counter()
print(f"Path search: {t1 - t0:.4f} s")
print(tree)
sliced_tree = tree.slice(target_size=2**28)
print(f"Total slices: {sliced_tree.nslices}")
arrays = [t.data for t in net.tensors]
with open(output, "wb") as f:
pickle.dump({"sliced_tree": sliced_tree, "arrays": arrays}, f)
print(f"Saved to {output}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--n_qubits", type=int, default=18)
parser.add_argument("--output", type=str, default="tree.pkl")
args = parser.parse_args()
run(args.n_qubits, args.output)

View File

@@ -61,6 +61,6 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
qasm_circ, init_state_tn, gate_opt, backend=config.quimb.backend
).flatten()
#assert np.allclose(
# result_sv, result_tn, atol=tolerance
#), "Resulting dense vectors do not match"
assert np.allclose(
result_sv, result_tn, atol=tolerance
), "Resulting dense vectors do not match"

Binary file not shown.