6 Commits

Author SHA1 Message Date
5a692033a6 添加MPI并行TN benchmark及辅助脚本,移除旧benchmark
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-05-05 19:04:09 +08:00
a3f39a1d67 删除tn脚本implementation 2026-05-03 19:06:21 +08:00
dd222587b7 tn脚本更新
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
2026-05-03 18:54:05 +08:00
740828872e 添加tn脚本
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
2026-04-28 23:07:39 +08:00
80d9c1de5a benchmark测试,发现瓶颈:路径搜索
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
2026-04-27 18:59:54 +08:00
2c54840e7b 1.完成mps态脚本,与原始qibojit结果比对确定bond demension和cut off值;2.更新了官方库;3.新大陆
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
2026-04-27 11:03:57 +08:00
25 changed files with 1607 additions and 579 deletions

6
.gitignore vendored
View File

@@ -2,10 +2,14 @@
__pycache__/
*.py[cod]
*$py.class
data/
# C extensions
*.so
bak/
path/
profiles/
perf*
# Distribution / packaging
.Python
build/

460
bench_profile.py Normal file
View File

@@ -0,0 +1,460 @@
"""Benchmark: qibotn/quimb generic TN — single-process torch profiling version."""
import os
import pickle
import time
import argparse
import numpy as np
import cotengra as ctg
import qibo
from qibo import Circuit, gates
def make_circuit(circuit_type, nqubits, nlayers=1):
c = Circuit(nqubits)
if circuit_type == "qft":
from qibo.models import QFT
return QFT(nqubits)
elif circuit_type == "variational":
for layer in range(nlayers):
for q in range(nqubits):
c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi)))
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CZ(q, q + 1))
elif circuit_type == "ghz":
c.add(gates.H(0))
for q in range(nqubits - 1):
c.add(gates.CNOT(q, q + 1))
elif circuit_type == "brickwork":
for q in range(nqubits):
c.add(gates.H(q))
for layer in range(nlayers):
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CNOT(q, q + 1))
c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi)))
c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi)))
else:
raise ValueError(f"Unknown circuit: {circuit_type}")
return c
def make_z_observable(nqubits):
"""Z on qubit 0 only — single contraction for benchmarking."""
return ["z"], [(0,)], [1.0]
def export_profiler_outputs(prof, trace_path):
"""Export Chrome trace and text table."""
prof.export_chrome_trace(trace_path)
table_path = trace_path.replace(".json", ".txt")
with open(table_path, "w") as f:
f.write(
prof.key_averages().table(
sort_by="self_cpu_time_total",
row_limit=200,
)
)
print(f" [torch profiler trace] {trace_path}")
print(f" [torch profiler table] {table_path}")
def run_quimb_tn(
circuit,
nqubits,
num_slices,
load_path=None,
save_path=None,
):
"""Mode: expval — compute <Z_0> via local_expectation."""
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn")
operators, sites, coeffs = make_z_observable(nqubits)
ops = b._string_to_quimb_operator(operators[0])
qc = b._qibo_circuit_to_quimb(
circuit,
quimb_circuit_type=b.circuit_ansatz,
gate_opts={"max_bond": None, "cutoff": 1e-10},
)
if load_path:
with open(load_path, "rb") as f:
saved = pickle.load(f)
tree = saved["tree"]
t_search = 0.0
print(f" [path loaded] {load_path}")
else:
opt = ctg.HyperOptimizer(
methods=["kahypar", "random-greedy", "spinglass"],
max_repeats=16,
parallel=True,
max_time=60,
slicing_opts={"target_slices": num_slices},
progbar=True,
)
t0 = time.time()
rehearsal = qc.local_expectation(
ops,
where=sites[0],
optimize=opt,
simplify_sequence="R",
rehearse=True,
)
t_search = time.time() - t0
tree = rehearsal["tree"]
print(
f" [path search] {t_search:.3f}s "
f"flops~2^{tree.contraction_cost():.2f} "
f"size~2^{tree.contraction_width():.2f} "
f"slices={tree.multiplicity}"
)
if save_path:
with open(save_path, "wb") as f:
pickle.dump({"tree": tree}, f)
print(f" [path saved] {save_path}")
t0 = time.time()
expval = qc.local_expectation(
ops,
where=sites[0],
optimize=tree,
simplify_sequence="R",
)
t_contract = time.time() - t0
print(f" [contraction] {t_contract:.3f}s")
return float(expval.real), t_search + t_contract
def run_quimb_tn_statevector(
circuit,
nqubits,
num_slices,
load_path=None,
save_path=None,
profile=False,
profile_dir="profiles",
):
"""Mode: statevector — contract full TN to dense vector, single process."""
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn")
import torch
qc = b._qibo_circuit_to_quimb(
circuit,
quimb_circuit_type=b.circuit_ansatz,
gate_opts={"max_bond": None, "cutoff": 1e-10},
)
# 让 quimb 生成 torch tensor这样 torch.profiler 能抓到 aten op。
qc.to_backend = torch.from_numpy
if load_path:
with open(load_path, "rb") as f:
saved = pickle.load(f)
tree = saved["tree"]
t_search = 0.0
print(f" [path loaded] {load_path}")
else:
opt = ctg.HyperOptimizer(
methods=["kahypar", "random-greedy", "spinglass"],
max_repeats=500,
parallel=48,
max_time=100,
minimize="size",
slicing_opts={"target_slices": num_slices},
progbar=True,
)
t0 = time.time()
rehearsal = qc.to_dense(optimize=opt, rehearse=True)
t_search = time.time() - t0
tree = rehearsal["tree"]
print(
f" [path search] {t_search:.3f}s "
f"flops~2^{tree.contraction_cost():.2f} "
f"size~2^{tree.contraction_width():.2f} "
f"slices={tree.multiplicity}"
)
if save_path:
with open(save_path, "wb") as f:
pickle.dump({"tree": tree}, f)
print(f" [path saved] {save_path}")
os.makedirs(profile_dir, exist_ok=True)
if profile:
from torch.profiler import profile as torch_profile
from torch.profiler import ProfilerActivity, record_function
trace_path = os.path.join(
profile_dir,
(
f"trace_statevector_"
f"{circuit.nqubits}q_"
f"slices{tree.multiplicity}_"
f"{int(time.time())}.json"
),
)
t0 = time.time()
with torch_profile(
activities=[ProfilerActivity.CPU],
record_shapes=True,
profile_memory=True,
with_stack=True,
) as prof:
with record_function("qibotn_to_dense_contraction"):
sv = qc.to_dense(optimize=tree).reshape(-1)
with record_function("torch_to_numpy_view_or_copy"):
if type(sv).__module__.startswith("torch"):
sv_tn = sv.detach().cpu().numpy()
else:
sv_tn = np.asarray(sv)
t_contract = time.time() - t0
export_profiler_outputs(prof, trace_path)
else:
t0 = time.time()
sv = qc.to_dense(optimize=tree).reshape(-1)
t_contract = time.time() - t0
if type(sv).__module__.startswith("torch"):
sv_tn = sv.detach().cpu().numpy()
else:
sv_tn = np.asarray(sv)
print(f" [contraction] {t_contract:.3f}s")
return sv_tn, t_search + t_contract
def run_quimb_tn_samples(circuit, nshots=1024):
"""Mode: samples — sample from circuit output distribution."""
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn")
t0 = time.time()
result = b.execute_circuit(circuit, nshots=nshots)
t_total = time.time() - t0
print(f" [sampling] {t_total:.3f}s nshots={nshots}")
try:
freqs = result.frequencies()
print(f" top states: {dict(list(freqs.items())[:5])}")
except Exception:
pass
return result, t_total
def qibojit_expval(circuit, nqubits):
"""Compute <Z_0> via qibojit statevector."""
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
sv = np.array(result.state(), dtype=complex).flatten()
probs = np.abs(sv) ** 2
bits = (np.arange(len(probs)) >> (nqubits - 1)) & 1
expval = float(np.dot(probs, 1 - 2 * bits))
return expval, elapsed
def run_qibojit(circuit):
"""Compute full statevector via qibojit."""
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
sv = np.array(result.state(), dtype=complex).flatten()
return sv, elapsed
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--nqubits", type=int, default=10)
parser.add_argument(
"--circuit",
type=str,
default="qft",
choices=["qft", "variational", "ghz", "brickwork"],
)
parser.add_argument("--nlayers", type=int, default=3)
parser.add_argument("--num-slices", type=int, default=1)
parser.add_argument("--nshots", type=int, default=1024)
parser.add_argument(
"--mode",
type=str,
default="statevector",
choices=["expval", "statevector", "samples"],
help="expval: local_expectation; statevector: to_dense; samples: sampling",
)
parser.add_argument(
"--no-compare",
action="store_true",
help="Skip qibojit reference run",
)
parser.add_argument(
"--save-path",
type=str,
default=None,
help="Save contraction tree to a pickle file",
)
parser.add_argument(
"--load-path",
type=str,
default=None,
help="Load contraction tree from a pickle file and skip path search",
)
parser.add_argument(
"--profile",
action="store_true",
help="Enable torch profiler for statevector contraction stage",
)
parser.add_argument(
"--profile-dir",
type=str,
default="profiles",
help="Directory to save torch profiler traces",
)
parser.add_argument(
"--save-statevector",
action="store_true",
help="Save TN statevector to data/sv_tn_*.npy",
)
args = parser.parse_args()
print(
f"Circuit: {args.circuit}, "
f"nqubits={args.nqubits}, "
f"nlayers={args.nlayers}, "
f"mode={args.mode}, "
f"profile={args.profile}"
)
np.random.seed(42)
circuit_tn = make_circuit(args.circuit, args.nqubits, args.nlayers)
try:
if args.mode == "expval":
expval_tn, t_tn = run_quimb_tn(
circuit_tn,
args.nqubits,
args.num_slices,
load_path=args.load_path,
save_path=args.save_path,
)
print(f"\n[quimb TN] time={t_tn:.4f}s <Z_0>={expval_tn:.8f}")
elif args.mode == "statevector":
sv_tn, t_tn = run_quimb_tn_statevector(
circuit_tn,
args.nqubits,
args.num_slices,
load_path=args.load_path,
save_path=args.save_path,
profile=args.profile,
profile_dir=args.profile_dir,
)
print(
f"\n[quimb TN] time={t_tn:.4f}s "
f"statevector shape={sv_tn.shape}"
)
if args.save_statevector:
os.makedirs("data", exist_ok=True)
out_path = f"data/sv_tn_{args.circuit}{args.nqubits}.npy"
np.save(out_path, sv_tn)
print(f"[saved statevector] {out_path}")
else:
_, t_tn = run_quimb_tn_samples(
circuit_tn,
nshots=args.nshots,
)
print(f"\n[quimb TN] time={t_tn:.4f}s")
args.no_compare = True
except Exception as e:
print(f"[quimb TN] FAILED: {e}")
raise
if not args.no_compare and args.mode != "statevector":
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
expval_ref, t_ref = qibojit_expval(circuit_ref, args.nqubits)
print(f"[qibojit] time={t_ref:.4f}s <Z_0>={expval_ref:.8f}")
print(f"\nDiff : {abs(expval_tn - expval_ref):.2e}")
if t_tn > 0:
print(f"Speedup : {t_ref / t_tn:.2f}x")
elif not args.no_compare and args.mode == "statevector" and sv_tn is not None:
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
sv_ref, t_ref = run_qibojit(circuit_ref)
fid = abs(np.dot(sv_ref.conj(), sv_tn)) ** 2
l2_err = np.linalg.norm(sv_ref - sv_tn)
print(f"[qibojit] time={t_ref:.4f}s")
print(f"Fidelity : {fid:.8f} (1=perfect)")
print(f"L2 error : {l2_err:.2e}")
if t_tn > 0:
print(f"Speedup : {t_ref / t_tn:.2f}x")
if __name__ == "__main__":
main()

189
benchmark_mps.py Normal file
View File

@@ -0,0 +1,189 @@
"""Benchmark: qibojit (reference) vs qibotn/quimb MPS, with error comparison."""
import time
import argparse
import os
import numpy as np
import qibo
import quimb.tensor as qtn
from qibo import Circuit, gates
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
def make_circuit(circuit_type, nqubits, nlayers=1, add_measurements=False):
c = Circuit(nqubits)
if circuit_type == "qft":
from qibo.models import QFT
c = QFT(nqubits)
elif circuit_type == "variational":
for layer in range(nlayers):
for q in range(nqubits):
c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi)))
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CZ(q, q + 1))
elif circuit_type == "ghz":
c.add(gates.H(0))
for q in range(nqubits - 1):
c.add(gates.CNOT(q, q + 1))
else:
raise ValueError(f"Unknown circuit: {circuit_type}")
if add_measurements:
c.add(gates.M(*range(nqubits)))
return c
def run_qibojit(circuit):
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
sv = result.state()
return sv, elapsed
def run_quimb_mps(circuit, max_bond, svd_cutoff, optimizer, nshots=None):
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="mps", max_bond_dimension=max_bond, svd_cutoff=svd_cutoff)
b.contractions_optimizer = optimizer
t0 = time.time()
if nshots:
result = b.execute_circuit(circuit, nshots=nshots)
elapsed = time.time() - t0
return result.frequencies(), elapsed, 0.0
else:
# MPS simulation
circ_quimb = qtn.CircuitMPS.from_openqasm2_str(
circuit.to_qasm(),
gate_opts={"max_bond": max_bond, "cutoff": svd_cutoff},
)
t_mps = time.time() - t0
# to_dense separately
t1 = time.time()
#sv = circ_quimb.psi.to_dense().reshape(-1)
sv = None
t_dense = time.time() - t1
return sv, t_mps, t_dense
def run_quimb_permmps(circuit, max_bond, svd_cutoff, nshots=None):
gates_list = [
qtn.Gate(g.name, params=list(g.parameters), qubits=list(g.qubits))
for g in circuit.queue
if g.name.lower() != "measure"
]
t0 = time.time()
circ = qtn.CircuitPermMPS.from_gates(
gates_list,
N=circuit.nqubits,
max_bond=max_bond,
cutoff=svd_cutoff,
)
if nshots:
from collections import Counter
result = Counter(circ.sample(nshots))
elapsed = time.time() - t0
return dict(result), elapsed
else:
mps = circ.get_psi_unordered()
sv = mps.to_dense().reshape(-1)
elapsed = time.time() - t0
return sv, elapsed
def compare_statevector(sv_ref, sv_mps):
sv_ref = np.array(sv_ref, dtype=complex).flatten()
sv_mps = np.array(sv_mps, dtype=complex).flatten()
fidelity = abs(np.dot(sv_ref.conj(), sv_mps)) ** 2
l2_err = np.linalg.norm(sv_ref - sv_mps)
return fidelity, l2_err
def compare_frequencies(freq_ref, freq_mps, nshots):
all_keys = set(freq_ref) | set(freq_mps)
tvd = 0.5 * sum(abs(freq_ref.get(k, 0) - freq_mps.get(k, 0)) for k in all_keys) / nshots
return tvd
def jit_cache_path(circuit_type, nqubits, nlayers):
os.makedirs(DATA_DIR, exist_ok=True)
return os.path.join(DATA_DIR, f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--nqubits", type=int, default=10)
parser.add_argument("--circuit", type=str, default="ghz",
choices=["qft", "variational", "ghz"])
parser.add_argument("--nlayers", type=int, default=3)
parser.add_argument("--max-bond", type=int, default=None,
help="Max bond dimension for MPS (None = unlimited)")
parser.add_argument("--svd-cutoff", type=float, default=1e-6)
parser.add_argument("--optimizer", type=str, default="eager")
parser.add_argument("--nshots", type=int, default=None,
help="Use sampling mode with given number of shots instead of statevector")
parser.add_argument("--permmps", action="store_true",
help="Use CircuitPermMPS directly instead of qibotn backend")
parser.add_argument("--skip-jit", action="store_true",
help="Skip qibojit run, load cached statevector if available")
parser.add_argument("--no-compare", action="store_true",
help="Skip correctness comparison entirely")
args = parser.parse_args()
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}")
print(f"MPS config: max_bond={args.max_bond}, svd_cutoff={args.svd_cutoff}, optimizer={args.optimizer}")
ref = None
t_ref = None
if not args.no_compare:
cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers)
if args.nshots:
# frequency mode: run qibojit with same nshots
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers, add_measurements=True)
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result_ref = circuit_ref(nshots=args.nshots)
t_ref = time.time() - t0
ref = dict(result_ref.frequencies())
print(f"\n[qibojit] time={t_ref:.4f}s")
elif args.skip_jit and os.path.exists(cache_path):
ref = np.load(cache_path)
print(f"\n[qibojit] loaded from cache: {cache_path}")
else:
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
ref, t_ref = run_qibojit(circuit_ref)
np.save(cache_path, ref)
print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})")
np.random.seed(42)
circuit_mps = make_circuit(args.circuit, args.nqubits, args.nlayers)
label = "quimb PermMPS" if args.permmps else "quimb MPS"
try:
if args.permmps:
out, t_mps = run_quimb_permmps(circuit_mps, args.max_bond, args.svd_cutoff, args.nshots)
t_dense = 0.0
else:
out, t_mps, t_dense = run_quimb_mps(circuit_mps, args.max_bond, args.svd_cutoff, args.optimizer, args.nshots)
print(f"[{label}] MPS sim={t_mps:.4f}s to_dense={t_dense:.4f}s total={t_mps+t_dense:.4f}s")
if not args.no_compare:
if args.nshots:
tvd = compare_frequencies(ref, out, args.nshots)
print(f"\nTVD : {tvd:.6f} (0=perfect)")
else:
fidelity, l2_err = compare_statevector(ref, out)
print(f"\nFidelity : {fidelity:.8f} (1=perfect)")
print(f"L2 error : {l2_err:.2e}")
if t_ref is not None and t_mps > 0:
print(f"Speedup : {t_ref/t_mps:.2f}x")
except Exception as e:
print(f"[quimb MPS] FAILED: {e}")
raise
if __name__ == "__main__":
main()

126
benchmark_qmatchatea.py Normal file
View File

@@ -0,0 +1,126 @@
"""Benchmark: qibojit (reference) vs qibotn/qmatchatea MPS."""
import time
import argparse
import os
import numpy as np
import qibo
from qibo import Circuit, gates
from qibo.backends import construct_backend
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
def make_circuit(circuit_type, nqubits, nlayers=1):
c = Circuit(nqubits)
if circuit_type == "qft":
from qibo.models import QFT
return QFT(nqubits)
elif circuit_type == "variational":
for layer in range(nlayers):
for q in range(nqubits):
c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi)))
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CZ(q, q + 1))
elif circuit_type == "ghz":
c.add(gates.H(0))
for q in range(nqubits - 1):
c.add(gates.CNOT(q, q + 1))
else:
raise ValueError(f"Unknown circuit: {circuit_type}")
return c
def run_qibojit(circuit):
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
return result.state(), elapsed
def run_qmatchatea(circuit, max_bond, cut_ratio):
import qmatchatea, qtealeaves.observables
from qibo.backends import construct_backend as _cb
b = _cb(backend="qibotn", platform="qmatchatea")
b.configure_tn_simulation(ansatz="MPS", max_bond_dimension=max_bond, cut_ratio=cut_ratio)
qk_circuit = b._qibocirc_to_qiskitcirc(circuit)
run_qk_params = qmatchatea.preprocessing.qk_transpilation_params(False)
observables = qtealeaves.observables.TNObservables()
observables += qtealeaves.observables.TNState2File(name="temp", formatting="D")
t0 = time.time()
results = qmatchatea.run_simulation(
circ=qk_circuit,
convergence_parameters=b.convergence_params,
transpilation_parameters=run_qk_params,
backend=b.qmatchatea_backend,
observables=observables,
)
elapsed = time.time() - t0
tn_state = results.observables.get("tn_state")
if tn_state is None:
results.load_state()
tn_state = results.observables["tn_state"]
sv_obj = tn_state.to_statevector(qiskit_order=False, max_qubit_equivalent=40)
sv = np.array(sv_obj.elem, dtype=complex).flatten()
return sv, elapsed
def compare(sv_ref, sv_mps):
sv_ref = np.array(sv_ref, dtype=complex).flatten()
fidelity = abs(np.dot(sv_ref.conj(), sv_mps)) ** 2
l2_err = np.linalg.norm(sv_ref - sv_mps)
return fidelity, l2_err
def jit_cache_path(circuit_type, nqubits, nlayers):
os.makedirs(DATA_DIR, exist_ok=True)
return os.path.join(DATA_DIR, f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--nqubits", type=int, default=10)
parser.add_argument("--circuit", type=str, default="ghz",
choices=["qft", "variational", "ghz"])
parser.add_argument("--nlayers", type=int, default=3)
parser.add_argument("--max-bond", type=int, default=64)
parser.add_argument("--cut-ratio", type=float, default=1e-6)
parser.add_argument("--skip-jit", action="store_true")
args = parser.parse_args()
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}")
print(f"MPS config: max_bond={args.max_bond}, cut_ratio={args.cut_ratio}")
cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers)
t_ref = None
if args.skip_jit and os.path.exists(cache_path):
sv_ref = np.load(cache_path)
print(f"\n[qibojit] loaded from cache: {cache_path}")
else:
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
sv_ref, t_ref = run_qibojit(circuit_ref)
np.save(cache_path, sv_ref)
print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})")
np.random.seed(42)
circuit_mps = make_circuit(args.circuit, args.nqubits, args.nlayers)
try:
sv_mps, t_mps = run_qmatchatea(circuit_mps, args.max_bond, args.cut_ratio)
fidelity, l2_err = compare(sv_ref, sv_mps)
print(f"[qmatchatea] time={t_mps:.4f}s")
print(f"\nFidelity : {fidelity:.8f} (1=perfect)")
print(f"L2 error : {l2_err:.2e}")
if t_ref is not None and t_mps > 0:
print(f"Speedup : {t_ref/t_mps:.2f}x")
except Exception as e:
print(f"[qmatchatea] FAILED: {e}")
raise
if __name__ == "__main__":
main()

386
benchmark_tn.py Normal file
View File

@@ -0,0 +1,386 @@
"""Benchmark: qibotn/quimb generic TN — expectation values."""
import multiprocessing
multiprocessing.set_start_method("spawn", force=True)
import pickle
import time
import threading
import argparse
import numpy as np
import cotengra as ctg
import qibo
from qibo import Circuit, gates
class TimedTrialFn:
def __init__(self, trial_fn, timeout=15):
self.trial_fn = trial_fn
self.timeout = timeout
def __call__(self, *args, **kwargs):
result = [None]
exc = [None]
def _run():
try:
result[0] = self.trial_fn(*args, **kwargs)
except Exception as e:
exc[0] = e
t = threading.Thread(target=_run, daemon=True)
t.start()
t.join(self.timeout)
if t.is_alive():
raise TimeoutError(f"trial exceeded {self.timeout}s")
if exc[0] is not None:
raise exc[0]
return result[0]
def make_circuit(circuit_type, nqubits, nlayers=1):
c = Circuit(nqubits)
if circuit_type == "qft":
from qibo.models import QFT
return QFT(nqubits)
elif circuit_type == "variational":
for layer in range(nlayers):
for q in range(nqubits):
c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi)))
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CZ(q, q + 1))
elif circuit_type == "ghz":
c.add(gates.H(0))
for q in range(nqubits - 1):
c.add(gates.CNOT(q, q + 1))
elif circuit_type == "brickwork":
for q in range(nqubits):
c.add(gates.H(q))
for layer in range(nlayers):
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CNOT(q, q + 1))
c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi)))
c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi)))
else:
raise ValueError(f"Unknown circuit: {circuit_type}")
return c
def make_z_observable(nqubits):
"""Z on qubit 0 only — single contraction for benchmarking"""
return ["z"], [(0,)], [1.0]
def run_quimb_tn(circuit, nqubits, num_slices, load_path=None, save_path=None):
"""Mode: expval — compute <Z_0> via local_expectation (lightcone pruning)."""
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn")
operators, sites, coeffs = make_z_observable(nqubits)
ops = b._string_to_quimb_operator(operators[0])
qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
gate_opts={"max_bond": None, "cutoff": 1e-10})
if load_path:
with open(load_path, "rb") as f:
saved = pickle.load(f)
tree = saved["tree"]
t_search = 0.0
print(f" [path loaded] {load_path}")
else:
opt = ctg.HyperOptimizer(
methods=['kahypar', 'random-greedy', 'spinglass'],
max_repeats=16,
parallel=True,
max_time=60,
slicing_opts={'target_slices': num_slices},
progbar=True,
)
t0 = time.time()
rehearsal = qc.local_expectation(ops, where=sites[0], optimize=opt,
simplify_sequence="R", rehearse=True)
t_search = time.time() - t0
tree = rehearsal['tree']
print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}")
if save_path:
with open(save_path, "wb") as f:
pickle.dump({"tree": tree}, f)
print(f" [path saved] {save_path}")
t0 = time.time()
expval = qc.local_expectation(ops, where=sites[0], optimize=tree, simplify_sequence="R")
t_contract = time.time() - t0
print(f" [contraction] {t_contract:.3f}s")
return float(expval.real), t_search + t_contract
def run_quimb_tn_statevector(circuit, nqubits, num_slices, load_path=None, save_path=None):
"""Mode: statevector — contract full TN to dense vector."""
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn")
import torch
qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
gate_opts={"max_bond": None, "cutoff": 1e-10})
qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex64)
if load_path:
with open(load_path, "rb") as f:
saved = pickle.load(f)
tree = saved["tree"]
t_search = 0.0
print(f" [path loaded] {load_path}")
else:
opt = ctg.HyperOptimizer(
#methods=['kahypar', 'random-greedy', 'spinglass'],
max_repeats=1024,
#parallel="concurrent.futures",
parallel=64,
max_time=60,
minimize='size',
slicing_opts={'target_slices': num_slices},
#slicing_opts={'target_size': 2**30},
progbar=True,
on_trial_error='ignore'
)
t0 = time.time()
rehearsal = qc.to_dense(optimize=opt, rehearse=True)
t_search = time.time() - t0
tree = rehearsal['tree']
#print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}")
if save_path:
with open(save_path, "wb") as f:
pickle.dump({"tree": tree}, f)
print(f" [path saved] {save_path}")
print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}")
return None, t_search
t0 = time.time()
sv = qc.to_dense(optimize=tree).reshape(-1)
t_contract = time.time() - t0
print(f" [contraction] {t_contract:.3f}s")
sv_tn = np.array(sv)
return sv_tn, t_search + t_contract
def _contract_mpi(tree, arrays, comm, root=0):
"""Contract slices via MPI, returning result as the same array type as input.
Unlike ``cotengra.ContractionTree.contract_mpi``, this works with any
array backend (numpy, torch, etc.) — it only converts to numpy at the
MPI-reduce boundary and converts back.
"""
size = comm.Get_size()
rank = comm.Get_rank()
result_np = None
is_torch = type(arrays[0]).__module__.startswith("torch")
for i in range(rank, tree.multiplicity, size):
x = tree.contract_slice(arrays, i)
x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x))
if result_np is None:
result_np = x_np
else:
result_np += x_np
if result_np is None:
result_np = np.zeros(1, dtype=np.complex64)
if rank == root:
result = np.zeros_like(result_np)
else:
result = None
comm.Reduce(result_np, result, root=root)
if rank == root:
import torch
return torch.from_numpy(np.asarray(result)) if is_torch else result
return None
def run_quimb_tn_statevector_mpi(circuit, nqubits, num_slices, load_path=None, save_path=None):
"""MPI-parallel statevector via custom MPI contraction (supports torch backend)."""
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn")
import torch
qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
gate_opts={"max_bond": None, "cutoff": 1e-10})
qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex64)
if load_path:
if rank == 0:
with open(load_path, "rb") as f:
saved = pickle.load(f)
tree, psi, t_search = saved["tree"], saved["psi"], 0.0
print(f" [path loaded] {load_path}")
else:
tree = psi = None
t_search = 0.0
else:
# each rank runs serial search over its share of trials
total_repeats = 1024
rank_repeats = max(1, total_repeats // size)
opt = ctg.HyperOptimizer(
methods=['kahypar', 'random-greedy', 'spinglass'],
max_repeats=rank_repeats,
parallel=False,
max_time=100,
minimize='size',
slicing_opts={'target_slices': max(num_slices, size), 'allow_outer': False},
progbar=(rank == 0),
)
t0 = time.time()
rehearsal = qc.to_dense(optimize=opt, rehearse=True)
t_search = time.time() - t0
local_tree = rehearsal['tree']
local_psi = rehearsal['tn']
local_size = local_tree.contraction_width()
# gather all trees to rank 0, pick best by contraction_width
all_results = comm.gather((local_size, local_tree, local_psi), root=0)
if rank == 0:
_, tree, psi = min(all_results, key=lambda x: x[0])
print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f} slices={tree.multiplicity}")
if save_path:
with open(save_path, "wb") as f:
pickle.dump({"tree": tree, "psi": psi}, f)
print(f" [path saved] {save_path}")
else:
tree = psi = None
tree = comm.bcast(tree, root=0)
psi = comm.bcast(psi, root=0)
t_search = comm.bcast(t_search, root=0)
arrays = psi.arrays
t0 = time.time()
sv = _contract_mpi(tree, arrays, comm, root=0)
t_contract = time.time() - t0
if rank == 0:
print(f" [contraction] {t_contract:.3f}s")
return np.array(sv).reshape(-1), t_search + t_contract
return None, t_search + t_contract
def run_quimb_tn_samples(circuit, nshots=1024):
"""Mode: samples — sample from circuit output distribution."""
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn")
t0 = time.time()
result = b.execute_circuit(circuit, nshots=nshots)
t_total = time.time() - t0
print(f" [sampling] {t_total:.3f}s nshots={nshots}")
print(f" top states: {dict(list(result.frequencies().items())[:5])}")
return result, t_total
def qibojit_expval(circuit, nqubits):
"""Compute <Z_0> via qibojit statevector."""
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
sv = np.array(result.state(), dtype=complex).flatten()
probs = np.abs(sv) ** 2
bits = (np.arange(len(probs)) >> (nqubits - 1)) & 1
expval = float(np.dot(probs, 1 - 2 * bits))
return expval, elapsed
def run_qibojit(circuit):
"""Compute full statevector via qibojit."""
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
sv = np.array(result.state(), dtype=complex).flatten()
return sv, elapsed
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--nqubits", type=int, default=10)
parser.add_argument("--circuit", type=str, default="qft",
choices=["qft", "variational", "ghz", "brickwork"])
parser.add_argument("--nlayers", type=int, default=3)
parser.add_argument("--num-slices", type=int, default=1)
parser.add_argument("--nshots", type=int, default=1024)
parser.add_argument("--mode", type=str, default="statevector",
choices=["expval", "statevector", "samples"],
help="expval: local_expectation; statevector: to_dense; samples: sampling")
parser.add_argument("--mpi", action="store_true",
help="Use MPI-parallel contraction (run with mpirun -n N)")
parser.add_argument("--no-compare", action="store_true",
help="Skip qibojit reference run")
parser.add_argument("--save-path", type=str, default=None,
help="Save contraction tree to a pickle file")
parser.add_argument("--load-path", type=str, default=None,
help="Load contraction tree from a pickle file (skip path search)")
args = parser.parse_args()
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}, mode={args.mode}")
np.random.seed(42)
circuit_tn = make_circuit(args.circuit, args.nqubits, args.nlayers)
try:
if args.mode == "expval":
expval_tn, t_tn = run_quimb_tn(circuit_tn, args.nqubits, args.num_slices,
load_path=args.load_path, save_path=args.save_path)
print(f"\n[quimb TN] time={t_tn:.4f}s <Z_0>={expval_tn:.8f}")
elif args.mode == "statevector":
if args.mpi:
sv_tn, t_tn = run_quimb_tn_statevector_mpi(circuit_tn, args.nqubits, args.num_slices,
load_path=args.load_path, save_path=args.save_path)
else:
sv_tn, t_tn = run_quimb_tn_statevector(circuit_tn, args.nqubits, args.num_slices,
load_path=args.load_path, save_path=args.save_path)
if sv_tn is not None:
print(f"\n[quimb TN] time={t_tn:.4f}s statevector shape={sv_tn.shape}")
np.save(f"data/sv_tn_{args.circuit}{args.nqubits}.npy", sv_tn)
else:
_, t_tn = run_quimb_tn_samples(circuit_tn, args.nqubits, args.nshots)
print(f"\n[quimb TN] time={t_tn:.4f}s")
args.no_compare = True # samples 模式无法和 qibojit 期望值对比
except Exception as e:
print(f"[quimb TN] FAILED: {e}")
raise
if not args.no_compare and args.mode != "statevector":
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
expval_ref, t_ref = qibojit_expval(circuit_ref, args.nqubits)
print(f"[qibojit] time={t_ref:.4f}s <Z_0>={expval_ref:.8f}")
print(f"\nDiff : {abs(expval_tn - expval_ref):.2e}")
if t_tn > 0:
print(f"Speedup : {t_ref/t_tn:.2f}x")
elif not args.no_compare and args.mode == "statevector" and sv_tn is not None:
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
sv_ref, t_ref = run_qibojit(circuit_ref)
fid = abs(np.dot(sv_ref.conj(), sv_tn)) ** 2
l2_err = np.linalg.norm(sv_ref - sv_tn)
print(f"[qibojit] time={t_ref:.4f}s")
print(f"Fidelity : {fid:.8f} (1=perfect)")
print(f"L2 error : {l2_err:.2e}")
if t_tn > 0:
print(f"Speedup : {t_ref/t_tn:.2f}x")
if __name__ == "__main__":
main()

246
benchmark_tn_mpi.py Normal file
View File

@@ -0,0 +1,246 @@
"""MPI-parallel TN benchmark: path search + contraction via MPI."""
import pickle
import time
import argparse
import numpy as np
import cotengra as ctg
import qibo
from qibo import Circuit, gates
from mpi4py import MPI
from concurrent.futures import ProcessPoolExecutor, as_completed
def _run_serial_search(tn_bytes, output_inds, repeats, seed, num_slices, n_ranks):
"""Run one serial HyperOptimizer in a subprocess, return (width, tree)."""
import pickle, cotengra as ctg, random
random.seed(seed)
tn = pickle.loads(tn_bytes)
opt = ctg.HyperOptimizer(
methods=['kahypar', 'kahypar-agglom', 'spinglass'],
max_repeats=repeats,
parallel=False,
minimize='flops',
max_time=600,
optlib="random",
slicing_opts={'target_size': 2**30, 'allow_outer': False},
progbar=False,
)
tree = tn.contraction_tree(optimize=opt, output_inds=output_inds)
return tree.contraction_width(), tree
def parallel_search(tn, output_inds, total_repeats, n_workers, num_slices, n_ranks,
timeout=None):
"""Launch n_workers subprocesses each running serial search, return best tree."""
import pickle, os, signal
from concurrent.futures import ProcessPoolExecutor, as_completed
tn_bytes = pickle.dumps(tn)
repeats_per = max(1, total_repeats // n_workers)
best_width, best_tree = float('inf'), None
with ProcessPoolExecutor(max_workers=n_workers) as pool:
futures = {
pool.submit(_run_serial_search, tn_bytes, output_inds,
repeats_per, seed, num_slices, n_ranks): seed
for seed in range(n_workers)
}
pids = {f: p.pid for f, p in zip(futures, pool._processes.values())}
try:
for fut in as_completed(futures, timeout=timeout):
try:
width, tree = fut.result()
if width < best_width:
best_width, best_tree = width, tree
except Exception as e:
print(f" [worker failed] {e}")
except TimeoutError:
pass
for fut, pid in pids.items():
if not fut.done():
try:
os.kill(pid, signal.SIGKILL)
except ProcessLookupError:
pass
return best_tree
def make_circuit(circuit_type, nqubits, nlayers=1):
c = Circuit(nqubits)
if circuit_type == "qft":
from qibo.models import QFT
return QFT(nqubits)
elif circuit_type == "variational":
for layer in range(nlayers):
for q in range(nqubits):
c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi)))
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CZ(q, q + 1))
elif circuit_type == "ghz":
c.add(gates.H(0))
for q in range(nqubits - 1):
c.add(gates.CNOT(q, q + 1))
elif circuit_type == "brickwork":
for q in range(nqubits):
c.add(gates.H(q))
for layer in range(nlayers):
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CNOT(q, q + 1))
c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi)))
c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi)))
else:
raise ValueError(f"Unknown circuit: {circuit_type}")
return c
def _contract_mpi(tree, arrays, comm, root=0):
rank = comm.Get_rank()
size = comm.Get_size()
is_torch = type(arrays[0]).__module__.startswith("torch")
result_np = None
for i in range(rank, tree.multiplicity, size):
x = tree.contract_slice(arrays, i)
x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x))
result_np = x_np if result_np is None else result_np + x_np
if result_np is None:
result_np = np.zeros(1, dtype=np.complex64)
result = np.zeros_like(result_np) if rank == root else None
comm.Reduce(result_np, result, root=root)
if rank == root:
import torch
return torch.from_numpy(np.asarray(result)) if is_torch else result
return None
def run_mpi(circuit, nqubits, num_slices, total_repeats=1024,
load_path=None, save_path=None):
"""Each MPI rank runs serial path search over total_repeats/size trials,
rank 0 picks the global best, then all ranks contract in parallel."""
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn")
import torch
qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
gate_opts={"max_bond": None, "cutoff": 1e-10})
qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex64)
# --- path search: each rank serial, gather best to rank 0 ---
if load_path:
if rank == 0:
with open(load_path, "rb") as f:
saved = pickle.load(f)
tree, psi, t_search = saved["tree"], saved["psi"], 0.0
print(f" [path loaded] {load_path}")
else:
tree = psi = None
t_search = 0.0
else:
rank_repeats = max(1, total_repeats // size)
t0 = time.time()
# get TN object first (no contraction), then run parallel search
psi_tn = qc.to_dense(rehearse="tn")
local_tree = parallel_search(
psi_tn, psi_tn.outer_inds(), rank_repeats, n_workers=48,
num_slices=num_slices, n_ranks=size, timeout=630,
)
t_search = time.time() - t0
local_psi = psi_tn
all_results = comm.gather((local_tree.contraction_width(), local_tree, local_psi), root=0)
if rank == 0:
_, tree, psi = min(all_results, key=lambda x: x[0])
print(f" [path search] {t_search:.3f}s "
f"flops~2^{tree.contraction_cost():.2f} "
f"size~2^{tree.contraction_width():.2f} "
f"slices={tree.multiplicity}")
if save_path:
with open(save_path, "wb") as f:
pickle.dump({"tree": tree, "psi": psi}, f)
print(f" [path saved] {save_path}")
else:
tree = psi = None
if save_path:
t_search = comm.bcast(t_search, root=0)
return None, t_search
tree = comm.bcast(tree, root=0)
psi = comm.bcast(psi, root=0)
t_search = comm.bcast(t_search, root=0)
# --- contraction: all ranks work in parallel ---
import torch
torch.set_num_threads(max(1, 48 // size))
arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex64) for a in psi.arrays]
t0 = time.time()
sv = _contract_mpi(tree, arrays, comm, root=0)
t_contract = time.time() - t0
if rank == 0:
print(f" [contraction] {t_contract:.3f}s")
return np.array(sv).reshape(-1), t_search + t_contract
return None, t_search + t_contract
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--nqubits", type=int, default=30)
parser.add_argument("--circuit", type=str, default="qft",
choices=["qft", "variational", "ghz", "brickwork"])
parser.add_argument("--nlayers", type=int, default=3)
parser.add_argument("--num-slices", type=int, default=1)
parser.add_argument("--total-repeats", type=int, default=1024)
parser.add_argument("--save-path", type=str, default=None)
parser.add_argument("--load-path", type=str, default=None)
parser.add_argument("--no-compare", action="store_true")
args = parser.parse_args()
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
if rank == 0:
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, "
f"nlayers={args.nlayers}, ranks={comm.Get_size()}")
np.random.seed(42)
circuit = make_circuit(args.circuit, args.nqubits, args.nlayers)
try:
sv, t_total = run_mpi(circuit, args.nqubits, args.num_slices,
total_repeats=args.total_repeats,
load_path=args.load_path, save_path=args.save_path)
except Exception as e:
if rank == 0:
print(f"[FAILED] {e}")
raise
if rank == 0 and sv is not None:
print(f"\n[quimb TN MPI] time={t_total:.4f}s shape={sv.shape}")
np.save(f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy", sv)
if not args.no_compare:
from benchmark_tn import run_qibojit
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
sv_ref, t_ref = run_qibojit(circuit_ref)
fid = abs(np.dot(sv_ref.conj(), sv)) ** 2
print(f"[qibojit] time={t_ref:.4f}s")
print(f"Fidelity : {fid:.8f}")
print(f"L2 error : {np.linalg.norm(sv_ref - sv):.2e}")
if t_total > 0:
print(f"Speedup : {t_ref/t_total:.2f}x")
if __name__ == "__main__":
main()

51
compare_jit_tn_quimb.py Normal file
View File

@@ -0,0 +1,51 @@
import numpy as np
import os
import sys
def check_results(ref_path, tn_path):
# 1. 检查文件是否存在
if not os.path.exists(ref_path) or not os.path.exists(tn_path):
print(f"Error: 找不到文件!\n参考文件: {ref_path}\n待测文件: {tn_path}")
return
print(f"正在加载数据并对比: \n [Ref] {ref_path}\n [TN ] {tn_path}\n")
try:
# 2. 加载状态向量
# mmap_mode='r' 可以防止大文件直接撑爆内存
sv_ref = np.load(ref_path, mmap_mode='r')
sv_tn = np.load(tn_path, mmap_mode='r')
# 3. 计算保真度 (Fidelity)
# fid = |<ref|tn>|^2
inner_product = np.dot(sv_ref.conj(), sv_tn)
fidelity = np.abs(inner_product)**2
# 4. 计算 L2 误差 (欧氏距离)
l2_error = np.linalg.norm(sv_ref - sv_tn)
# 5. 打印结果
print("-" * 30)
print(f"保真度 (Fidelity): {fidelity:.12f}")
#print(f"L2 范数误差: {l2_error:.2e}")
print("-" * 30)
# phase-invariant L2: align global phase first
phase = inner_product / np.abs(inner_product)
l2_phase_corrected = np.linalg.norm(sv_ref - sv_tn / phase)
print(f"L2 误差(相位校正后): {l2_phase_corrected:.2e}")
if fidelity > 0.999999:
print("✅ 验证通过:结果高度一致。")
else:
print("❌ 警告:保真度较低,请检查收缩路径或截断误差。")
except Exception as e:
print(f"计算过程中发生错误: {e}")
if __name__ == "__main__":
# 你可以在这里直接修改文件名
REF_FILE = 'data/sv_qibojit_qft30.npy'
TN_FILE = 'data/sv_tn_qft30_mpi.npy'
check_results(REF_FILE, TN_FILE)

View File

@@ -1,35 +1,35 @@
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=source
set BUILDDIR=build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd

2
hostfile Normal file
View File

@@ -0,0 +1,2 @@
10.20.6.74:1
10.20.6.102:1

21
inspect_path.py Normal file
View File

@@ -0,0 +1,21 @@
import pickle
import sys
path = sys.argv[1] if len(sys.argv) > 1 else 'path/path.pkl.34.mpi'
with open(path, 'rb') as f:
d = pickle.load(f)
tree = d['tree']
width = tree.contraction_width()
slices = tree.multiplicity
sliced_width = width - (slices.bit_length() - 1)
print(f"Path: {path}")
print(f"Width (unsliced): {width:.1f}")
print(f"Slices: {slices}")
print(f"Sliced width: {sliced_width:.1f}")
print(f"Peak memory (total): {2**width * 16 / 1e9:.1f} GB")
print(f"Peak per slice: {2**sliced_width * 16 / 1e9:.2f} GB")
print(f"Sliced indices: {len(tree.sliced_inds)}")
print(f"Cost (log2 flops): {tree.contraction_cost(log=True):.2f}")

6
poetry.lock generated
View File

@@ -1733,14 +1733,14 @@ files = [
[[package]]
name = "mako"
version = "1.3.10"
version = "1.3.11"
description = "A super-fast templating language that borrows the best ideas from the existing templating languages."
optional = false
python-versions = ">=3.8"
groups = ["main"]
files = [
{file = "mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59"},
{file = "mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28"},
{file = "mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77"},
{file = "mako-1.3.11.tar.gz", hash = "sha256:071eb4ab4c5010443152255d77db7faa6ce5916f35226eb02dc34479b6858069"},
]
[package.dependencies]

18
run_qibojit_ref.py Normal file
View File

@@ -0,0 +1,18 @@
"""Run qibojit on 30-qubit QFT and save statevector for comparison."""
import time
import numpy as np
import qibo
from qibo.models import QFT
#np.random.seed(42)
circuit = QFT(32)
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
sv = np.array(result.state(), dtype=complex).flatten()
np.save("data/sv_qibojit_qft30.npy", sv)
print(f"[qibojit] time={elapsed:.4f}s shape={sv.shape}")
print(f"Saved to sv_qibojit_qft30.npy")

View File

@@ -167,7 +167,7 @@ def execute_circuit(
raise_error(ValueError, "Initial state not None supported only for MPS ansatz.")
circ_quimb = self.circuit_ansatz.from_openqasm2_str(
circuit.to_qasm(), psi0=initial_state
circuit.to_qasm(), psi0=initial_state, gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff}
)
if nshots:
@@ -186,7 +186,16 @@ def execute_circuit(
else:
frequencies = None
measured_probabilities = None
'''
if return_array:
if self.ansatz == "mps":
psi = circ_quimb.psi
statevector = psi.to_dense().reshape(-1)
else:
statevector = circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer)
else:
statevector = None
'''
statevector = (
circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer)
if return_array
@@ -291,6 +300,15 @@ def _qibo_circuit_to_quimb(
quimb_gate_name = GATE_MAP.get(gate_name, None)
if quimb_gate_name == "measure":
continue
if gate_name == "cu1":
theta = gate.parameters[0]
c, t = gate.qubits
circ.apply_gate("RZ", theta / 2, c)
circ.apply_gate("RZ", theta / 2, t)
circ.apply_gate("CNOT", c, t)
circ.apply_gate("RZ", -theta / 2, t)
circ.apply_gate("CNOT", c, t)
continue
if quimb_gate_name is None:
raise_error(ValueError, f"Gate {gate_name} not supported in Quimb backend.")

View File

@@ -57,10 +57,10 @@ class TensorNetworkResult:
return self.measures
def state(self):
"""Return the statevector if the number of qubits is less than 20."""
if self.nqubits < 20:
"""Return the statevector if the number of qubits is less than 35."""
if self.nqubits < 35:
return self.statevector
raise_error(
NotImplementedError,
f"Tensor network simulation cannot be used to reconstruct statevector for >= 20 .",
f"Tensor network simulation cannot be used to reconstruct statevector for >= 35 .",
)

39
sweep_bond_32q.py Normal file
View File

@@ -0,0 +1,39 @@
"""Bond dimension sweep for 32-qubit variational circuit."""
import os
import sys
import numpy as np
sys.path.insert(0, os.path.dirname(__file__))
from benchmark_mps import make_circuit, run_qibojit, run_quimb_mps, compare, jit_cache_path, DATA_DIR
NQUBITS = 32
NLAYERS = 5
BOND_VALUES = [1, 8, 16, 32, 64, 128, 256]
SVD_CUTOFF = 1e-6
OPTIMIZER = "auto-hq"
if __name__ == "__main__":
cache_path = jit_cache_path("variational", NQUBITS, NLAYERS)
if os.path.exists(cache_path):
sv_ref = np.load(cache_path)
print(f"[qibojit] loaded from cache: {cache_path}\n")
else:
np.random.seed(42)
circuit_ref = make_circuit("variational", NQUBITS, NLAYERS)
sv_ref, t_ref = run_qibojit(circuit_ref)
np.save(cache_path, sv_ref)
print(f"[qibojit] time={t_ref:.4f}s (saved to {cache_path})\n")
print(f"{'bond':>6} {'time(s)':>10} {'fidelity':>12} {'l2_err':>10}")
print("-" * 46)
for bond in BOND_VALUES:
np.random.seed(42)
circuit_mps = make_circuit("variational", NQUBITS, NLAYERS)
try:
sv_mps, t_mps = run_quimb_mps(circuit_mps, bond, SVD_CUTOFF, OPTIMIZER)
fidelity, l2_err = compare(sv_ref, sv_mps)
print(f"{bond:>6} {t_mps:>10.4f} {fidelity:>12.8f} {l2_err:>10.2e}")
except Exception as e:
print(f"{bond:>6} FAILED: {e}")

View File

@@ -1,27 +0,0 @@
import time
import pickle
def run(input="tree.pkl"):
with open(input, "rb") as f:
data = pickle.load(f)
sliced_tree = data["sliced_tree"]
arrays = data["arrays"]
n_slices = sliced_tree.nslices
print(f"Total slices: {n_slices}")
t0 = time.perf_counter()
total = sum(sliced_tree.contract_slice(arrays, i, backend='numpy',implementation='cotengra') for i in range(n_slices))
t1 = time.perf_counter()
print(f"Contract: {t1 - t0:.4f} s")
#print(f"Result: {total:.10f}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--input", type=str, default="tree.pkl.bak")
args = parser.parse_args()
run(args.input)

View File

@@ -1,60 +0,0 @@
"""生成比赛常用测试电路的 QASM 文件。"""
import argparse
import qibo
from qibo.models import QFT, Circuit
from qibo import gates
import numpy as np
qibo.set_backend("numpy")
def gen_qft(n_qubits):
return QFT(n_qubits, with_swaps=True).to_qasm()
def gen_random(n_qubits, depth, seed):
rng = np.random.default_rng(seed)
c = Circuit(n_qubits)
for _ in range(depth):
for q in range(n_qubits):
c.add(gates.H(q))
for q in range(0, n_qubits - 1, 2):
c.add(gates.CZ(q, q + 1))
return c.to_qasm()
def gen_supremacy(n_qubits, depth, seed):
"""Google supremacy 风格:随机单比特门 + CZ"""
rng = np.random.default_rng(seed)
single = [gates.X, gates.Y, gates.H]
c = Circuit(n_qubits)
for _ in range(depth):
for q in range(n_qubits):
g = single[rng.integers(3)]
c.add(g(q))
for q in range(0, n_qubits - 1, 2):
c.add(gates.CZ(q, q + 1))
for q in range(1, n_qubits - 1, 2):
c.add(gates.CZ(q, q + 1))
return c.to_qasm()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--circuit", default="qft", choices=["qft", "random", "supremacy"])
parser.add_argument("--n_qubits", type=int, default=20)
parser.add_argument("--depth", type=int, default=10)
parser.add_argument("--seed", type=int, default=42)
parser.add_argument("--out", default="circuit.qasm")
args = parser.parse_args()
if args.circuit == "qft":
qasm = gen_qft(args.n_qubits)
elif args.circuit == "random":
qasm = gen_random(args.n_qubits, args.depth, args.seed)
else:
qasm = gen_supremacy(args.n_qubits, args.depth, args.seed)
with open(args.out, "w") as f:
f.write(qasm)
print(f"Written: {args.out} ({args.n_qubits} qubits, {args.circuit})")

View File

@@ -1,2 +0,0 @@
192.168.20.102
192.168.20.101

View File

@@ -1,126 +0,0 @@
"""
MPI + ThreadPoolExecutor 混合并行张量网络收缩。
每个 MPI rank 负责一部分 slicestride 分配),
rank 内用 ThreadPoolExecutor 并行执行各 slice每线程一个 slice
用法:
mpirun -n <N> python mpi_v.py --qasm circuit.qasm --target-slices 16 --threads 8
"""
import os
import time
import argparse
import numpy as np
from concurrent.futures import ThreadPoolExecutor, as_completed
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
import quimb.tensor as qtn
import cotengra as ctg
def _contract_slice(sliced_tree, arrays, idx):
return sliced_tree.contract_slice(arrays, idx, backend="numpy")
def run(qasm_path, target_slices, n_threads, max_repeats):
# ── 构建张量网络rank 0broadcast arrays──
if rank == 0:
with open(qasm_path) as f:
qasm_str = f.read()
# 不用 full_simplify保持 outer_inds 完整
psi = qtn.Circuit.from_openqasm2_str(qasm_str).psi
n_qubits = len([i for i in psi.outer_inds() if i.startswith("k")])
output_inds = [f"k{i}" for i in range(n_qubits)]
arrays = [t.data for t in psi.tensors]
else:
psi = None
n_qubits = None
arrays = None
output_inds = None
n_qubits = comm.bcast(n_qubits, root=0)
arrays = comm.bcast(arrays, root=0)
output_inds = comm.bcast(output_inds, root=0)
# ── 路径搜索rank 0+ broadcast ──
t0 = time.perf_counter()
if rank == 0:
opt = ctg.HyperOptimizer(
methods=["kahypar", "greedy"],
max_repeats=max_repeats,
minimize="flops",
parallel=min(96, os.cpu_count()),
)
tree = psi.contraction_tree(optimize=opt, output_inds=output_inds)
n = target_slices
sliced_tree = None
while n >= 1:
try:
sliced_tree = tree.slice(target_size=n, allow_outer=False)
break
except RuntimeError:
n //= 2
if sliced_tree is None:
sliced_tree = tree.slice(target_slices=1, allow_outer=True)
print(f"[rank 0] path search: {time.perf_counter()-t0:.2f}s slices: {sliced_tree.nslices}", flush=True)
else:
sliced_tree = None
sliced_tree = comm.bcast(sliced_tree, root=0)
n_slices = sliced_tree.nslices
# ── 分布式收缩MPI stride + ThreadPoolExecutor──
my_indices = list(range(rank, n_slices, size))
local_result = np.zeros(2**n_qubits, dtype=np.complex128)
comm.Barrier()
t1 = time.perf_counter()
with ThreadPoolExecutor(max_workers=n_threads) as pool:
for batch_start in range(0, len(my_indices), n_threads):
batch = my_indices[batch_start:batch_start + n_threads]
futures = {pool.submit(_contract_slice, sliced_tree, arrays, i): i for i in batch}
for fut in as_completed(futures):
local_result += np.array(fut.result()).flatten()
t2 = time.perf_counter()
if rank == 0:
print(f"[rank 0] contract: {t2-t1:.2f}s", flush=True)
# ── MPI reduce ──
total = comm.reduce(local_result, op=MPI.SUM, root=0)
if rank == 0:
print(f"result norm: {np.linalg.norm(total):.10f}", flush=True)
print(f"total time: {t2-t0:.2f}s", flush=True)
return total
return None
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--qasm", required=True, help="QASM 文件路径")
parser.add_argument("--target-slices", type=int, default=None,
help="目标切片数量(优先于 target-size")
parser.add_argument("--target-size", type=int, default=28,
help="切片目标大小指数2^N默认 28")
parser.add_argument("--threads", type=int, default=max(1, os.cpu_count() // size),
help="每个 rank 的线程数,默认 cpu_count/size")
parser.add_argument("--max-repeats", type=int, default=256,
help="cotengra 路径搜索重复次数")
args = parser.parse_args()
target = args.target_slices if args.target_slices else 2**args.target_size
mode = "slices" if args.target_slices else f"size=2^{args.target_size}"
if rank == 0:
print(f"ranks={size} threads/rank={args.threads} target_{mode}", flush=True)
run(args.qasm, target, args.threads, args.max_repeats)
if __name__ == "__main__":
main()

View File

@@ -1,68 +0,0 @@
import os
import time
import numpy as np
import quimb.tensor as qtn
import cotengra as ctg
'''
# --- 1. 关键:在导入 numpy/quimb 之前设置环境变量 ---
# 告诉底层 BLAS 库 (MKL/OpenBLAS) 使用 96 个线程
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
# 优化线程亲和性,避免线程在不同 CPU 核心间跳变,提升缓存命中率
os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"
os.environ["KMP_BLOCKTIME"] = "0"
'''
# 现在导入库
import psutil
def run_baseline(n_qubits=50, depth=20):
print(f"🚀 {n_qubits} Qubits, Depth {depth}")
print(f"💻 Detected Logical Cores: {os.cpu_count()}")
# 1. 构建电路 (必须 complex128 保证精度)
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
for d in range(depth):
for i in range(n_qubits):
circ.apply_gate('H', i)
for i in range(0, n_qubits - 1, 2):
circ.apply_gate('CZ', i, i + 1)
psi = circ.psi
# 2. 构建闭合网络 <psi|psi>
net = psi.conj() & psi
# 3. 路径搜索参数 (Kahypar)
print("🔍 Searching path with Kahypar...")
opt = ctg.HyperOptimizer(
methods=['kahypar'],
max_repeats=128,
parallel=96,
minimize='flops',
on_trial_error='ignore'
)
# 4. 阶段1路径搜索
t0 = time.perf_counter()
tree = net.contraction_tree(optimize=opt)
t1 = time.perf_counter()
print(f"🔍 Path search done: {t1 - t0:.4f} s")
# 5. 阶段2张量收缩
result = net.contract(optimize=tree, backend='numpy')
t2 = time.perf_counter()
peak_mem = psutil.Process().memory_info().rss / 1024**3
print(f"✅ Done!")
print(f"⏱️ Contract: {t2 - t1:.4f} s | Total: {t2 - t0:.4f} s")
print(f"💾 Peak Memory: {peak_mem:.2f} GB")
print(f"🔢 Result: {result:.10f}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--n_qubits", type=int, default=50)
parser.add_argument("--depth", type=int, default=20)
args = parser.parse_args()
run_baseline(n_qubits=args.n_qubits, depth=args.depth)

View File

@@ -1,90 +0,0 @@
import time
import numpy as np
import quimb.tensor as qtn
import cotengra as ctg
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
def build_qft(n_qubits):
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
for i in range(n_qubits):
circ.apply_gate('H', i)
for j in range(i + 1, n_qubits):
circ.apply_gate('CPHASE', np.pi / 2 ** (j - i), i, j)
return circ
def run_mpi(n_qubits, depth=None):
if rank == 0:
print(f"MPI size: {size} ranks")
print(f"Circuit: QFT {n_qubits} qubits")
circ = build_qft(n_qubits)
psi = circ.psi
# 期望值网络:<psi|Z_0|psi>
Z = np.array([[1, 0], [0, -1]], dtype=np.complex128)
bra = psi.conj().reindex({f'k{i}': f'b{i}' for i in range(n_qubits)})
obs = qtn.Tensor(Z, inds=(f'k0', f'b0'))
net = psi & obs & bra
# 2. 所有 rank 并行搜索路径rank 0 选全局最优
t0 = time.perf_counter()
repeats_per_rank = max(1, 128 // size)
opt = ctg.HyperOptimizer(
methods=['kahypar'],
#methods=['greedy'],
#max_repeats=repeats_per_rank,
max_repeats=repeats_per_rank,
minimize='flops',
parallel=max(1, 96 // size),
)
local_tree = net.contraction_tree(optimize=opt)
all_trees = comm.gather(local_tree, root=0)
if rank == 0:
tree = min(all_trees, key=lambda t: t.contraction_cost())
t1 = time.perf_counter()
print(f"[rank 0] Path search: {t1 - t0:.4f} s")
else:
tree = None
tree = comm.bcast(tree, root=0)
# 3. rank 0 切片broadcast sliced_tree
if rank == 0:
sliced_tree = tree.slice(target_size=2**27)
else:
sliced_tree = None
sliced_tree = comm.bcast(sliced_tree, root=0)
n_slices = sliced_tree.nslices
if rank == 0:
print(f"Total slices: {n_slices}, each rank handles ~{n_slices // size}")
arrays = [t.data for t in net.tensors]
# 每个 rank 处理自己负责的切片
t2 = time.perf_counter()
local_result = 0.0 + 0.0j
for i in range(rank, n_slices, size):
local_result += sliced_tree.contract_slice(arrays, i, backend='numpy')
t3 = time.perf_counter()
# 4. reduce 汇总到 rank 0
total = comm.reduce(local_result, op=MPI.SUM, root=0)
if rank == 0:
print(f"[rank 0] Contract: {t3 - t2:.4f} s")
print(f"Result: {total:.10f}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--n_qubits", type=int, default=20)
parser.add_argument("--depth", type=int, default=30)
args = parser.parse_args()
run_mpi(args.n_qubits, args.depth)

View File

@@ -1,103 +0,0 @@
import time
import numpy as np
import quimb.tensor as qtn
import cotengra as ctg
from mpi4py import MPI
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
def build_qft_circuit(n_qubits):
"""构建标准 QFT 电路"""
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
for i in range(n_qubits):
# 1. 施加 H 门
circ.apply_gate('H', i)
# 2. 施加受控相位旋转
for j in range(i + 1, n_qubits):
theta = np.pi / (2**(j - i))
circ.apply_gate('CPHASE', theta, i, j)
return circ
def run_mpi(n_qubits):
if rank == 0:
print(f"MPI size: {size} ranks")
print(f"Circuit: QFT {n_qubits} qubits")
# 1. 所有 rank 独立构建 QFT 电路
circ = build_qft_circuit(n_qubits)
# 物理观测:计算 <psi|psi>,结果应为 1.0
# 注意QFT 是幺正变换,末态模长平方必为 1
psi = circ.psi
net = psi.conj() & psi
# 2. 路径搜索优化
t0 = time.perf_counter()
# 每个 rank 尝试不同的种子,增加找到全局最优路径的概率
repeats_per_rank = max(1, 256 // size)
opt = ctg.HyperOptimizer(
methods=['kahypar'],
max_repeats=repeats_per_rank,
minimize='flops',
parallel=max(1, 96 // size),
)
# 搜索收缩树
local_tree = net.contraction_tree(optimize=opt)
# 汇总所有 rank 找到的树,在 rank 0 选出 FLOPs 最低的那棵
all_trees = comm.gather(local_tree, root=0)
if rank == 0:
tree = min(all_trees, key=lambda t: t.contraction_cost())
t1 = time.perf_counter()
print(f"[rank 0] Path search: {t1 - t0:.4f} s")
print(f"[rank 0] Best path FLOPs: {tree.contraction_cost():.2e}")
else:
tree = None
# 将最优路径广播给所有进程
tree = comm.bcast(tree, root=0)
# 3. 切片处理(性能控制核心)
if rank == 0:
# 比赛建议:将 target_size 设为能填满单进程内存的 50%-70%
# 或者改用 target_slices=size * 4 以确保负载绝对平衡
sliced_tree = tree.slice(target_size=2**27)
else:
sliced_tree = None
sliced_tree = comm.bcast(sliced_tree, root=0)
n_slices = sliced_tree.nslices
if rank == 0:
print(f"Total slices: {n_slices}, each rank handles ~{n_slices // size + 1}")
# 获取原始张量数据
arrays = [t.data for t in net.tensors]
# 4. 执行收缩计算
t2 = time.perf_counter()
local_result = 0.0 + 0.0j
# 简单的静态负载均衡:每个 rank 跳步处理切片
for i in range(rank, n_slices, size):
local_result += sliced_tree.contract_slice(arrays, i, backend='numpy')
t3 = time.perf_counter()
# 5. 结果汇总
total = comm.reduce(local_result, op=MPI.SUM, root=0)
if rank == 0:
duration = t3 - t2
print(f"[rank 0] Contract: {duration:.4f} s")
# 对于 <psi|psi>QFT 的正确结果应无限接近 1.0
print(f"Result (Norm): {total.real:.10f} + {total.imag:.10f}j")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--n_qubits", type=int, default=20)
# QFT 的深度由比特数自动决定,所以删除了 --depth 参数
args = parser.parse_args()
run_mpi(args.n_qubits)

View File

@@ -1,56 +0,0 @@
import time
import pickle
import numpy as np
import quimb.tensor as qtn
import cotengra as ctg
def build_qft(n_qubits):
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
for i in range(n_qubits):
circ.apply_gate('H', i)
for j in range(i + 1, n_qubits):
circ.apply_gate('CPHASE', np.pi / 2 ** (j - i), i, j)
return circ
def run(n_qubits, output="tree.pkl"):
print(f"Circuit: QFT {n_qubits} qubits")
circ = build_qft(n_qubits)
psi = circ.psi
Z = np.array([[1, 0], [0, -1]], dtype=np.complex128)
bra = psi.conj().reindex({f'k{i}': f'b{i}' for i in range(n_qubits)})
obs = qtn.Tensor(Z, inds=(f'k0', f'b0'))
net = psi & obs & bra
t0 = time.perf_counter()
opt = ctg.HyperOptimizer(
methods=['kahypar'],
max_repeats=32,
minimize='combo',
parallel=8,
)
tree = net.contraction_tree(optimize=opt)
t1 = time.perf_counter()
print(f"Path search: {t1 - t0:.4f} s")
print(tree)
sliced_tree = tree.slice(target_size=2**28)
print(f"Total slices: {sliced_tree.nslices}")
arrays = [t.data for t in net.tensors]
with open(output, "wb") as f:
pickle.dump({"sliced_tree": sliced_tree, "arrays": arrays}, f)
print(f"Saved to {output}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--n_qubits", type=int, default=18)
parser.add_argument("--output", type=str, default="tree.pkl")
args = parser.parse_args()
run(args.n_qubits, args.output)

View File

@@ -61,6 +61,6 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
qasm_circ, init_state_tn, gate_opt, backend=config.quimb.backend
).flatten()
#assert np.allclose(
# result_sv, result_tn, atol=tolerance
#), "Resulting dense vectors do not match"
assert np.allclose(
result_sv, result_tn, atol=tolerance
), "Resulting dense vectors do not match"

Binary file not shown.