tn脚本更新
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled

This commit is contained in:
2026-05-03 18:54:05 +08:00
parent 740828872e
commit dd222587b7
8 changed files with 1500 additions and 66 deletions

3
.gitignore vendored
View File

@@ -5,7 +5,8 @@ __pycache__/
data/
# C extensions
*.so
bak/
perf*
# Distribution / packaging
.Python
build/

460
bench_profile.py Normal file
View File

@@ -0,0 +1,460 @@
"""Benchmark: qibotn/quimb generic TN — single-process torch profiling version."""
import os
import pickle
import time
import argparse
import numpy as np
import cotengra as ctg
import qibo
from qibo import Circuit, gates
def make_circuit(circuit_type, nqubits, nlayers=1):
    """Build one of the benchmark circuits on ``nqubits`` qubits.

    Supported ``circuit_type`` values:

    - ``"qft"``: the ``QFT`` model from ``qibo.models`` (``nlayers`` unused).
    - ``"variational"``: per layer, an RY with a random angle on every qubit
      followed by CZ gates on neighbouring pairs, the pair offset alternating
      with the layer parity.
    - ``"ghz"``: H on qubit 0 followed by a chain of CNOTs.
    - ``"brickwork"``: H on every qubit, then per layer CNOTs on alternating
      neighbouring pairs, each followed by random RZ rotations on both qubits.

    Random angles are drawn from the global ``np.random`` state, so callers
    seed it to obtain reproducible circuits.

    Raises:
        ValueError: if ``circuit_type`` is not one of the names above.
    """

    def random_angle():
        return np.random.uniform(0, 2 * np.pi)

    circuit = Circuit(nqubits)

    if circuit_type == "qft":
        from qibo.models import QFT

        return QFT(nqubits)

    if circuit_type == "variational":
        for layer in range(nlayers):
            for qubit in range(nqubits):
                circuit.add(gates.RY(qubit, theta=random_angle()))
            first_pair = layer % 2
            for left in range(first_pair, nqubits - 1, 2):
                circuit.add(gates.CZ(left, left + 1))
    elif circuit_type == "ghz":
        circuit.add(gates.H(0))
        for control in range(nqubits - 1):
            circuit.add(gates.CNOT(control, control + 1))
    elif circuit_type == "brickwork":
        for qubit in range(nqubits):
            circuit.add(gates.H(qubit))
        for layer in range(nlayers):
            first_pair = layer % 2
            for left in range(first_pair, nqubits - 1, 2):
                right = left + 1
                circuit.add(gates.CNOT(left, right))
                circuit.add(gates.RZ(left, theta=random_angle()))
                circuit.add(gates.RZ(right, theta=random_angle()))
    else:
        raise ValueError(f"Unknown circuit: {circuit_type}")

    return circuit
def make_z_observable(nqubits):
    """Describe the observable Z on qubit 0 as parallel lists.

    ``nqubits`` is accepted for interface symmetry but not used.

    Returns:
        ``(operators, sites, coeffs)``: one operator name, the tuple of
        qubits it acts on, and its coefficient.
    """
    operators = ["z"]
    sites = [(0,)]
    coeffs = [1.0]
    return operators, sites, coeffs
def export_profiler_outputs(prof, trace_path):
    """Write a profiler's Chrome trace and a text summary table to disk.

    Args:
        prof: profiler object exposing ``export_chrome_trace(path)`` and
            ``key_averages().table(sort_by=..., row_limit=...)``.
        trace_path: destination file for the Chrome trace.

    The table is written next to the trace, with the trace's file extension
    replaced by ``.txt``. Only the final extension is touched, so directory
    names containing ``.json`` are left alone and a trace path without a
    ``.json`` suffix no longer gets overwritten by the table.
    """
    prof.export_chrome_trace(trace_path)

    table_path = os.path.splitext(trace_path)[0] + ".txt"
    if table_path == trace_path:
        # The trace itself ends in ".txt"; pick a distinct name for the table.
        table_path = os.path.splitext(trace_path)[0] + ".table.txt"

    with open(table_path, "w") as f:
        f.write(
            prof.key_averages().table(
                sort_by="self_cpu_time_total",
                row_limit=200,
            )
        )
    print(f" [torch profiler trace] {trace_path}")
    print(f" [torch profiler table] {table_path}")
def run_quimb_tn(circuit, nqubits, num_slices, load_path=None, save_path=None):
    """Mode ``expval``: compute <Z_0> through quimb's ``local_expectation``.

    The contraction tree is either unpickled from ``load_path`` or found by a
    cotengra ``HyperOptimizer`` rehearsal targeting ``num_slices`` slices, in
    which case it is optionally pickled to ``save_path``.

    Returns:
        ``(expval, seconds)``: the real part of the expectation value as a
        float, and path-search time plus contraction time (search time is
        0 when the tree was loaded).
    """
    qibo.set_backend("qibotn", platform="quimb")
    backend = qibo.get_backend()
    backend.configure_tn_simulation(ansatz="tn")

    operators, sites, _ = make_z_observable(nqubits)
    observable = backend._string_to_quimb_operator(operators[0])
    quimb_circuit = backend._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=backend.circuit_ansatz,
        gate_opts={"max_bond": None, "cutoff": 1e-10},
    )
    # Keyword arguments shared by the rehearsal and the real contraction.
    shared_kwargs = {"where": sites[0], "simplify_sequence": "R"}

    if not load_path:
        optimizer = ctg.HyperOptimizer(
            methods=["kahypar", "random-greedy", "spinglass"],
            max_repeats=16,
            parallel=True,
            max_time=60,
            slicing_opts={"target_slices": num_slices},
            progbar=True,
        )
        search_start = time.time()
        rehearsal = quimb_circuit.local_expectation(
            observable, optimize=optimizer, rehearse=True, **shared_kwargs
        )
        t_search = time.time() - search_start
        tree = rehearsal["tree"]
        print(
            f" [path search] {t_search:.3f}s "
            f"flops~2^{tree.contraction_cost():.2f} "
            f"size~2^{tree.contraction_width():.2f} "
            f"slices={tree.multiplicity}"
        )
        if save_path:
            with open(save_path, "wb") as handle:
                pickle.dump({"tree": tree}, handle)
            print(f" [path saved] {save_path}")
    else:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(load_path, "rb") as handle:
            tree = pickle.load(handle)["tree"]
        t_search = 0.0
        print(f" [path loaded] {load_path}")

    contract_start = time.time()
    expval = quimb_circuit.local_expectation(
        observable, optimize=tree, **shared_kwargs
    )
    t_contract = time.time() - contract_start
    print(f" [contraction] {t_contract:.3f}s")

    return float(expval.real), t_search + t_contract
def _statevector_to_numpy(sv):
    """Return ``sv`` as a NumPy array, detaching it first if it is a torch tensor."""
    if type(sv).__module__.startswith("torch"):
        return sv.detach().cpu().numpy()
    return np.asarray(sv)


def run_quimb_tn_statevector(
    circuit,
    nqubits,
    num_slices,
    load_path=None,
    save_path=None,
    profile=False,
    profile_dir="profiles",
):
    """Mode ``statevector``: contract the full TN to a dense vector, single process.

    Args:
        circuit: qibo circuit to simulate.
        nqubits: accepted for interface symmetry; not used here.
        num_slices: ``target_slices`` passed to the cotengra path search.
        load_path: pickle file holding ``{"tree": ...}``; skips the search.
        save_path: where to pickle the tree found by the search.
        profile: wrap the contraction in ``torch.profiler`` and export the
            trace and table through ``export_profiler_outputs``.
        profile_dir: directory for profiler output; created only when
            ``profile`` is true.

    Returns:
        ``(statevector, seconds)``: the flattened state as a NumPy array, and
        path-search time plus contraction time.
    """
    qibo.set_backend("qibotn", platform="quimb")
    b = qibo.get_backend()
    b.configure_tn_simulation(ansatz="tn")

    import torch

    qc = b._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=b.circuit_ansatz,
        gate_opts={"max_bond": None, "cutoff": 1e-10},
    )
    # Have quimb produce torch tensors so torch.profiler can capture aten ops.
    qc.to_backend = torch.from_numpy

    if load_path:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(load_path, "rb") as f:
            saved = pickle.load(f)
        tree = saved["tree"]
        t_search = 0.0
        print(f" [path loaded] {load_path}")
    else:
        opt = ctg.HyperOptimizer(
            methods=["kahypar", "random-greedy", "spinglass"],
            max_repeats=500,
            parallel=48,
            max_time=100,
            minimize="size",
            slicing_opts={"target_slices": num_slices},
            progbar=True,
        )
        t0 = time.time()
        rehearsal = qc.to_dense(optimize=opt, rehearse=True)
        t_search = time.time() - t0
        tree = rehearsal["tree"]
        print(
            f" [path search] {t_search:.3f}s "
            f"flops~2^{tree.contraction_cost():.2f} "
            f"size~2^{tree.contraction_width():.2f} "
            f"slices={tree.multiplicity}"
        )
        if save_path:
            with open(save_path, "wb") as f:
                pickle.dump({"tree": tree}, f)
            print(f" [path saved] {save_path}")

    if profile:
        from torch.profiler import profile as torch_profile
        from torch.profiler import ProfilerActivity, record_function

        # Only touch the filesystem when a trace will actually be written.
        os.makedirs(profile_dir, exist_ok=True)
        trace_path = os.path.join(
            profile_dir,
            (
                f"trace_statevector_"
                f"{circuit.nqubits}q_"
                f"slices{tree.multiplicity}_"
                f"{int(time.time())}.json"
            ),
        )
        t0 = time.time()
        with torch_profile(
            activities=[ProfilerActivity.CPU],
            record_shapes=True,
            profile_memory=True,
            with_stack=True,
        ) as prof:
            with record_function("qibotn_to_dense_contraction"):
                sv = qc.to_dense(optimize=tree).reshape(-1)
            with record_function("torch_to_numpy_view_or_copy"):
                sv_tn = _statevector_to_numpy(sv)
        t_contract = time.time() - t0
        export_profiler_outputs(prof, trace_path)
    else:
        t0 = time.time()
        sv = qc.to_dense(optimize=tree).reshape(-1)
        t_contract = time.time() - t0
        sv_tn = _statevector_to_numpy(sv)

    print(f" [contraction] {t_contract:.3f}s")
    return sv_tn, t_search + t_contract
def run_quimb_tn_samples(circuit, nshots=1024):
    """Mode ``samples``: sample the circuit output distribution.

    Args:
        circuit: qibo circuit to execute on the qibotn/quimb backend.
        nshots: number of shots passed to ``execute_circuit``.

    Returns:
        ``(result, seconds)``: the backend result object and the wall-clock
        time of ``execute_circuit``.
    """
    qibo.set_backend("qibotn", platform="quimb")
    b = qibo.get_backend()
    b.configure_tn_simulation(ansatz="tn")

    t0 = time.time()
    result = b.execute_circuit(circuit, nshots=nshots)
    t_total = time.time() - t0
    print(f" [sampling] {t_total:.3f}s nshots={nshots}")

    # Printing the most frequent states is best-effort: a failure must not
    # abort the benchmark, but it should be visible rather than swallowed.
    try:
        freqs = result.frequencies()
        print(f" top states: {dict(list(freqs.items())[:5])}")
    except Exception as exc:
        print(f" [sampling] could not summarise frequencies: {exc}")

    return result, t_total
def qibojit_expval(circuit, nqubits):
    """Compute <Z_0> from the qibojit statevector.

    Qubit 0 is read from bit ``nqubits - 1`` of the basis-state index, i.e.
    the most significant bit.

    Returns:
        ``(expval, seconds)`` where ``seconds`` times only the circuit
        execution, not the post-processing.
    """
    qibo.set_backend("qibojit", platform="numba")

    start = time.time()
    outcome = circuit()
    elapsed = time.time() - start

    amplitudes = np.array(outcome.state(), dtype=complex).flatten()
    probabilities = np.abs(amplitudes) ** 2
    basis_index = np.arange(len(probabilities))
    qubit0_bit = (basis_index >> (nqubits - 1)) & 1
    # Z eigenvalue is +1 for bit 0 and -1 for bit 1.
    z_eigenvalues = 1 - 2 * qubit0_bit
    return float(np.dot(probabilities, z_eigenvalues)), elapsed
def run_qibojit(circuit):
    """Run ``circuit`` on qibojit/numba and return its full statevector.

    Returns:
        ``(statevector, seconds)``: the flattened complex state, and the
        wall-clock time of the circuit execution alone.
    """
    qibo.set_backend("qibojit", platform="numba")
    start = time.time()
    outcome = circuit()
    elapsed = time.time() - start
    statevector = np.array(outcome.state(), dtype=complex).flatten()
    return statevector, elapsed
def main():
    """Command-line entry point of the profiling benchmark.

    Parses the options, runs the selected quimb TN workload (``expval``,
    ``statevector`` or ``samples``) and, unless ``--no-compare`` is given,
    checks the result against a qibojit reference run. ``samples`` mode is
    never compared.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=10)
    parser.add_argument(
        "--circuit", type=str, default="qft",
        choices=["qft", "variational", "ghz", "brickwork"],
    )
    parser.add_argument("--nlayers", type=int, default=3)
    parser.add_argument("--num-slices", type=int, default=1)
    parser.add_argument("--nshots", type=int, default=1024)
    parser.add_argument(
        "--mode", type=str, default="statevector",
        choices=["expval", "statevector", "samples"],
        help="expval: local_expectation; statevector: to_dense; samples: sampling",
    )
    parser.add_argument(
        "--no-compare", action="store_true",
        help="Skip qibojit reference run",
    )
    parser.add_argument(
        "--save-path", type=str, default=None,
        help="Save contraction tree to a pickle file",
    )
    parser.add_argument(
        "--load-path", type=str, default=None,
        help="Load contraction tree from a pickle file and skip path search",
    )
    parser.add_argument(
        "--profile", action="store_true",
        help="Enable torch profiler for statevector contraction stage",
    )
    parser.add_argument(
        "--profile-dir", type=str, default="profiles",
        help="Directory to save torch profiler traces",
    )
    parser.add_argument(
        "--save-statevector", action="store_true",
        help="Save TN statevector to data/sv_tn_*.npy",
    )
    args = parser.parse_args()
    mode = args.mode

    print(
        f"Circuit: {args.circuit}, nqubits={args.nqubits}, "
        f"nlayers={args.nlayers}, mode={args.mode}, profile={args.profile}"
    )

    def fresh_circuit():
        # Re-seed before every build so the TN run and the reference run
        # receive identical random gate angles.
        np.random.seed(42)
        return make_circuit(args.circuit, args.nqubits, args.nlayers)

    circuit_tn = fresh_circuit()
    skip_compare = args.no_compare

    try:
        if mode == "expval":
            expval_tn, t_tn = run_quimb_tn(
                circuit_tn, args.nqubits, args.num_slices,
                load_path=args.load_path, save_path=args.save_path,
            )
            print(f"\n[quimb TN] time={t_tn:.4f}s <Z_0>={expval_tn:.8f}")
        elif mode == "statevector":
            sv_tn, t_tn = run_quimb_tn_statevector(
                circuit_tn, args.nqubits, args.num_slices,
                load_path=args.load_path, save_path=args.save_path,
                profile=args.profile, profile_dir=args.profile_dir,
            )
            print(f"\n[quimb TN] time={t_tn:.4f}s statevector shape={sv_tn.shape}")
            if args.save_statevector:
                os.makedirs("data", exist_ok=True)
                out_path = f"data/sv_tn_{args.circuit}{args.nqubits}.npy"
                np.save(out_path, sv_tn)
                print(f"[saved statevector] {out_path}")
        else:
            _, t_tn = run_quimb_tn_samples(circuit_tn, nshots=args.nshots)
            print(f"\n[quimb TN] time={t_tn:.4f}s")
            # Sampling has no reference quantity to compare against.
            skip_compare = True
    except Exception as e:
        print(f"[quimb TN] FAILED: {e}")
        raise

    if skip_compare:
        return

    if mode == "statevector":
        if sv_tn is None:
            return
        sv_ref, t_ref = run_qibojit(fresh_circuit())
        fid = abs(np.dot(sv_ref.conj(), sv_tn)) ** 2
        l2_err = np.linalg.norm(sv_ref - sv_tn)
        print(f"[qibojit] time={t_ref:.4f}s")
        print(f"Fidelity : {fid:.8f} (1=perfect)")
        print(f"L2 error : {l2_err:.2e}")
    else:
        expval_ref, t_ref = qibojit_expval(fresh_circuit(), args.nqubits)
        print(f"[qibojit] time={t_ref:.4f}s <Z_0>={expval_ref:.8f}")
        print(f"\nDiff : {abs(expval_tn - expval_ref):.2e}")

    if t_tn > 0:
        print(f"Speedup : {t_ref / t_tn:.2f}x")


if __name__ == "__main__":
    main()

View File

@@ -4,16 +4,17 @@ import argparse
import os
import numpy as np
import qibo
import quimb.tensor as qtn
from qibo import Circuit, gates
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
def make_circuit(circuit_type, nqubits, nlayers=1):
def make_circuit(circuit_type, nqubits, nlayers=1, add_measurements=False):
c = Circuit(nqubits)
if circuit_type == "qft":
from qibo.models import QFT
return QFT(nqubits)
c = QFT(nqubits)
elif circuit_type == "variational":
for layer in range(nlayers):
for q in range(nqubits):
@@ -27,6 +28,8 @@ def make_circuit(circuit_type, nqubits, nlayers=1):
c.add(gates.CNOT(q, q + 1))
else:
raise ValueError(f"Unknown circuit: {circuit_type}")
if add_measurements:
c.add(gates.M(*range(nqubits)))
return c
@@ -39,20 +42,58 @@ def run_qibojit(circuit):
return sv, elapsed
def run_quimb_mps(circuit, max_bond, svd_cutoff, optimizer):
def run_quimb_mps(circuit, max_bond, svd_cutoff, optimizer, nshots=None):
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="mps", max_bond_dimension=max_bond, svd_cutoff=svd_cutoff)
b.contractions_optimizer = optimizer
t0 = time.time()
result = b.execute_circuit(circuit, return_array=True)
elapsed = time.time() - t0
sv = result.state()
return sv, elapsed
if nshots:
result = b.execute_circuit(circuit, nshots=nshots)
elapsed = time.time() - t0
return result.frequencies(), elapsed, 0.0
else:
# MPS simulation
circ_quimb = qtn.CircuitMPS.from_openqasm2_str(
circuit.to_qasm(),
gate_opts={"max_bond": max_bond, "cutoff": svd_cutoff},
)
t_mps = time.time() - t0
# to_dense separately
t1 = time.time()
#sv = circ_quimb.psi.to_dense().reshape(-1)
sv = None
t_dense = time.time() - t1
return sv, t_mps, t_dense
def compare(sv_ref, sv_mps):
def run_quimb_permmps(circuit, max_bond, svd_cutoff, nshots=None):
    """Simulate ``circuit`` directly with quimb's ``CircuitPermMPS``.

    Gates named "measure" (case-insensitive) in the qibo queue are dropped;
    every other gate is converted to a ``qtn.Gate``. The conversion happens
    before the timer starts.

    Returns:
        ``(frequencies, seconds)`` as a plain dict when ``nshots`` is truthy,
        otherwise ``(statevector, seconds)`` with the flattened dense state
        obtained from ``get_psi_unordered()``.
    """
    gate_sequence = []
    for gate in circuit.queue:
        if gate.name.lower() == "measure":
            continue
        gate_sequence.append(
            qtn.Gate(
                gate.name,
                params=list(gate.parameters),
                qubits=list(gate.qubits),
            )
        )

    start = time.time()
    perm_circuit = qtn.CircuitPermMPS.from_gates(
        gate_sequence,
        N=circuit.nqubits,
        max_bond=max_bond,
        cutoff=svd_cutoff,
    )

    if not nshots:
        unordered_state = perm_circuit.get_psi_unordered()
        statevector = unordered_state.to_dense().reshape(-1)
        return statevector, time.time() - start

    from collections import Counter

    counts = Counter(perm_circuit.sample(nshots))
    elapsed = time.time() - start
    return dict(counts), elapsed
def compare_statevector(sv_ref, sv_mps):
sv_ref = np.array(sv_ref, dtype=complex).flatten()
sv_mps = np.array(sv_mps, dtype=complex).flatten()
fidelity = abs(np.dot(sv_ref.conj(), sv_mps)) ** 2
@@ -60,6 +101,12 @@ def compare(sv_ref, sv_mps):
return fidelity, l2_err
def compare_frequencies(freq_ref, freq_mps, nshots):
    """Return the total variation distance between two count dictionaries.

    Outcomes missing from either dictionary count as zero; the summed
    absolute count difference is halved and normalised by ``nshots``.
    """
    outcomes = set(freq_ref) | set(freq_mps)
    total_abs_diff = 0
    for outcome in outcomes:
        total_abs_diff += abs(freq_ref.get(outcome, 0) - freq_mps.get(outcome, 0))
    return 0.5 * total_abs_diff / nshots
def jit_cache_path(circuit_type, nqubits, nlayers):
os.makedirs(DATA_DIR, exist_ok=True)
return os.path.join(DATA_DIR, f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy")
@@ -74,37 +121,65 @@ def main():
parser.add_argument("--max-bond", type=int, default=None,
help="Max bond dimension for MPS (None = unlimited)")
parser.add_argument("--svd-cutoff", type=float, default=1e-6)
parser.add_argument("--optimizer", type=str, default="auto-hq")
parser.add_argument("--optimizer", type=str, default="eager")
parser.add_argument("--nshots", type=int, default=None,
help="Use sampling mode with given number of shots instead of statevector")
parser.add_argument("--permmps", action="store_true",
help="Use CircuitPermMPS directly instead of qibotn backend")
parser.add_argument("--skip-jit", action="store_true",
help="Skip qibojit run, load cached statevector if available")
parser.add_argument("--no-compare", action="store_true",
help="Skip correctness comparison entirely")
args = parser.parse_args()
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}")
print(f"MPS config: max_bond={args.max_bond}, svd_cutoff={args.svd_cutoff}, optimizer={args.optimizer}")
cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers)
ref = None
t_ref = None
if args.skip_jit and os.path.exists(cache_path):
sv_ref = np.load(cache_path)
print(f"\n[qibojit] loaded from cache: {cache_path}")
else:
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
sv_ref, t_ref = run_qibojit(circuit_ref)
np.save(cache_path, sv_ref)
print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})")
if not args.no_compare:
cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers)
if args.nshots:
# frequency mode: run qibojit with same nshots
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers, add_measurements=True)
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result_ref = circuit_ref(nshots=args.nshots)
t_ref = time.time() - t0
ref = dict(result_ref.frequencies())
print(f"\n[qibojit] time={t_ref:.4f}s")
elif args.skip_jit and os.path.exists(cache_path):
ref = np.load(cache_path)
print(f"\n[qibojit] loaded from cache: {cache_path}")
else:
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
ref, t_ref = run_qibojit(circuit_ref)
np.save(cache_path, ref)
print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})")
np.random.seed(42)
circuit_mps = make_circuit(args.circuit, args.nqubits, args.nlayers)
label = "quimb PermMPS" if args.permmps else "quimb MPS"
try:
sv_mps, t_mps = run_quimb_mps(circuit_mps, args.max_bond, args.svd_cutoff, args.optimizer)
fidelity, l2_err = compare(sv_ref, sv_mps)
print(f"[quimb MPS] time={t_mps:.4f}s")
print(f"\nFidelity : {fidelity:.8f} (1=perfect)")
print(f"L2 error : {l2_err:.2e}")
if t_ref is not None and t_mps > 0:
print(f"Speedup : {t_ref/t_mps:.2f}x")
if args.permmps:
out, t_mps = run_quimb_permmps(circuit_mps, args.max_bond, args.svd_cutoff, args.nshots)
t_dense = 0.0
else:
out, t_mps, t_dense = run_quimb_mps(circuit_mps, args.max_bond, args.svd_cutoff, args.optimizer, args.nshots)
print(f"[{label}] MPS sim={t_mps:.4f}s to_dense={t_dense:.4f}s total={t_mps+t_dense:.4f}s")
if not args.no_compare:
if args.nshots:
tvd = compare_frequencies(ref, out, args.nshots)
print(f"\nTVD : {tvd:.6f} (0=perfect)")
else:
fidelity, l2_err = compare_statevector(ref, out)
print(f"\nFidelity : {fidelity:.8f} (1=perfect)")
print(f"L2 error : {l2_err:.2e}")
if t_ref is not None and t_mps > 0:
print(f"Speedup : {t_ref/t_mps:.2f}x")
except Exception as e:
print(f"[quimb MPS] FAILED: {e}")
raise

126
benchmark_qmatchatea.py Normal file
View File

@@ -0,0 +1,126 @@
"""Benchmark: qibojit (reference) vs qibotn/qmatchatea MPS."""
import time
import argparse
import os
import numpy as np
import qibo
from qibo import Circuit, gates
from qibo.backends import construct_backend
DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
def make_circuit(circuit_type, nqubits, nlayers=1):
    """Return the benchmark circuit called ``circuit_type`` on ``nqubits`` qubits.

    ``"qft"`` returns qibo's ``QFT`` model. ``"variational"`` stacks
    ``nlayers`` layers of random-angle RY rotations followed by CZ gates on
    alternating neighbouring pairs. ``"ghz"`` is H on qubit 0 plus a CNOT
    chain. Angles come from the global ``np.random`` state.

    Raises:
        ValueError: for any other ``circuit_type``.
    """
    circuit = Circuit(nqubits)

    if circuit_type == "qft":
        from qibo.models import QFT

        return QFT(nqubits)

    if circuit_type == "variational":
        for layer in range(nlayers):
            for target in range(nqubits):
                angle = np.random.uniform(0, 2 * np.pi)
                circuit.add(gates.RY(target, theta=angle))
            # Even layers couple (0,1),(2,3)...; odd layers (1,2),(3,4)...
            for left in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.CZ(left, left + 1))
        return circuit

    if circuit_type == "ghz":
        circuit.add(gates.H(0))
        for control in range(nqubits - 1):
            circuit.add(gates.CNOT(control, control + 1))
        return circuit

    raise ValueError(f"Unknown circuit: {circuit_type}")
def run_qibojit(circuit):
    """Execute ``circuit`` on qibojit/numba.

    Returns:
        ``(state, seconds)``: whatever ``result.state()`` yields, and the
        wall-clock time of the circuit call.
    """
    qibo.set_backend("qibojit", platform="numba")
    start = time.time()
    outcome = circuit()
    duration = time.time() - start
    return outcome.state(), duration
def run_qmatchatea(circuit, max_bond, cut_ratio):
    """Simulate ``circuit`` with qmatchatea's MPS and return the dense state.

    The qibotn/qmatchatea backend is used only to configure the simulation
    and to convert the qibo circuit to a qiskit circuit;
    ``qmatchatea.run_simulation`` is then called directly so that only the
    simulation is timed.

    Returns:
        ``(statevector, seconds)``: the flattened complex state, and the
        wall-clock time of ``run_simulation`` (state conversion excluded).
    """
    import qmatchatea
    import qtealeaves.observables
    from qibo.backends import construct_backend as _cb

    backend = _cb(backend="qibotn", platform="qmatchatea")
    backend.configure_tn_simulation(
        ansatz="MPS",
        max_bond_dimension=max_bond,
        cut_ratio=cut_ratio,
    )
    qiskit_circuit = backend._qibocirc_to_qiskitcirc(circuit)
    transpilation = qmatchatea.preprocessing.qk_transpilation_params(False)

    tn_observables = qtealeaves.observables.TNObservables()
    tn_observables += qtealeaves.observables.TNState2File(name="temp", formatting="D")

    start = time.time()
    results = qmatchatea.run_simulation(
        circ=qiskit_circuit,
        convergence_parameters=backend.convergence_params,
        transpilation_parameters=transpilation,
        backend=backend.qmatchatea_backend,
        observables=tn_observables,
    )
    elapsed = time.time() - start

    tn_state = results.observables.get("tn_state")
    if tn_state is None:
        # NOTE(review): presumably the state is still on disk at this point
        # and load_state() reads it back - confirm against qmatchatea docs.
        results.load_state()
        tn_state = results.observables["tn_state"]

    dense = tn_state.to_statevector(qiskit_order=False, max_qubit_equivalent=40)
    return np.array(dense.elem, dtype=complex).flatten(), elapsed
def compare(sv_ref, sv_mps):
    """Compare two statevectors.

    Both inputs are cast to complex and flattened before comparison, so a
    column-shaped or nested input cannot trigger NumPy broadcasting and
    silently produce a matrix-valued difference.

    Args:
        sv_ref: reference statevector (any array-like).
        sv_mps: statevector under test (any array-like).

    Returns:
        ``(fidelity, l2_err)``: ``|<ref|mps>|^2`` and the Euclidean norm of
        ``ref - mps``.

    Raises:
        ValueError: if the two vectors have different numbers of amplitudes.
    """
    ref = np.asarray(sv_ref, dtype=complex).reshape(-1)
    mps = np.asarray(sv_mps, dtype=complex).reshape(-1)
    if ref.shape != mps.shape:
        raise ValueError(
            f"Statevector size mismatch: reference has {ref.size} amplitudes, "
            f"MPS result has {mps.size}"
        )
    fidelity = abs(np.dot(ref.conj(), mps)) ** 2
    l2_err = np.linalg.norm(ref - mps)
    return fidelity, l2_err
def jit_cache_path(circuit_type, nqubits, nlayers):
    """Return the ``.npy`` cache file path for a qibojit reference state.

    Creates ``DATA_DIR`` as a side effect if it does not exist yet.
    """
    os.makedirs(DATA_DIR, exist_ok=True)
    filename = f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy"
    return os.path.join(DATA_DIR, filename)
def main():
    """Command-line entry point: qibojit reference vs. qmatchatea MPS.

    The reference statevector is loaded from the cache when ``--skip-jit``
    is given and the cache file exists; otherwise it is computed with
    qibojit and cached. The qmatchatea result is then compared against it.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=10)
    parser.add_argument(
        "--circuit",
        type=str,
        default="ghz",
        choices=["qft", "variational", "ghz"],
    )
    parser.add_argument("--nlayers", type=int, default=3)
    parser.add_argument("--max-bond", type=int, default=64)
    parser.add_argument("--cut-ratio", type=float, default=1e-6)
    parser.add_argument("--skip-jit", action="store_true")
    args = parser.parse_args()

    print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}")
    print(f"MPS config: max_bond={args.max_bond}, cut_ratio={args.cut_ratio}")

    def fresh_circuit():
        # Same seed for both runs so random gate angles match.
        np.random.seed(42)
        return make_circuit(args.circuit, args.nqubits, args.nlayers)

    cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers)
    t_ref = None
    use_cache = args.skip_jit and os.path.exists(cache_path)
    if not use_cache:
        sv_ref, t_ref = run_qibojit(fresh_circuit())
        np.save(cache_path, sv_ref)
        print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})")
    else:
        sv_ref = np.load(cache_path)
        print(f"\n[qibojit] loaded from cache: {cache_path}")

    circuit_mps = fresh_circuit()
    try:
        sv_mps, t_mps = run_qmatchatea(circuit_mps, args.max_bond, args.cut_ratio)
        fidelity, l2_err = compare(sv_ref, sv_mps)
        print(f"[qmatchatea] time={t_mps:.4f}s")
        print(f"\nFidelity : {fidelity:.8f} (1=perfect)")
        print(f"L2 error : {l2_err:.2e}")
        # No reference timing exists when the state came from the cache.
        have_timing = t_ref is not None and t_mps > 0
        if have_timing:
            print(f"Speedup : {t_ref/t_mps:.2f}x")
    except Exception as e:
        print(f"[qmatchatea] FAILED: {e}")
        raise


if __name__ == "__main__":
    main()

View File

@@ -1,4 +1,5 @@
"""Benchmark: qibotn/quimb generic TN — expectation values."""
import pickle
import time
import argparse
import numpy as np
@@ -22,6 +23,15 @@ def make_circuit(circuit_type, nqubits, nlayers=1):
c.add(gates.H(0))
for q in range(nqubits - 1):
c.add(gates.CNOT(q, q + 1))
elif circuit_type == "brickwork":
for q in range(nqubits):
c.add(gates.H(q))
for layer in range(nlayers):
offset = layer % 2
for q in range(offset, nqubits - 1, 2):
c.add(gates.CNOT(q, q + 1))
c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi)))
c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi)))
else:
raise ValueError(f"Unknown circuit: {circuit_type}")
return c
@@ -33,80 +43,305 @@ def make_z_observable(nqubits):
return ["z"], [(0,)], [1.0]
def run_quimb_tn(circuit, nqubits):
def run_quimb_tn(circuit, nqubits, num_slices, load_path=None, save_path=None):
"""Mode: expval — compute <Z_0> via local_expectation (lightcone pruning)."""
qibo.set_backend("qibotn", platform="quimb")
b = qibo.get_backend()
b.configure_tn_simulation(ansatz="tn") # generic TN, no MPS
#if max_time is not None:
# opt = ctg.HyperOptimizer(max_repeats=128, max_time=max_time, minimize=minimize, parallel=True)
#else:
opt = ctg.HyperOptimizer(
max_repeats=16,
parallel=True,
slicing_opts={'target_size': 2**24}, # 限制单个张量最大 2^28 个元素
progbar=True
)
b.contractions_optimizer = opt
b.configure_tn_simulation(ansatz="tn")
operators, sites, coeffs = make_z_observable(nqubits)
ops = b._string_to_quimb_operator(operators[0])
qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
gate_opts={"max_bond": None, "cutoff": 1e-10})
if load_path:
with open(load_path, "rb") as f:
saved = pickle.load(f)
tree = saved["tree"]
t_search = 0.0
print(f" [path loaded] {load_path}")
else:
opt = ctg.HyperOptimizer(
methods=['kahypar', 'random-greedy', 'spinglass'],
max_repeats=16,
parallel=True,
max_time=60,
slicing_opts={'target_slices': num_slices},
progbar=True,
)
t0 = time.time()
rehearsal = qc.local_expectation(ops, where=sites[0], optimize=opt,
simplify_sequence="R", rehearse=True)
t_search = time.time() - t0
tree = rehearsal['tree']
print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}")
if save_path:
with open(save_path, "wb") as f:
pickle.dump({"tree": tree}, f)
print(f" [path saved] {save_path}")
t0 = time.time()
expval = b.exp_value_observable_symbolic(circuit, operators, sites, coeffs, nqubits)
elapsed = time.time() - t0
return expval, elapsed
expval = qc.local_expectation(ops, where=sites[0], optimize=tree, simplify_sequence="R")
t_contract = time.time() - t0
print(f" [contraction] {t_contract:.3f}s")
return float(expval.real), t_search + t_contract
def run_quimb_tn_statevector(circuit, nqubits, num_slices, load_path=None, save_path=None):
    """Mode ``statevector``: contract the full TN to a dense vector.

    The contraction tree is unpickled from ``load_path`` when given;
    otherwise it is searched with a cotengra ``HyperOptimizer`` minimising
    size and optionally pickled to ``save_path``.

    Returns:
        ``(statevector, seconds)``: the flattened state as a NumPy array, and
        path-search time plus contraction time.
    """
    qibo.set_backend("qibotn", platform="quimb")
    backend = qibo.get_backend()
    backend.configure_tn_simulation(ansatz="tn")

    import torch

    quimb_circuit = backend._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=backend.circuit_ansatz,
        gate_opts={"max_bond": None, "cutoff": 1e-10},
    )
    quimb_circuit.to_backend = torch.from_numpy

    if not load_path:
        optimizer = ctg.HyperOptimizer(
            methods=['kahypar', 'random-greedy', 'spinglass'],
            max_repeats=128,
            parallel=64,
            max_time=100,
            minimize='size',
            slicing_opts={'target_slices': num_slices},
            progbar=True,
        )
        search_start = time.time()
        rehearsal = quimb_circuit.to_dense(optimize=optimizer, rehearse=True)
        t_search = time.time() - search_start
        tree = rehearsal['tree']
        print(
            f" [path search] {t_search:.3f}s "
            f"flops~2^{tree.contraction_cost():.2f} "
            f"size~2^{tree.contraction_width():.2f}"
        )
        if save_path:
            with open(save_path, "wb") as handle:
                pickle.dump({"tree": tree}, handle)
            print(f" [path saved] {save_path}")
    else:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(load_path, "rb") as handle:
            tree = pickle.load(handle)["tree"]
        t_search = 0.0
        print(f" [path loaded] {load_path}")

    contract_start = time.time()
    dense = quimb_circuit.to_dense(optimize=tree, implementation="cotengra")
    sv = dense.reshape(-1)
    t_contract = time.time() - contract_start
    print(f" [contraction] {t_contract:.3f}s")

    return np.array(sv), t_search + t_contract
def _contract_mpi(tree, arrays, comm, root=0):
    """Contract slices via MPI, returning the result in the input's array type.

    Unlike ``cotengra.ContractionTree.contract_mpi``, this works with any
    array backend (numpy, torch, etc.): it only converts to NumPy at the
    MPI-reduce boundary and converts back afterwards.

    Slices are dealt round-robin: rank ``r`` contracts slices
    ``r, r + size, r + 2 * size, ...`` and sums them locally before the
    reduction.

    Args:
        tree: contraction tree exposing ``multiplicity`` and
            ``contract_slice(arrays, i)``.
        arrays: input tensors; the type of ``arrays[0]`` decides whether the
            result is converted back to torch.
        comm: MPI communicator (mpi4py-style ``Get_size``, ``Get_rank``,
            ``allgather`` and ``Reduce``). Every rank must call this function.
        root: rank that receives the reduced result.

    Returns:
        The summed contraction on ``root`` (torch tensor if the inputs were
        torch, NumPy array otherwise); ``None`` on every other rank.
    """
    size = comm.Get_size()
    rank = comm.Get_rank()
    is_torch = type(arrays[0]).__module__.startswith("torch")

    partial = None
    for i in range(rank, tree.multiplicity, size):
        x = tree.contract_slice(arrays, i)
        x_np = np.asfortranarray(
            x.detach().cpu().numpy() if is_torch else np.asarray(x)
        )
        if partial is None:
            partial = x_np
        else:
            partial += x_np

    # A rank that owns no slice must still contribute a zero buffer whose
    # shape and dtype match everyone else's, otherwise Reduce sees mismatched
    # buffers. Learn the layout from any rank that did contract something.
    meta = None if partial is None else (partial.shape, partial.dtype.str)
    known = next((m for m in comm.allgather(meta) if m is not None), None)
    if partial is None:
        if known is None:
            # No rank had work (multiplicity == 0): keep the legacy placeholder.
            partial = np.zeros(1, dtype=np.complex64)
        else:
            shape, dtype = known
            partial = np.zeros(shape, dtype=dtype, order="F")

    result = np.zeros_like(partial) if rank == root else None
    comm.Reduce(partial, result, root=root)

    if rank != root:
        return None
    if is_torch:
        # Import lazily so NumPy-only runs do not require torch.
        import torch

        return torch.from_numpy(np.asarray(result))
    return result
def run_quimb_tn_statevector_mpi(circuit, nqubits, num_slices, load_path=None, save_path=None):
    """MPI-parallel statevector via the custom ``_contract_mpi`` contraction.

    Rank 0 obtains the contraction tree and tensor network (from
    ``load_path``, which must hold both ``"tree"`` and ``"psi"``, or from a
    path search whose slice target is at least the communicator size) and
    broadcasts them. All ranks then contract their share of slices.

    Returns:
        ``(statevector, seconds)`` on rank 0 and ``(None, seconds)``
        elsewhere, where ``seconds`` is the broadcast search time plus this
        rank's contraction time.
    """
    from mpi4py import MPI

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    qibo.set_backend("qibotn", platform="quimb")
    backend = qibo.get_backend()
    backend.configure_tn_simulation(ansatz="tn")

    import torch

    quimb_circuit = backend._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=backend.circuit_ansatz,
        gate_opts={"max_bond": None, "cutoff": 1e-10},
    )
    quimb_circuit.to_backend = torch.from_numpy

    # Placeholders on non-root ranks; rank 0 fills them in and broadcasts.
    tree = None
    psi = None
    t_search = 0.0

    if rank == 0:
        if not load_path:
            optimizer = ctg.HyperOptimizer(
                methods=['kahypar', 'random-greedy', 'spinglass'],
                max_repeats=128,
                parallel=64,
                max_time=100,
                minimize='size',
                slicing_opts={
                    'target_slices': max(num_slices, size),
                    'allow_outer': False,
                },
                progbar=True,
            )
            search_start = time.time()
            rehearsal = quimb_circuit.to_dense(optimize=optimizer, rehearse=True)
            t_search = time.time() - search_start
            tree = rehearsal['tree']
            psi = rehearsal['tn']
            print(
                f" [path search] {t_search:.3f}s "
                f"flops~2^{tree.contraction_cost():.2f} "
                f"size~2^{tree.contraction_width():.2f} "
                f"slices={tree.multiplicity}"
            )
            if save_path:
                with open(save_path, "wb") as handle:
                    pickle.dump({"tree": tree, "psi": psi}, handle)
                print(f" [path saved] {save_path}")
        else:
            # NOTE: unpickling can execute arbitrary code; only load trusted files.
            with open(load_path, "rb") as handle:
                saved = pickle.load(handle)
            tree = saved["tree"]
            psi = saved["psi"]
            t_search = 0.0
            print(f" [path loaded] {load_path}")

    tree = comm.bcast(tree, root=0)
    psi = comm.bcast(psi, root=0)
    t_search = comm.bcast(t_search, root=0)

    arrays = psi.arrays
    contract_start = time.time()
    sv = _contract_mpi(tree, arrays, comm, root=0)
    t_contract = time.time() - contract_start
    total = t_search + t_contract

    if rank != 0:
        return None, total

    print(f" [contraction] {t_contract:.3f}s")
    return np.array(sv).reshape(-1), total
def run_quimb_tn_samples(circuit, nshots=1024):
    """Mode ``samples``: sample from the circuit output distribution.

    Returns:
        ``(result, seconds)``: the backend result object and the wall-clock
        time of ``execute_circuit``.
    """
    qibo.set_backend("qibotn", platform="quimb")
    backend = qibo.get_backend()
    backend.configure_tn_simulation(ansatz="tn")

    start = time.time()
    result = backend.execute_circuit(circuit, nshots=nshots)
    t_total = time.time() - start

    print(f" [sampling] {t_total:.3f}s nshots={nshots}")
    first_five = list(result.frequencies().items())[:5]
    print(f" top states: {dict(first_five)}")
    return result, t_total
def qibojit_expval(circuit, nqubits):
"""Compute sum_i <Z_i> via qibojit statevector."""
"""Compute <Z_0> via qibojit statevector."""
qibo.set_backend("qibojit", platform="numba")
t0 = time.time()
result = circuit()
elapsed = time.time() - t0
sv = np.array(result.state(), dtype=complex).flatten()
probs = np.abs(sv) ** 2
expval = sum(
probs[idx] * (1 - 2 * ((idx >> (nqubits - 1 - i)) & 1))
for i in range(nqubits)
for idx in range(len(probs))
)
return float(expval), elapsed
bits = (np.arange(len(probs)) >> (nqubits - 1)) & 1
expval = float(np.dot(probs, 1 - 2 * bits))
return expval, elapsed
def run_qibojit(circuit):
    """Compute the full statevector of ``circuit`` with qibojit/numba.

    Returns:
        ``(statevector, seconds)``: the flattened complex state, and the
        wall-clock time of the circuit execution alone.
    """
    qibo.set_backend("qibojit", platform="numba")
    started = time.time()
    outcome = circuit()
    elapsed = time.time() - started
    flat_state = np.array(outcome.state(), dtype=complex).flatten()
    return flat_state, elapsed
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--nqubits", type=int, default=10)
parser.add_argument("--circuit", type=str, default="qft",
choices=["qft", "variational", "ghz"])
choices=["qft", "variational", "ghz", "brickwork"])
parser.add_argument("--nlayers", type=int, default=3)
parser.add_argument("--optimizer", type=str, default="auto-hq")
parser.add_argument("--max-time", type=float, default=None,
help="HyperOptimizer max search time (seconds); overrides --optimizer")
parser.add_argument("--minimize", type=str, default="flops",
choices=["flops", "size", "write"],
help="HyperOptimizer minimize target")
parser.add_argument("--num-slices", type=int, default=1)
parser.add_argument("--nshots", type=int, default=1024)
parser.add_argument("--mode", type=str, default="statevector",
choices=["expval", "statevector", "samples"],
help="expval: local_expectation; statevector: to_dense; samples: sampling")
parser.add_argument("--mpi", action="store_true",
help="Use MPI-parallel contraction (run with mpirun -n N)")
parser.add_argument("--no-compare", action="store_true",
help="Skip qibojit reference run")
parser.add_argument("--save-path", type=str, default=None,
help="Save contraction tree to a pickle file")
parser.add_argument("--load-path", type=str, default=None,
help="Load contraction tree from a pickle file (skip path search)")
args = parser.parse_args()
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}")
print(f"TN config: optimizer={args.optimizer}, max_time={args.max_time}, minimize={args.minimize}")
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}, mode={args.mode}")
np.random.seed(42)
circuit_tn = make_circuit(args.circuit, args.nqubits, args.nlayers)
try:
expval_tn, t_tn = run_quimb_tn(circuit_tn, args.nqubits)
print(f"\n[quimb TN] time={t_tn:.4f}s <sum Z_i>={expval_tn:.8f}")
if args.mode == "expval":
expval_tn, t_tn = run_quimb_tn(circuit_tn, args.nqubits, args.num_slices,
load_path=args.load_path, save_path=args.save_path)
print(f"\n[quimb TN] time={t_tn:.4f}s <Z_0>={expval_tn:.8f}")
elif args.mode == "statevector":
if args.mpi:
sv_tn, t_tn = run_quimb_tn_statevector_mpi(circuit_tn, args.nqubits, args.num_slices,
load_path=args.load_path, save_path=args.save_path)
else:
sv_tn, t_tn = run_quimb_tn_statevector(circuit_tn, args.nqubits, args.num_slices,
load_path=args.load_path, save_path=args.save_path)
if sv_tn is not None:
print(f"\n[quimb TN] time={t_tn:.4f}s statevector shape={sv_tn.shape}")
np.save(f"data/sv_tn_{args.circuit}{args.nqubits}.npy", sv_tn)
else:
_, t_tn = run_quimb_tn_samples(circuit_tn, args.nqubits, args.nshots)
print(f"\n[quimb TN] time={t_tn:.4f}s")
args.no_compare = True # samples 模式无法和 qibojit 期望值对比
except Exception as e:
print(f"[quimb TN] FAILED: {e}")
raise
if not args.no_compare:
if not args.no_compare and args.mode != "statevector":
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
expval_ref, t_ref = qibojit_expval(circuit_ref, args.nqubits)
print(f"[qibojit] time={t_ref:.4f}s <sum Z_i>={expval_ref:.8f}")
print(f"[qibojit] time={t_ref:.4f}s <Z_0>={expval_ref:.8f}")
print(f"\nDiff : {abs(expval_tn - expval_ref):.2e}")
if t_tn > 0:
print(f"Speedup : {t_ref/t_tn:.2f}x")
elif not args.no_compare and args.mode == "statevector" and sv_tn is not None:
np.random.seed(42)
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
sv_ref, t_ref = run_qibojit(circuit_ref)
fid = abs(np.dot(sv_ref.conj(), sv_tn)) ** 2
l2_err = np.linalg.norm(sv_ref - sv_tn)
print(f"[qibojit] time={t_ref:.4f}s")
print(f"Fidelity : {fid:.8f} (1=perfect)")
print(f"L2 error : {l2_err:.2e}")
if t_tn > 0:
print(f"Speedup : {t_ref/t_tn:.2f}x")
if __name__ == "__main__":

View File

@@ -0,0 +1,519 @@
"""Benchmark and profile the qibotn/quimb CPU backend.
This script is intended to be the stable baseline for quimb backend
optimization work. It supports:
- multiple circuit families
- MPS or generic TN execution
- statevector, sampling, conversion, and local expectation workloads
- warmup/repeat timing
- optional correctness checks against qibojit
- optional cProfile output
"""
from __future__ import annotations
import argparse
import cProfile
import json
import math
import os
import pstats
import time
from pathlib import Path
from statistics import mean, pstdev
import numpy as np
import qibo
from qibo import Circuit, gates
def configure_runtime_env(quimb_num_procs: int | None, blas_threads: int | None):
    """Export the requested process/thread limits as environment variables.

    Args:
        quimb_num_procs: Value written to ``QUIMB_NUM_PROCS``. ``None`` leaves
            that variable untouched.
        blas_threads: Value written to ``OMP_NUM_THREADS``,
            ``OPENBLAS_NUM_THREADS``, ``MKL_NUM_THREADS`` and
            ``NUMEXPR_NUM_THREADS``. ``None`` leaves all four untouched.
    """
    overrides = {}
    if quimb_num_procs is not None:
        overrides["QUIMB_NUM_PROCS"] = str(quimb_num_procs)
    if blas_threads is not None:
        thread_count = str(blas_threads)
        for variable in (
            "OMP_NUM_THREADS",
            "OPENBLAS_NUM_THREADS",
            "MKL_NUM_THREADS",
            "NUMEXPR_NUM_THREADS",
        ):
            overrides[variable] = thread_count
    # Apply everything in one go; an empty mapping is a no-op.
    os.environ.update(overrides)
def make_circuit(
    circuit_type: str,
    nqubits: int,
    nlayers: int,
    seed: int,
    add_measurements: bool = False,
) -> Circuit:
    """Build one of the benchmark circuit families from a seeded generator.

    Supported ``circuit_type`` values:

    - ``"qft"``: the circuit returned by ``qibo.models.QFT(nqubits)``.
    - ``"variational"``: per layer, a random RY on every qubit followed by CZ
      on neighbouring pairs whose starting offset alternates with the layer.
    - ``"ghz"``: H on qubit 0 followed by a CNOT chain.
    - ``"qaoa"``: per layer, random RZ on every qubit, CZ on even pairs,
      then random RX on every qubit.
    - ``"ising1d"``: per layer, random RX on every qubit, then CZ on even
      pairs and CZ on odd pairs.
    - ``"rcs"``: qubits laid out row-major on a ``rows x cols`` grid; per
      layer a random gate from (T, X, Y) on every qubit, then CZ along rows
      on even layers or between rows on odd layers.

    Args:
        circuit_type: Name of the circuit family.
        nqubits: Number of qubits.
        nlayers: Number of layers for the layered families.
        seed: Seed for ``numpy.random.default_rng``; all random angles and
            gate choices are drawn from that generator.
        add_measurements: When true, append a measurement gate over all
            qubits.

    Returns:
        The constructed circuit.

    Raises:
        ValueError: If ``circuit_type`` is not one of the names above.
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    def random_angle():
        # Every rotation angle is drawn from the shared seeded generator.
        return rng.uniform(0.0, 2.0 * np.pi)

    def add_rotations(gate_cls):
        # One randomly parametrised single-qubit rotation per qubit.
        for target in range(nqubits):
            circuit.add(gate_cls(target, theta=random_angle()))

    def add_cz_pairs(first):
        # CZ on (q, q + 1) for q = first, first + 2, ...
        for left in range(first, nqubits - 1, 2):
            circuit.add(gates.CZ(left, left + 1))

    if circuit_type == "qft":
        from qibo.models import QFT

        circuit = QFT(nqubits)
    elif circuit_type == "variational":
        for layer in range(nlayers):
            add_rotations(gates.RY)
            add_cz_pairs(layer % 2)
    elif circuit_type == "ghz":
        circuit.add(gates.H(0))
        for control in range(nqubits - 1):
            circuit.add(gates.CNOT(control, control + 1))
    elif circuit_type == "qaoa":
        for _ in range(nlayers):
            add_rotations(gates.RZ)
            add_cz_pairs(0)
            add_rotations(gates.RX)
    elif circuit_type == "ising1d":
        for _ in range(nlayers):
            add_rotations(gates.RX)
            add_cz_pairs(0)
            add_cz_pairs(1)
    elif circuit_type == "rcs":
        cols = math.ceil(math.sqrt(nqubits))
        rows = math.ceil(nqubits / cols)
        gate_pool = (gates.T, gates.X, gates.Y)
        for layer in range(nlayers):
            for target in range(nqubits):
                choice = rng.integers(0, len(gate_pool))
                circuit.add(gate_pool[choice](target))
            if layer % 2 == 0:
                # Horizontal neighbours inside each row.
                pairs = [
                    (row * cols + col, row * cols + col + 1)
                    for row in range(rows)
                    for col in range(0, cols - 1, 2)
                ]
            else:
                # Vertical neighbours between consecutive rows.
                pairs = [
                    (row * cols + col, (row + 1) * cols + col)
                    for row in range(0, rows - 1, 2)
                    for col in range(cols)
                ]
            for q1, q2 in pairs:
                # The grid may have more cells than qubits; skip missing ones.
                if q2 < nqubits:
                    circuit.add(gates.CZ(q1, q2))
    else:
        raise ValueError(f"Unknown circuit type: {circuit_type}")

    if add_measurements:
        circuit.add(gates.M(*range(nqubits)))
    return circuit
def prepare_quimb_backend(
    ansatz: str,
    max_bond: int | None,
    svd_cutoff: float,
    optimizer: str,
    n_most_frequent_states: int,
):
    """Select the qibotn/quimb backend globally and configure it.

    Args:
        ansatz: Forwarded as ``ansatz`` to ``configure_tn_simulation``.
        max_bond: Forwarded as ``max_bond_dimension``.
        svd_cutoff: Forwarded as ``svd_cutoff``.
        optimizer: Stored on the backend as ``contractions_optimizer``.
        n_most_frequent_states: Forwarded as ``n_most_frequent_states``.

    Returns:
        The backend object returned by ``qibo.get_backend()`` after
        configuration.
    """
    qibo.set_backend("qibotn", platform="quimb")
    tn_backend = qibo.get_backend()
    simulation_options = {
        "ansatz": ansatz,
        "max_bond_dimension": max_bond,
        "svd_cutoff": svd_cutoff,
        "n_most_frequent_states": n_most_frequent_states,
    }
    tn_backend.configure_tn_simulation(**simulation_options)
    tn_backend.contractions_optimizer = optimizer
    return tn_backend
def run_qibojit_state(circuit: Circuit):
    """Execute ``circuit`` on qibojit (numba platform) and time the call.

    Switches the global qibo backend to qibojit as a side effect. Only the
    circuit execution itself is timed; the conversion of the result to a flat
    complex array happens afterwards.

    Returns:
        ``(state, elapsed)`` where ``state`` is the flattened complex state
        and ``elapsed`` is the execution time in seconds.
    """
    qibo.set_backend("qibojit", platform="numba")
    started = time.perf_counter()
    outcome = circuit()
    finished = time.perf_counter()
    amplitudes = np.asarray(outcome.state(), dtype=complex)
    return amplitudes.reshape(-1), finished - started
def run_qibojit_shots(circuit: Circuit, nshots: int):
    """Sample ``circuit`` on qibojit (numba platform) and time the call.

    Switches the global qibo backend to qibojit as a side effect. Only the
    sampling call is timed; building the frequency dictionary is not.

    Returns:
        ``(frequencies, elapsed)`` where ``frequencies`` is a plain ``dict``
        copy of ``result.frequencies()`` and ``elapsed`` is in seconds.
    """
    qibo.set_backend("qibojit", platform="numba")
    started = time.perf_counter()
    outcome = circuit(nshots=nshots)
    finished = time.perf_counter()
    counts = dict(outcome.frequencies())
    return counts, finished - started
def z_expectation_from_statevector(statevector: np.ndarray, nqubits: int, qubit: int):
    """Return the expectation value of Pauli-Z on one qubit of a dense state.

    The state is flattened and the amplitude at flat index ``i`` is assigned
    the bit ``(i >> (nqubits - qubit - 1)) & 1`` for ``qubit``, i.e. qubit 0
    corresponds to the most significant bit of the index.

    Args:
        statevector: Array-like with ``2 ** nqubits`` amplitudes (any shape).
        nqubits: Number of qubits the state describes.
        qubit: Index of the qubit on which Z is measured.

    Returns:
        ``sum_i |amp_i|^2 * (+1 if bit is 0 else -1)`` as a Python float.

    Raises:
        ValueError: If ``qubit`` is outside ``range(nqubits)`` or the number
            of amplitudes is not ``2 ** nqubits``.
    """
    # An out-of-range qubit would turn into a negative or over-wide shift
    # below and silently produce a meaningless expectation value.
    if not 0 <= qubit < nqubits:
        raise ValueError(
            f"qubit index {qubit} is out of range for {nqubits} qubit(s)"
        )
    probs = np.abs(np.asarray(statevector).reshape(-1)) ** 2
    expected_size = 1 << nqubits
    if probs.size != expected_size:
        raise ValueError(
            f"statevector has {probs.size} amplitudes, expected {expected_size} "
            f"for {nqubits} qubit(s)"
        )
    bit_index = nqubits - qubit - 1
    bits = (np.arange(probs.size) >> bit_index) & 1
    # Map bit 0 -> +1 and bit 1 -> -1 (the Z eigenvalues).
    return float(np.dot(probs, 1.0 - 2.0 * bits))
def fidelity_and_l2(reference: np.ndarray, candidate: np.ndarray):
    """Compare two states by overlap fidelity and Euclidean distance.

    Both inputs are converted to flat complex arrays first.

    Returns:
        ``(fidelity, l2_error)`` as Python floats, where ``fidelity`` is
        ``|<reference|candidate>|^2`` and ``l2_error`` is the norm of the
        element-wise difference (so it is sensitive to a global phase).
    """
    flat_ref, flat_cand = (
        np.asarray(vector, dtype=complex).reshape(-1)
        for vector in (reference, candidate)
    )
    overlap = np.vdot(flat_ref, flat_cand)
    distance = np.linalg.norm(flat_ref - flat_cand)
    return float(abs(overlap) ** 2), float(distance)
def total_variation_distance(reference: dict[str, int], candidate: dict[str, int], nshots: int):
    """Return the total variation distance between two count dictionaries.

    Outcomes missing from one dictionary count as zero there. The summed
    absolute count difference is halved and divided by ``nshots``.
    """
    outcomes = reference.keys() | candidate.keys()
    mismatch = sum(
        abs(reference.get(outcome, 0) - candidate.get(outcome, 0))
        for outcome in outcomes
    )
    return 0.5 * mismatch / nshots
def profile_callable(func, output_path: Path, sort_by: str):
    """Run ``func`` once under cProfile and write a text report.

    Args:
        func: Zero-argument callable to profile.
        output_path: File that receives the report; missing parent
            directories are created.
        sort_by: Sort key passed to ``pstats.Stats.sort_stats``.

    Returns:
        Whatever ``func()`` returned.

    Raises:
        Any exception raised by ``func``; in that case no report is written.
    """
    profiler = cProfile.Profile()
    profiler.enable()
    try:
        result = func()
    finally:
        # Always switch the profiler off, even when func() fails, so that it
        # does not stay active for the rest of the process.
        profiler.disable()

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("w", encoding="utf-8") as stream:
        stats = pstats.Stats(profiler, stream=stream)
        # Top 80 entries by the requested key, then callers of the top 30.
        stats.strip_dirs().sort_stats(sort_by).print_stats(80)
        stats.print_callers(30)
    return result
def time_callable(func, repeats: int, warmup: int, profile_output: Path | None, profile_sort: str):
    """Time repeated calls of ``func`` after optional warmup and profiling.

    The sequence is: ``warmup`` untimed calls, then (only when
    ``profile_output`` is given) one profiled call via ``profile_callable``,
    then ``repeats`` timed calls.

    Returns:
        ``(samples, payload)`` where ``samples`` lists the wall-clock seconds
        of each timed call and ``payload`` is the return value of the last
        timed call. With no timed calls, ``payload`` is the profiled call's
        return value, or ``None`` if profiling was off.
    """
    for _ in range(warmup):
        func()

    # Fallback result in case the timed loop below never runs.
    last_payload = None
    if profile_output is not None:
        last_payload = profile_callable(func, profile_output, profile_sort)

    durations = []
    for _ in range(repeats):
        started = time.perf_counter()
        last_payload = func()
        durations.append(time.perf_counter() - started)
    return durations, last_payload
def summarize_samples(samples: list[float]):
    """Reduce a list of timings to min/mean/max/std and a repeat count.

    Args:
        samples: Timing samples in seconds; must contain at least one value.

    Returns:
        Dict with keys ``min_s``, ``mean_s``, ``max_s``, ``std_s`` (population
        standard deviation, ``0.0`` for a single sample) and ``repeats``.

    Raises:
        ValueError: If ``samples`` is empty.
    """
    if not samples:
        # Fail with an actionable message rather than the generic error that
        # min() would raise on an empty list.
        raise ValueError(
            "cannot summarize timings: at least one timed sample is required"
        )
    return {
        "min_s": min(samples),
        "mean_s": mean(samples),
        "max_s": max(samples),
        "std_s": pstdev(samples) if len(samples) > 1 else 0.0,
        "repeats": len(samples),
    }
def workload_state(args):
    """Benchmark dense statevector extraction on the quimb backend.

    Builds the circuit and backend from ``args``, times
    ``backend.execute_circuit(circuit, return_array=True)`` including the
    flattening of the returned statevector, and, unless ``args.no_compare``
    is set, compares the last statevector against qibojit.

    Returns:
        ``(summary, correctness)``; ``correctness`` is ``None`` when the
        comparison is disabled, otherwise a dict with ``qibojit_time_s``,
        ``fidelity`` and ``l2_error``.
    """
    circuit = make_circuit(args.circuit, args.nqubits, args.nlayers, args.seed)
    backend = prepare_quimb_backend(
        args.ansatz, args.max_bond, args.svd_cutoff, args.optimizer, args.topk
    )

    def dense_state():
        outcome = backend.execute_circuit(circuit, return_array=True)
        return np.asarray(outcome.statevector).reshape(-1)

    timings, tn_state = time_callable(
        dense_state, args.repeats, args.warmup, args.profile_output, args.profile_sort
    )
    timing_summary = summarize_samples(timings)
    if args.no_compare:
        return timing_summary, None

    exact_state, exact_time = run_qibojit_state(circuit)
    fidelity, l2_error = fidelity_and_l2(exact_state, tn_state)
    return timing_summary, {
        "qibojit_time_s": exact_time,
        "fidelity": fidelity,
        "l2_error": l2_error,
    }
def workload_shots(args):
    """Benchmark shot sampling on the quimb backend.

    Builds the circuit with a final measurement over all qubits, times
    ``backend.execute_circuit(circuit, nshots=args.nshots)`` including the
    copy of the frequencies into a dict, and, unless ``args.no_compare`` is
    set, reports the total variation distance to qibojit's frequencies.

    Returns:
        ``(summary, correctness)``; ``correctness`` is ``None`` when the
        comparison is disabled, otherwise a dict with ``qibojit_time_s`` and
        ``tvd``.
    """
    circuit = make_circuit(
        args.circuit, args.nqubits, args.nlayers, args.seed, add_measurements=True
    )
    backend = prepare_quimb_backend(
        args.ansatz, args.max_bond, args.svd_cutoff, args.optimizer, args.topk
    )

    def sample_frequencies():
        outcome = backend.execute_circuit(circuit, nshots=args.nshots)
        return dict(outcome.frequencies())

    timings, tn_counts = time_callable(
        sample_frequencies, args.repeats, args.warmup, args.profile_output, args.profile_sort
    )
    timing_summary = summarize_samples(timings)
    if args.no_compare:
        return timing_summary, None

    exact_counts, exact_time = run_qibojit_shots(circuit, args.nshots)
    return timing_summary, {
        "qibojit_time_s": exact_time,
        "tvd": total_variation_distance(exact_counts, tn_counts, args.nshots),
    }
def workload_convert(args):
    """Benchmark only the qibo-to-quimb circuit conversion.

    Times ``backend._qibo_circuit_to_quimb`` with the backend's configured
    ansatz, bond dimension and SVD cutoff. No reference comparison is made.

    Returns:
        ``(summary, None)`` where ``summary`` additionally carries
        ``gate_count``, the length of the converted circuit's ``gates``.
    """
    circuit = make_circuit(args.circuit, args.nqubits, args.nlayers, args.seed)
    backend = prepare_quimb_backend(
        args.ansatz, args.max_bond, args.svd_cutoff, args.optimizer, args.topk
    )

    def convert_once():
        gate_options = {
            "max_bond": backend.max_bond_dimension,
            "cutoff": backend.svd_cutoff,
        }
        converted = backend._qibo_circuit_to_quimb(  # pylint: disable=protected-access
            circuit,
            quimb_circuit_type=backend.circuit_ansatz,
            gate_opts=gate_options,
        )
        return len(converted.gates)

    timings, gate_count = time_callable(
        convert_once, args.repeats, args.warmup, args.profile_output, args.profile_sort
    )
    timing_summary = summarize_samples(timings)
    timing_summary["gate_count"] = gate_count
    return timing_summary, None
def workload_expectation(args):
    """Benchmark a single-qubit Z expectation via the backend's public API.

    Times ``backend.exp_value_observable_symbolic`` for the observable
    ``Z`` on ``args.observable_qubit`` with coefficient 1.0. Unless
    ``args.no_compare`` is set, the value from the last timed run is compared
    against the expectation derived from qibojit's dense statevector.

    Returns:
        ``(summary, correctness)``; ``correctness`` is ``None`` when the
        comparison is disabled, otherwise a dict with ``qibojit_time_s``,
        ``reference_expval`` and ``abs_error``.
    """
    circuit = make_circuit(args.circuit, args.nqubits, args.nlayers, args.seed)
    backend = prepare_quimb_backend(
        ansatz=args.ansatz,
        max_bond=args.max_bond,
        svd_cutoff=args.svd_cutoff,
        optimizer=args.optimizer,
        n_most_frequent_states=args.topk,
    )
    operators = ["z"]
    sites = [(args.observable_qubit,)]
    coeffs = [1.0]

    def run_once():
        return float(
            backend.exp_value_observable_symbolic(
                circuit, operators, sites, coeffs, args.nqubits
            )
        )

    samples, expval = time_callable(
        run_once, args.repeats, args.warmup, args.profile_output, args.profile_sort
    )
    summary = summarize_samples(samples)
    correctness = None
    if not args.no_compare:
        ref_state, ref_time = run_qibojit_state(circuit)
        # Evaluate the reference once and reuse it: each evaluation walks the
        # whole dense statevector.
        ref_expval = z_expectation_from_statevector(
            ref_state, args.nqubits, args.observable_qubit
        )
        correctness = {
            "qibojit_time_s": ref_time,
            "reference_expval": ref_expval,
            "abs_error": abs(ref_expval - expval),
        }
    return summary, correctness
def workload_raw_local_exp(args):
    """Benchmark a Z expectation through quimb's ``local_expectation`` directly.

    Each timed run converts the qibo circuit to a quimb circuit, optionally
    rehearses the contraction (``args.rehearse``) to obtain a contraction
    tree, and then performs the actual ``local_expectation`` call. The three
    phases are timed separately inside the run.

    Returns:
        ``(summary, correctness)``. ``summary`` holds the outer timing
        statistics plus ``convert_s``, ``rehearse_s``, ``contract_s`` and
        ``total_inner_s`` taken from the last run. ``correctness`` is ``None``
        when ``args.no_compare`` is set, otherwise a dict with
        ``qibojit_time_s``, ``reference_expval`` and ``abs_error``.
    """
    circuit = make_circuit(args.circuit, args.nqubits, args.nlayers, args.seed)
    backend = prepare_quimb_backend(
        args.ansatz, args.max_bond, args.svd_cutoff, args.optimizer, args.topk
    )
    site = (args.observable_qubit,)

    def run_once():
        # Phase 1: qibo -> quimb conversion.
        convert_start = time.perf_counter()
        quimb_circuit = backend._qibo_circuit_to_quimb(  # pylint: disable=protected-access
            circuit,
            quimb_circuit_type=backend.circuit_ansatz,
            gate_opts={"max_bond": backend.max_bond_dimension, "cutoff": backend.svd_cutoff},
        )
        convert_s = time.perf_counter() - convert_start

        operator = backend._string_to_quimb_operator("z")  # pylint: disable=protected-access

        # Phase 2 (optional): rehearsal; its "tree" entry is then used as the
        # optimizer for the real contraction.
        if args.rehearse:
            rehearse_start = time.perf_counter()
            rehearsal = quimb_circuit.local_expectation(
                operator,
                where=site,
                backend=backend.backend,
                optimize=backend.contractions_optimizer,
                simplify_sequence="R",
                rehearse=True,
            )
            rehearse_s = time.perf_counter() - rehearse_start
            optimize = rehearsal["tree"]
        else:
            rehearse_s = 0.0
            optimize = backend.contractions_optimizer

        # Phase 3: the actual contraction.
        contract_start = time.perf_counter()
        expval = quimb_circuit.local_expectation(
            operator,
            where=site,
            backend=backend.backend,
            optimize=optimize,
            simplify_sequence="R",
        )
        contract_s = time.perf_counter() - contract_start

        return {
            "convert_s": convert_s,
            "rehearse_s": rehearse_s,
            "contract_s": contract_s,
            "total_inner_s": convert_s + rehearse_s + contract_s,
            "expval": float(np.real(expval)),
        }

    timings, last_run = time_callable(
        run_once, args.repeats, args.warmup, args.profile_output, args.profile_sort
    )
    timing_summary = summarize_samples(timings)
    # Phase timings come from the last run only, not an average.
    timing_summary.update(
        {
            phase: last_run[phase]
            for phase in ("convert_s", "rehearse_s", "contract_s", "total_inner_s")
        }
    )
    if args.no_compare:
        return timing_summary, None

    exact_state, exact_time = run_qibojit_state(circuit)
    exact_expval = z_expectation_from_statevector(
        exact_state, args.nqubits, args.observable_qubit
    )
    return timing_summary, {
        "qibojit_time_s": exact_time,
        "reference_expval": exact_expval,
        "abs_error": abs(exact_expval - last_run["expval"]),
    }
# Registry mapping each --mode value to the workload function that runs it.
# Each function takes the parsed arguments and returns (summary, correctness).
WORKLOADS = {
    "state": workload_state,
    "shots": workload_shots,
    "convert": workload_convert,
    "expectation": workload_expectation,
    "raw-local-exp": workload_raw_local_exp,
}
def build_parser():
    """Create the command-line parser for the benchmark script.

    Options are registered from a table, in the order they appear in the
    help output. The module docstring is used as the parser description and
    the ``--mode`` choices are the sorted keys of ``WORKLOADS``.
    """
    option_specs = (
        (
            "--mode",
            {
                "choices": sorted(WORKLOADS),
                "default": "raw-local-exp",
                "help": "Workload to benchmark.",
            },
        ),
        (
            "--circuit",
            {
                "choices": ["ghz", "ising1d", "qaoa", "qft", "rcs", "variational"],
                "default": "variational",
            },
        ),
        ("--nqubits", {"type": int, "default": 10}),
        ("--nlayers", {"type": int, "default": 3}),
        ("--ansatz", {"choices": ["mps", "tn"], "default": "tn"}),
        ("--max-bond", {"type": int, "default": None}),
        ("--svd-cutoff", {"type": float, "default": 1e-10}),
        ("--optimizer", {"type": str, "default": "auto-hq"}),
        ("--observable-qubit", {"type": int, "default": 0}),
        ("--nshots", {"type": int, "default": 1024}),
        ("--topk", {"type": int, "default": 100}),
        ("--warmup", {"type": int, "default": 1}),
        ("--repeats", {"type": int, "default": 3}),
        ("--seed", {"type": int, "default": 42}),
        ("--quimb-num-procs", {"type": int, "default": None}),
        ("--blas-threads", {"type": int, "default": None}),
        ("--rehearse", {"action": "store_true"}),
        ("--no-compare", {"action": "store_true"}),
        ("--profile-output", {"type": Path, "default": None}),
        ("--profile-sort", {"type": str, "default": "cumulative"}),
        ("--json-output", {"type": Path, "default": None}),
    )
    parser = argparse.ArgumentParser(description=__doc__)
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser
def _validate_args(parser, args):
    """Reject option values that would crash or corrupt a run later on."""
    if args.nqubits < 1:
        parser.error("--nqubits must be at least 1")
    # The timing summary takes min/mean/max of the timed samples, so at
    # least one timed repeat is required.
    if args.repeats < 1:
        parser.error("--repeats must be at least 1")
    if args.mode in ("expectation", "raw-local-exp") and not (
        0 <= args.observable_qubit < args.nqubits
    ):
        parser.error(
            f"--observable-qubit must be in [0, {args.nqubits - 1}] "
            f"for --nqubits {args.nqubits}"
        )
    # The shots comparison divides by the number of shots.
    if args.mode == "shots" and args.nshots < 1:
        parser.error("--nshots must be at least 1 in shots mode")


def main():
    """Parse the command line, run the selected workload and report results.

    Prints the run configuration, the timing summary and (when available)
    the correctness metrics. Optionally writes everything to a JSON file
    given by ``--json-output``. Invalid numeric options terminate the
    program through ``parser.error`` before any work starts.
    """
    parser = build_parser()
    args = parser.parse_args()
    _validate_args(parser, args)
    # NOTE(review): numpy and qibo are imported at module import time, i.e.
    # before these variables are set; confirm the thread limits are still
    # honoured by the numerical libraries at this point.
    configure_runtime_env(args.quimb_num_procs, args.blas_threads)

    print(
        f"mode={args.mode} circuit={args.circuit} nqubits={args.nqubits} "
        f"nlayers={args.nlayers} ansatz={args.ansatz} optimizer={args.optimizer}"
    )
    if args.quimb_num_procs is not None or args.blas_threads is not None:
        print(
            "threads:"
            f" QUIMB_NUM_PROCS={os.environ.get('QUIMB_NUM_PROCS')}"
            f" OMP_NUM_THREADS={os.environ.get('OMP_NUM_THREADS')}"
        )

    workload = WORKLOADS[args.mode]
    summary, correctness = workload(args)

    print("\nTiming")
    for key, value in summary.items():
        if isinstance(value, float):
            print(f"{key:>16}: {value:.6f}")
        else:
            print(f"{key:>16}: {value}")

    if correctness is not None:
        print("\nCorrectness")
        for key, value in correctness.items():
            if isinstance(value, float):
                print(f"{key:>16}: {value:.6e}")
            else:
                print(f"{key:>16}: {value}")

    if args.profile_output is not None:
        print(f"\nProfile written to: {args.profile_output}")

    if args.json_output is not None:
        # default=str turns non-JSON values (e.g. Path arguments) into text.
        payload = {"timing": summary, "correctness": correctness, "args": vars(args)}
        args.json_output.parent.mkdir(parents=True, exist_ok=True)
        args.json_output.write_text(json.dumps(payload, indent=2, default=str), encoding="utf-8")
        print(f"JSON written to: {args.json_output}")


if __name__ == "__main__":
    main()

View File

@@ -186,7 +186,16 @@ def execute_circuit(
else:
frequencies = None
measured_probabilities = None
'''
if return_array:
if self.ansatz == "mps":
psi = circ_quimb.psi
statevector = psi.to_dense().reshape(-1)
else:
statevector = circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer)
else:
statevector = None
'''
statevector = (
circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer)
if return_array
@@ -291,6 +300,15 @@ def _qibo_circuit_to_quimb(
quimb_gate_name = GATE_MAP.get(gate_name, None)
if quimb_gate_name == "measure":
continue
if gate_name == "cu1":
theta = gate.parameters[0]
c, t = gate.qubits
circ.apply_gate("RZ", theta / 2, c)
circ.apply_gate("RZ", theta / 2, t)
circ.apply_gate("CNOT", c, t)
circ.apply_gate("RZ", -theta / 2, t)
circ.apply_gate("CNOT", c, t)
continue
if quimb_gate_name is None:
raise_error(ValueError, f"Gate {gate_name} not supported in Quimb backend.")

View File

@@ -57,10 +57,10 @@ class TensorNetworkResult:
return self.measures
def state(self):
"""Return the statevector if the number of qubits is less than 20."""
"""Return the statevector if the number of qubits is less than 35."""
if self.nqubits < 35:
return self.statevector
raise_error(
NotImplementedError,
f"Tensor network simulation cannot be used to reconstruct statevector for >= 20 .",
f"Tensor network simulation cannot be used to reconstruct statevector for >= 35 .",
)