diff --git a/.gitignore b/.gitignore index 2fee375..e6de13b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,8 @@ __pycache__/ data/ # C extensions *.so - +bak/ +perf* # Distribution / packaging .Python build/ diff --git a/bench_profile.py b/bench_profile.py new file mode 100644 index 0000000..26f8ac7 --- /dev/null +++ b/bench_profile.py @@ -0,0 +1,460 @@ +"""Benchmark: qibotn/quimb generic TN — single-process torch profiling version.""" + +import os +import pickle +import time +import argparse +import numpy as np +import cotengra as ctg +import qibo +from qibo import Circuit, gates + + +def make_circuit(circuit_type, nqubits, nlayers=1): + c = Circuit(nqubits) + + if circuit_type == "qft": + from qibo.models import QFT + return QFT(nqubits) + + elif circuit_type == "variational": + for layer in range(nlayers): + for q in range(nqubits): + c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi))) + + offset = layer % 2 + for q in range(offset, nqubits - 1, 2): + c.add(gates.CZ(q, q + 1)) + + elif circuit_type == "ghz": + c.add(gates.H(0)) + for q in range(nqubits - 1): + c.add(gates.CNOT(q, q + 1)) + + elif circuit_type == "brickwork": + for q in range(nqubits): + c.add(gates.H(q)) + + for layer in range(nlayers): + offset = layer % 2 + for q in range(offset, nqubits - 1, 2): + c.add(gates.CNOT(q, q + 1)) + c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi))) + c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi))) + + else: + raise ValueError(f"Unknown circuit: {circuit_type}") + + return c + + +def make_z_observable(nqubits): + """Z on qubit 0 only — single contraction for benchmarking.""" + return ["z"], [(0,)], [1.0] + + +def export_profiler_outputs(prof, trace_path): + """Export Chrome trace and text table.""" + prof.export_chrome_trace(trace_path) + + table_path = trace_path.replace(".json", ".txt") + with open(table_path, "w") as f: + f.write( + prof.key_averages().table( + sort_by="self_cpu_time_total", + row_limit=200, + ) + ) + + print(f" [torch profiler trace] {trace_path}") + print(f" [torch profiler table] {table_path}") + + +def run_quimb_tn( + circuit, + nqubits, + num_slices, + load_path=None, + save_path=None, +): + """Mode: expval — compute via local_expectation.""" + qibo.set_backend("qibotn", platform="quimb") + b = qibo.get_backend() + b.configure_tn_simulation(ansatz="tn") + + operators, sites, coeffs = make_z_observable(nqubits) + ops = b._string_to_quimb_operator(operators[0]) + + qc = b._qibo_circuit_to_quimb( + circuit, + quimb_circuit_type=b.circuit_ansatz, + gate_opts={"max_bond": None, "cutoff": 1e-10}, + ) + + if load_path: + with open(load_path, "rb") as f: + saved = pickle.load(f) + tree = saved["tree"] + t_search = 0.0 + print(f" [path loaded] {load_path}") + else: + opt = ctg.HyperOptimizer( + methods=["kahypar", "random-greedy", "spinglass"], + max_repeats=16, + parallel=True, + max_time=60, + slicing_opts={"target_slices": num_slices}, + progbar=True, + ) + + t0 = time.time() + rehearsal = qc.local_expectation( + ops, + where=sites[0], + optimize=opt, + simplify_sequence="R", + rehearse=True, + ) + t_search = time.time() - t0 + tree = rehearsal["tree"] + + print( + f" [path search] {t_search:.3f}s " + f"flops~2^{tree.contraction_cost():.2f} " + f"size~2^{tree.contraction_width():.2f} " + f"slices={tree.multiplicity}" + ) + + if save_path: + with open(save_path, "wb") as f: + pickle.dump({"tree": tree}, f) + print(f" [path saved] {save_path}") + + t0 = time.time() + expval = qc.local_expectation( + ops, + where=sites[0], + optimize=tree, + simplify_sequence="R", + ) + t_contract = time.time() - t0 + + print(f" [contraction] {t_contract:.3f}s") + + return float(expval.real), t_search + t_contract + + +def run_quimb_tn_statevector( + circuit, + nqubits, + num_slices, + load_path=None, + save_path=None, + profile=False, + profile_dir="profiles", +): + """Mode: statevector — contract full TN to dense vector, single process.""" + qibo.set_backend("qibotn", platform="quimb") + b = qibo.get_backend() + b.configure_tn_simulation(ansatz="tn") + + import torch + + qc = b._qibo_circuit_to_quimb( + circuit, + quimb_circuit_type=b.circuit_ansatz, + gate_opts={"max_bond": None, "cutoff": 1e-10}, + ) + + # 让 quimb 生成 torch tensor,这样 torch.profiler 能抓到 aten op。 + qc.to_backend = torch.from_numpy + + if load_path: + with open(load_path, "rb") as f: + saved = pickle.load(f) + tree = saved["tree"] + t_search = 0.0 + print(f" [path loaded] {load_path}") + else: + opt = ctg.HyperOptimizer( + methods=["kahypar", "random-greedy", "spinglass"], + max_repeats=500, + parallel=48, + max_time=100, + minimize="size", + slicing_opts={"target_slices": num_slices}, + progbar=True, + ) + + t0 = time.time() + rehearsal = qc.to_dense(optimize=opt, rehearse=True) + t_search = time.time() - t0 + tree = rehearsal["tree"] + + print( + f" [path search] {t_search:.3f}s " + f"flops~2^{tree.contraction_cost():.2f} " + f"size~2^{tree.contraction_width():.2f} " + f"slices={tree.multiplicity}" + ) + + if save_path: + with open(save_path, "wb") as f: + pickle.dump({"tree": tree}, f) + print(f" [path saved] {save_path}") + + os.makedirs(profile_dir, exist_ok=True) + + if profile: + from torch.profiler import profile as torch_profile + from torch.profiler import ProfilerActivity, record_function + + trace_path = os.path.join( + profile_dir, + ( + f"trace_statevector_" + f"{circuit.nqubits}q_" + f"slices{tree.multiplicity}_" + f"{int(time.time())}.json" + ), + ) + + t0 = time.time() + + with torch_profile( + activities=[ProfilerActivity.CPU], + record_shapes=True, + profile_memory=True, + with_stack=True, + ) as prof: + with record_function("qibotn_to_dense_contraction"): + sv = qc.to_dense(optimize=tree).reshape(-1) + + with record_function("torch_to_numpy_view_or_copy"): + if type(sv).__module__.startswith("torch"): + sv_tn = sv.detach().cpu().numpy() + else: + sv_tn = np.asarray(sv) + + t_contract = time.time() - t0 + + export_profiler_outputs(prof, trace_path) + + else: + t0 = time.time() + sv = qc.to_dense(optimize=tree).reshape(-1) + t_contract = time.time() - t0 + + if type(sv).__module__.startswith("torch"): + sv_tn = sv.detach().cpu().numpy() + else: + sv_tn = np.asarray(sv) + + print(f" [contraction] {t_contract:.3f}s") + + return sv_tn, t_search + t_contract + + +def run_quimb_tn_samples(circuit, nshots=1024): + """Mode: samples — sample from circuit output distribution.""" + qibo.set_backend("qibotn", platform="quimb") + b = qibo.get_backend() + b.configure_tn_simulation(ansatz="tn") + + t0 = time.time() + result = b.execute_circuit(circuit, nshots=nshots) + t_total = time.time() - t0 + + print(f" [sampling] {t_total:.3f}s nshots={nshots}") + + try: + freqs = result.frequencies() + print(f" top states: {dict(list(freqs.items())[:5])}") + except Exception: + pass + + return result, t_total + + +def qibojit_expval(circuit, nqubits): + """Compute via qibojit statevector.""" + qibo.set_backend("qibojit", platform="numba") + + t0 = time.time() + result = circuit() + elapsed = time.time() - t0 + + sv = np.array(result.state(), dtype=complex).flatten() + probs = np.abs(sv) ** 2 + + bits = (np.arange(len(probs)) >> (nqubits - 1)) & 1 + expval = float(np.dot(probs, 1 - 2 * bits)) + + return expval, elapsed + + +def run_qibojit(circuit): + """Compute full statevector via qibojit.""" + qibo.set_backend("qibojit", platform="numba") + + t0 = time.time() + result = circuit() + elapsed = time.time() - t0 + + sv = np.array(result.state(), dtype=complex).flatten() + + return sv, elapsed + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument("--nqubits", type=int, default=10) + parser.add_argument( + "--circuit", + type=str, + default="qft", + choices=["qft", "variational", "ghz", "brickwork"], + ) + parser.add_argument("--nlayers", type=int, default=3) + parser.add_argument("--num-slices", type=int, default=1) + parser.add_argument("--nshots", type=int, default=1024) + + parser.add_argument( + "--mode", + type=str, + default="statevector", + choices=["expval", "statevector", "samples"], + help="expval: local_expectation; statevector: to_dense; samples: sampling", + ) + + parser.add_argument( + "--no-compare", + action="store_true", + help="Skip qibojit reference run", + ) + + parser.add_argument( + "--save-path", + type=str, + default=None, + help="Save contraction tree to a pickle file", + ) + + parser.add_argument( + "--load-path", + type=str, + default=None, + help="Load contraction tree from a pickle file and skip path search", + ) + + parser.add_argument( + "--profile", + action="store_true", + help="Enable torch profiler for statevector contraction stage", + ) + + parser.add_argument( + "--profile-dir", + type=str, + default="profiles", + help="Directory to save torch profiler traces", + ) + + parser.add_argument( + "--save-statevector", + action="store_true", + help="Save TN statevector to data/sv_tn_*.npy", + ) + + args = parser.parse_args() + + print( + f"Circuit: {args.circuit}, " + f"nqubits={args.nqubits}, " + f"nlayers={args.nlayers}, " + f"mode={args.mode}, " + f"profile={args.profile}" + ) + + np.random.seed(42) + circuit_tn = make_circuit(args.circuit, args.nqubits, args.nlayers) + + try: + if args.mode == "expval": + expval_tn, t_tn = run_quimb_tn( + circuit_tn, + args.nqubits, + args.num_slices, + load_path=args.load_path, + save_path=args.save_path, + ) + + print(f"\n[quimb TN] time={t_tn:.4f}s ={expval_tn:.8f}") + + elif args.mode == "statevector": + sv_tn, t_tn = run_quimb_tn_statevector( + circuit_tn, + args.nqubits, + args.num_slices, + load_path=args.load_path, + save_path=args.save_path, + profile=args.profile, + profile_dir=args.profile_dir, + ) + + print( + f"\n[quimb TN] time={t_tn:.4f}s " + f"statevector shape={sv_tn.shape}" + ) + + if args.save_statevector: + os.makedirs("data", exist_ok=True) + out_path = f"data/sv_tn_{args.circuit}{args.nqubits}.npy" + np.save(out_path, sv_tn) + print(f"[saved statevector] {out_path}") + + else: + _, t_tn = run_quimb_tn_samples( + circuit_tn, + nshots=args.nshots, + ) + + print(f"\n[quimb TN] time={t_tn:.4f}s") + args.no_compare = True + + except Exception as e: + print(f"[quimb TN] FAILED: {e}") + raise + + if not args.no_compare and args.mode != "statevector": + np.random.seed(42) + circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) + + expval_ref, t_ref = qibojit_expval(circuit_ref, args.nqubits) + + print(f"[qibojit] time={t_ref:.4f}s ={expval_ref:.8f}") + print(f"\nDiff : {abs(expval_tn - expval_ref):.2e}") + + if t_tn > 0: + print(f"Speedup : {t_ref / t_tn:.2f}x") + + elif not args.no_compare and args.mode == "statevector" and sv_tn is not None: + np.random.seed(42) + circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) + + sv_ref, t_ref = run_qibojit(circuit_ref) + + fid = abs(np.dot(sv_ref.conj(), sv_tn)) ** 2 + l2_err = np.linalg.norm(sv_ref - sv_tn) + + print(f"[qibojit] time={t_ref:.4f}s") + print(f"Fidelity : {fid:.8f} (1=perfect)") + print(f"L2 error : {l2_err:.2e}") + + if t_tn > 0: + print(f"Speedup : {t_ref / t_tn:.2f}x") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/benchmark_mps.py b/benchmark_mps.py index 90c8275..827fd77 100644 --- a/benchmark_mps.py +++ b/benchmark_mps.py @@ -4,16 +4,17 @@ import argparse import os import numpy as np import qibo +import quimb.tensor as qtn from qibo import Circuit, gates DATA_DIR = os.path.join(os.path.dirname(__file__), "data") -def make_circuit(circuit_type, nqubits, nlayers=1): +def make_circuit(circuit_type, nqubits, nlayers=1, add_measurements=False): c = Circuit(nqubits) if circuit_type == "qft": from qibo.models import QFT - return QFT(nqubits) + c = QFT(nqubits) elif circuit_type == "variational": for layer in range(nlayers): for q in range(nqubits): @@ -27,6 +28,8 @@ def make_circuit(circuit_type, nqubits, nlayers=1): c.add(gates.CNOT(q, q + 1)) else: raise ValueError(f"Unknown circuit: {circuit_type}") + if add_measurements: + c.add(gates.M(*range(nqubits))) return c @@ -39,20 +42,58 @@ def run_qibojit(circuit): return sv, elapsed -def run_quimb_mps(circuit, max_bond, svd_cutoff, optimizer): +def run_quimb_mps(circuit, max_bond, svd_cutoff, optimizer, nshots=None): qibo.set_backend("qibotn", platform="quimb") b = qibo.get_backend() b.configure_tn_simulation(ansatz="mps", max_bond_dimension=max_bond, svd_cutoff=svd_cutoff) b.contractions_optimizer = optimizer t0 = time.time() - result = b.execute_circuit(circuit, return_array=True) - elapsed = time.time() - t0 - sv = result.state() - return sv, elapsed + if nshots: + result = b.execute_circuit(circuit, nshots=nshots) + elapsed = time.time() - t0 + return result.frequencies(), elapsed, 0.0 + else: + # MPS simulation + circ_quimb = qtn.CircuitMPS.from_openqasm2_str( + circuit.to_qasm(), + gate_opts={"max_bond": max_bond, "cutoff": svd_cutoff}, + ) + t_mps = time.time() - t0 + # to_dense separately + t1 = time.time() + #sv = circ_quimb.psi.to_dense().reshape(-1) + sv = None + t_dense = time.time() - t1 + return sv, t_mps, t_dense -def compare(sv_ref, sv_mps): +def run_quimb_permmps(circuit, max_bond, svd_cutoff, nshots=None): + gates_list = [ + qtn.Gate(g.name, params=list(g.parameters), qubits=list(g.qubits)) + for g in circuit.queue + if g.name.lower() != "measure" + ] + t0 = time.time() + circ = qtn.CircuitPermMPS.from_gates( + gates_list, + N=circuit.nqubits, + max_bond=max_bond, + cutoff=svd_cutoff, + ) + if nshots: + from collections import Counter + result = Counter(circ.sample(nshots)) + elapsed = time.time() - t0 + return dict(result), elapsed + else: + mps = circ.get_psi_unordered() + sv = mps.to_dense().reshape(-1) + elapsed = time.time() - t0 + return sv, elapsed + + +def compare_statevector(sv_ref, sv_mps): sv_ref = np.array(sv_ref, dtype=complex).flatten() sv_mps = np.array(sv_mps, dtype=complex).flatten() fidelity = abs(np.dot(sv_ref.conj(), sv_mps)) ** 2 @@ -60,6 +101,12 @@ def compare(sv_ref, sv_mps): return fidelity, l2_err +def compare_frequencies(freq_ref, freq_mps, nshots): + all_keys = set(freq_ref) | set(freq_mps) + tvd = 0.5 * sum(abs(freq_ref.get(k, 0) - freq_mps.get(k, 0)) for k in all_keys) / nshots + return tvd + + def jit_cache_path(circuit_type, nqubits, nlayers): os.makedirs(DATA_DIR, exist_ok=True) return os.path.join(DATA_DIR, f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy") @@ -74,37 +121,65 @@ def main(): parser.add_argument("--max-bond", type=int, default=None, help="Max bond dimension for MPS (None = unlimited)") parser.add_argument("--svd-cutoff", type=float, default=1e-6) - parser.add_argument("--optimizer", type=str, default="auto-hq") + parser.add_argument("--optimizer", type=str, default="eager") + parser.add_argument("--nshots", type=int, default=None, + help="Use sampling mode with given number of shots instead of statevector") + parser.add_argument("--permmps", action="store_true", + help="Use CircuitPermMPS directly instead of qibotn backend") parser.add_argument("--skip-jit", action="store_true", help="Skip qibojit run, load cached statevector if available") + parser.add_argument("--no-compare", action="store_true", + help="Skip correctness comparison entirely") args = parser.parse_args() print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}") print(f"MPS config: max_bond={args.max_bond}, svd_cutoff={args.svd_cutoff}, optimizer={args.optimizer}") - cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers) + ref = None t_ref = None - if args.skip_jit and os.path.exists(cache_path): - sv_ref = np.load(cache_path) - print(f"\n[qibojit] loaded from cache: {cache_path}") - else: - np.random.seed(42) - circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) - sv_ref, t_ref = run_qibojit(circuit_ref) - np.save(cache_path, sv_ref) - print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})") + if not args.no_compare: + cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers) + if args.nshots: + # frequency mode: run qibojit with same nshots + np.random.seed(42) + circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers, add_measurements=True) + qibo.set_backend("qibojit", platform="numba") + t0 = time.time() + result_ref = circuit_ref(nshots=args.nshots) + t_ref = time.time() - t0 + ref = dict(result_ref.frequencies()) + print(f"\n[qibojit] time={t_ref:.4f}s") + elif args.skip_jit and os.path.exists(cache_path): + ref = np.load(cache_path) + print(f"\n[qibojit] loaded from cache: {cache_path}") + else: + np.random.seed(42) + circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) + ref, t_ref = run_qibojit(circuit_ref) + np.save(cache_path, ref) + print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})") np.random.seed(42) circuit_mps = make_circuit(args.circuit, args.nqubits, args.nlayers) + label = "quimb PermMPS" if args.permmps else "quimb MPS" try: - sv_mps, t_mps = run_quimb_mps(circuit_mps, args.max_bond, args.svd_cutoff, args.optimizer) - fidelity, l2_err = compare(sv_ref, sv_mps) - print(f"[quimb MPS] time={t_mps:.4f}s") - print(f"\nFidelity : {fidelity:.8f} (1=perfect)") - print(f"L2 error : {l2_err:.2e}") - if t_ref is not None and t_mps > 0: - print(f"Speedup : {t_ref/t_mps:.2f}x") + if args.permmps: + out, t_mps = run_quimb_permmps(circuit_mps, args.max_bond, args.svd_cutoff, args.nshots) + t_dense = 0.0 + else: + out, t_mps, t_dense = run_quimb_mps(circuit_mps, args.max_bond, args.svd_cutoff, args.optimizer, args.nshots) + print(f"[{label}] MPS sim={t_mps:.4f}s to_dense={t_dense:.4f}s total={t_mps+t_dense:.4f}s") + if not args.no_compare: + if args.nshots: + tvd = compare_frequencies(ref, out, args.nshots) + print(f"\nTVD : {tvd:.6f} (0=perfect)") + else: + fidelity, l2_err = compare_statevector(ref, out) + print(f"\nFidelity : {fidelity:.8f} (1=perfect)") + print(f"L2 error : {l2_err:.2e}") + if t_ref is not None and t_mps > 0: + print(f"Speedup : {t_ref/t_mps:.2f}x") except Exception as e: print(f"[quimb MPS] FAILED: {e}") raise diff --git a/benchmark_qmatchatea.py b/benchmark_qmatchatea.py new file mode 100644 index 0000000..9e7923b --- /dev/null +++ b/benchmark_qmatchatea.py @@ -0,0 +1,126 @@ +"""Benchmark: qibojit (reference) vs qibotn/qmatchatea MPS.""" +import time +import argparse +import os +import numpy as np +import qibo +from qibo import Circuit, gates +from qibo.backends import construct_backend + +DATA_DIR = os.path.join(os.path.dirname(__file__), "data") + + +def make_circuit(circuit_type, nqubits, nlayers=1): + c = Circuit(nqubits) + if circuit_type == "qft": + from qibo.models import QFT + return QFT(nqubits) + elif circuit_type == "variational": + for layer in range(nlayers): + for q in range(nqubits): + c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi))) + offset = layer % 2 + for q in range(offset, nqubits - 1, 2): + c.add(gates.CZ(q, q + 1)) + elif circuit_type == "ghz": + c.add(gates.H(0)) + for q in range(nqubits - 1): + c.add(gates.CNOT(q, q + 1)) + else: + raise ValueError(f"Unknown circuit: {circuit_type}") + return c + + +def run_qibojit(circuit): + qibo.set_backend("qibojit", platform="numba") + t0 = time.time() + result = circuit() + elapsed = time.time() - t0 + return result.state(), elapsed + + +def run_qmatchatea(circuit, max_bond, cut_ratio): + import qmatchatea, qtealeaves.observables + from qibo.backends import construct_backend as _cb + b = _cb(backend="qibotn", platform="qmatchatea") + b.configure_tn_simulation(ansatz="MPS", max_bond_dimension=max_bond, cut_ratio=cut_ratio) + + qk_circuit = b._qibocirc_to_qiskitcirc(circuit) + run_qk_params = qmatchatea.preprocessing.qk_transpilation_params(False) + observables = qtealeaves.observables.TNObservables() + observables += qtealeaves.observables.TNState2File(name="temp", formatting="D") + + t0 = time.time() + results = qmatchatea.run_simulation( + circ=qk_circuit, + convergence_parameters=b.convergence_params, + transpilation_parameters=run_qk_params, + backend=b.qmatchatea_backend, + observables=observables, + ) + elapsed = time.time() - t0 + tn_state = results.observables.get("tn_state") + if tn_state is None: + results.load_state() + tn_state = results.observables["tn_state"] + sv_obj = tn_state.to_statevector(qiskit_order=False, max_qubit_equivalent=40) + sv = np.array(sv_obj.elem, dtype=complex).flatten() + return sv, elapsed + + +def compare(sv_ref, sv_mps): + sv_ref = np.array(sv_ref, dtype=complex).flatten() + fidelity = abs(np.dot(sv_ref.conj(), sv_mps)) ** 2 + l2_err = np.linalg.norm(sv_ref - sv_mps) + return fidelity, l2_err + + +def jit_cache_path(circuit_type, nqubits, nlayers): + os.makedirs(DATA_DIR, exist_ok=True) + return os.path.join(DATA_DIR, f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy") + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--nqubits", type=int, default=10) + parser.add_argument("--circuit", type=str, default="ghz", + choices=["qft", "variational", "ghz"]) + parser.add_argument("--nlayers", type=int, default=3) + parser.add_argument("--max-bond", type=int, default=64) + parser.add_argument("--cut-ratio", type=float, default=1e-6) + parser.add_argument("--skip-jit", action="store_true") + args = parser.parse_args() + + print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}") + print(f"MPS config: max_bond={args.max_bond}, cut_ratio={args.cut_ratio}") + + cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers) + t_ref = None + + if args.skip_jit and os.path.exists(cache_path): + sv_ref = np.load(cache_path) + print(f"\n[qibojit] loaded from cache: {cache_path}") + else: + np.random.seed(42) + circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) + sv_ref, t_ref = run_qibojit(circuit_ref) + np.save(cache_path, sv_ref) + print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})") + + np.random.seed(42) + circuit_mps = make_circuit(args.circuit, args.nqubits, args.nlayers) + try: + sv_mps, t_mps = run_qmatchatea(circuit_mps, args.max_bond, args.cut_ratio) + fidelity, l2_err = compare(sv_ref, sv_mps) + print(f"[qmatchatea] time={t_mps:.4f}s") + print(f"\nFidelity : {fidelity:.8f} (1=perfect)") + print(f"L2 error : {l2_err:.2e}") + if t_ref is not None and t_mps > 0: + print(f"Speedup : {t_ref/t_mps:.2f}x") + except Exception as e: + print(f"[qmatchatea] FAILED: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/benchmark_tn.py b/benchmark_tn.py index 4daa6fb..6dae804 100644 --- a/benchmark_tn.py +++ b/benchmark_tn.py @@ -1,4 +1,5 @@ """Benchmark: qibotn/quimb generic TN — expectation values.""" +import pickle import time import argparse import numpy as np @@ -22,6 +23,15 @@ def make_circuit(circuit_type, nqubits, nlayers=1): c.add(gates.H(0)) for q in range(nqubits - 1): c.add(gates.CNOT(q, q + 1)) + elif circuit_type == "brickwork": + for q in range(nqubits): + c.add(gates.H(q)) + for layer in range(nlayers): + offset = layer % 2 + for q in range(offset, nqubits - 1, 2): + c.add(gates.CNOT(q, q + 1)) + c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi))) + c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi))) else: raise ValueError(f"Unknown circuit: {circuit_type}") return c @@ -33,80 +43,305 @@ def make_z_observable(nqubits): return ["z"], [(0,)], [1.0] -def run_quimb_tn(circuit, nqubits): +def run_quimb_tn(circuit, nqubits, num_slices, load_path=None, save_path=None): + """Mode: expval — compute via local_expectation (lightcone pruning).""" qibo.set_backend("qibotn", platform="quimb") b = qibo.get_backend() - b.configure_tn_simulation(ansatz="tn") # generic TN, no MPS - #if max_time is not None: - # opt = ctg.HyperOptimizer(max_repeats=128, max_time=max_time, minimize=minimize, parallel=True) - #else: - opt = ctg.HyperOptimizer( - max_repeats=16, - parallel=True, - slicing_opts={'target_size': 2**24}, # 限制单个张量最大 2^28 个元素 - progbar=True - ) - - b.contractions_optimizer = opt + b.configure_tn_simulation(ansatz="tn") + operators, sites, coeffs = make_z_observable(nqubits) + ops = b._string_to_quimb_operator(operators[0]) + qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz, + gate_opts={"max_bond": None, "cutoff": 1e-10}) + + if load_path: + with open(load_path, "rb") as f: + saved = pickle.load(f) + tree = saved["tree"] + t_search = 0.0 + print(f" [path loaded] {load_path}") + else: + opt = ctg.HyperOptimizer( + methods=['kahypar', 'random-greedy', 'spinglass'], + max_repeats=16, + parallel=True, + max_time=60, + slicing_opts={'target_slices': num_slices}, + progbar=True, + ) + t0 = time.time() + rehearsal = qc.local_expectation(ops, where=sites[0], optimize=opt, + simplify_sequence="R", rehearse=True) + t_search = time.time() - t0 + tree = rehearsal['tree'] + print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}") + + if save_path: + with open(save_path, "wb") as f: + pickle.dump({"tree": tree}, f) + print(f" [path saved] {save_path}") + t0 = time.time() - expval = b.exp_value_observable_symbolic(circuit, operators, sites, coeffs, nqubits) - elapsed = time.time() - t0 - return expval, elapsed + expval = qc.local_expectation(ops, where=sites[0], optimize=tree, simplify_sequence="R") + t_contract = time.time() - t0 + print(f" [contraction] {t_contract:.3f}s") + + return float(expval.real), t_search + t_contract + + +def run_quimb_tn_statevector(circuit, nqubits, num_slices, load_path=None, save_path=None): + """Mode: statevector — contract full TN to dense vector.""" + qibo.set_backend("qibotn", platform="quimb") + b = qibo.get_backend() + b.configure_tn_simulation(ansatz="tn") + + import torch + qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz, + gate_opts={"max_bond": None, "cutoff": 1e-10}) + qc.to_backend = torch.from_numpy + if load_path: + with open(load_path, "rb") as f: + saved = pickle.load(f) + tree = saved["tree"] + t_search = 0.0 + print(f" [path loaded] {load_path}") + else: + opt = ctg.HyperOptimizer( + methods=['kahypar', 'random-greedy', 'spinglass'], + max_repeats=128, + parallel=64, + max_time=100, + minimize='size', + slicing_opts={'target_slices': num_slices}, + #slicing_opts={'target_size': 2**30}, + progbar=True, + ) + t0 = time.time() + rehearsal = qc.to_dense(optimize=opt, rehearse=True) + t_search = time.time() - t0 + tree = rehearsal['tree'] + print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}") + + if save_path: + with open(save_path, "wb") as f: + pickle.dump({"tree": tree}, f) + print(f" [path saved] {save_path}") + + t0 = time.time() + sv = qc.to_dense(optimize=tree,implementation="cotengra").reshape(-1) + t_contract = time.time() - t0 + print(f" [contraction] {t_contract:.3f}s") + sv_tn = np.array(sv) + return sv_tn, t_search + t_contract + + +def _contract_mpi(tree, arrays, comm, root=0): + """Contract slices via MPI, returning result as the same array type as input. + + Unlike ``cotengra.ContractionTree.contract_mpi``, this works with any + array backend (numpy, torch, etc.) — it only converts to numpy at the + MPI-reduce boundary and converts back. + """ + size = comm.Get_size() + rank = comm.Get_rank() + + result_np = None + is_torch = type(arrays[0]).__module__.startswith("torch") + + for i in range(rank, tree.multiplicity, size): + x = tree.contract_slice(arrays, i) + x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x)) + + if result_np is None: + result_np = x_np + else: + result_np += x_np + + if result_np is None: + result_np = np.zeros(1, dtype=np.complex64) + + if rank == root: + result = np.zeros_like(result_np) + else: + result = None + comm.Reduce(result_np, result, root=root) + + if rank == root: + import torch + return torch.from_numpy(np.asarray(result)) if is_torch else result + return None + + +def run_quimb_tn_statevector_mpi(circuit, nqubits, num_slices, load_path=None, save_path=None): + """MPI-parallel statevector via custom MPI contraction (supports torch backend).""" + from mpi4py import MPI + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + size = comm.Get_size() + + qibo.set_backend("qibotn", platform="quimb") + b = qibo.get_backend() + b.configure_tn_simulation(ansatz="tn") + + import torch + qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz, + gate_opts={"max_bond": None, "cutoff": 1e-10}) + qc.to_backend = torch.from_numpy + + # path search on rank 0, broadcast to all + if rank == 0: + if load_path: + with open(load_path, "rb") as f: + saved = pickle.load(f) + tree = saved["tree"] + psi = saved["psi"] + t_search = 0.0 + print(f" [path loaded] {load_path}") + else: + opt = ctg.HyperOptimizer( + methods=['kahypar', 'random-greedy', 'spinglass'], + max_repeats=128, + parallel=64, + #max_repeats=1, + max_time=100, + minimize='size', + slicing_opts={'target_slices': max(num_slices, size), 'allow_outer': False}, + progbar=True, + ) + t0 = time.time() + rehearsal = qc.to_dense(optimize=opt, rehearse=True) + t_search = time.time() - t0 + tree = rehearsal['tree'] + psi = rehearsal['tn'] + print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f} slices={tree.multiplicity}") + if save_path: + with open(save_path, "wb") as f: + pickle.dump({"tree": tree, "psi": psi}, f) + print(f" [path saved] {save_path}") + else: + tree = None + psi = None + t_search = 0.0 + + tree = comm.bcast(tree, root=0) + psi = comm.bcast(psi, root=0) + t_search = comm.bcast(t_search, root=0) + + arrays = psi.arrays + t0 = time.time() + sv = _contract_mpi(tree, arrays, comm, root=0) + t_contract = time.time() - t0 + + if rank == 0: + print(f" [contraction] {t_contract:.3f}s") + return np.array(sv).reshape(-1), t_search + t_contract + return None, t_search + t_contract + + +def run_quimb_tn_samples(circuit, nshots=1024): + """Mode: samples — sample from circuit output distribution.""" + qibo.set_backend("qibotn", platform="quimb") + b = qibo.get_backend() + b.configure_tn_simulation(ansatz="tn") + + t0 = time.time() + result = b.execute_circuit(circuit, nshots=nshots) + t_total = time.time() - t0 + print(f" [sampling] {t_total:.3f}s nshots={nshots}") + print(f" top states: {dict(list(result.frequencies().items())[:5])}") + return result, t_total def qibojit_expval(circuit, nqubits): - """Compute sum_i via qibojit statevector.""" + """Compute via qibojit statevector.""" qibo.set_backend("qibojit", platform="numba") t0 = time.time() result = circuit() elapsed = time.time() - t0 sv = np.array(result.state(), dtype=complex).flatten() probs = np.abs(sv) ** 2 - expval = sum( - probs[idx] * (1 - 2 * ((idx >> (nqubits - 1 - i)) & 1)) - for i in range(nqubits) - for idx in range(len(probs)) - ) - return float(expval), elapsed + bits = (np.arange(len(probs)) >> (nqubits - 1)) & 1 + expval = float(np.dot(probs, 1 - 2 * bits)) + return expval, elapsed + + +def run_qibojit(circuit): + """Compute full statevector via qibojit.""" + qibo.set_backend("qibojit", platform="numba") + t0 = time.time() + result = circuit() + elapsed = time.time() - t0 + sv = np.array(result.state(), dtype=complex).flatten() + return sv, elapsed def main(): parser = argparse.ArgumentParser() parser.add_argument("--nqubits", type=int, default=10) parser.add_argument("--circuit", type=str, default="qft", - choices=["qft", "variational", "ghz"]) + choices=["qft", "variational", "ghz", "brickwork"]) parser.add_argument("--nlayers", type=int, default=3) - parser.add_argument("--optimizer", type=str, default="auto-hq") - parser.add_argument("--max-time", type=float, default=None, - help="HyperOptimizer max search time (seconds); overrides --optimizer") - parser.add_argument("--minimize", type=str, default="flops", - choices=["flops", "size", "write"], - help="HyperOptimizer minimize target") + parser.add_argument("--num-slices", type=int, default=1) + parser.add_argument("--nshots", type=int, default=1024) + parser.add_argument("--mode", type=str, default="statevector", + choices=["expval", "statevector", "samples"], + help="expval: local_expectation; statevector: to_dense; samples: sampling") + parser.add_argument("--mpi", action="store_true", + help="Use MPI-parallel contraction (run with mpirun -n N)") parser.add_argument("--no-compare", action="store_true", help="Skip qibojit reference run") + parser.add_argument("--save-path", type=str, default=None, + help="Save contraction tree to a pickle file") + parser.add_argument("--load-path", type=str, default=None, + help="Load contraction tree from a pickle file (skip path search)") args = parser.parse_args() - print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}") - print(f"TN config: optimizer={args.optimizer}, max_time={args.max_time}, minimize={args.minimize}") + print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}, mode={args.mode}") np.random.seed(42) circuit_tn = make_circuit(args.circuit, args.nqubits, args.nlayers) try: - expval_tn, t_tn = run_quimb_tn(circuit_tn, args.nqubits) - print(f"\n[quimb TN] time={t_tn:.4f}s ={expval_tn:.8f}") + if args.mode == "expval": + expval_tn, t_tn = run_quimb_tn(circuit_tn, args.nqubits, args.num_slices, + load_path=args.load_path, save_path=args.save_path) + print(f"\n[quimb TN] time={t_tn:.4f}s ={expval_tn:.8f}") + elif args.mode == "statevector": + if args.mpi: + sv_tn, t_tn = run_quimb_tn_statevector_mpi(circuit_tn, args.nqubits, args.num_slices, + load_path=args.load_path, save_path=args.save_path) + else: + sv_tn, t_tn = run_quimb_tn_statevector(circuit_tn, args.nqubits, args.num_slices, + load_path=args.load_path, save_path=args.save_path) + if sv_tn is not None: + print(f"\n[quimb TN] time={t_tn:.4f}s statevector shape={sv_tn.shape}") + np.save(f"data/sv_tn_{args.circuit}{args.nqubits}.npy", sv_tn) + else: + _, t_tn = run_quimb_tn_samples(circuit_tn, args.nqubits, args.nshots) + print(f"\n[quimb TN] time={t_tn:.4f}s") + args.no_compare = True # samples 模式无法和 qibojit 期望值对比 except Exception as e: print(f"[quimb TN] FAILED: {e}") raise - if not args.no_compare: + if not args.no_compare and args.mode != "statevector": np.random.seed(42) circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) expval_ref, t_ref = qibojit_expval(circuit_ref, args.nqubits) - print(f"[qibojit] time={t_ref:.4f}s ={expval_ref:.8f}") + print(f"[qibojit] time={t_ref:.4f}s ={expval_ref:.8f}") print(f"\nDiff : {abs(expval_tn - expval_ref):.2e}") if t_tn > 0: print(f"Speedup : {t_ref/t_tn:.2f}x") + elif not args.no_compare and args.mode == "statevector" and sv_tn is not None: + np.random.seed(42) + circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) + sv_ref, t_ref = run_qibojit(circuit_ref) + fid = abs(np.dot(sv_ref.conj(), sv_tn)) ** 2 + l2_err = np.linalg.norm(sv_ref - sv_tn) + print(f"[qibojit] time={t_ref:.4f}s") + print(f"Fidelity : {fid:.8f} (1=perfect)") + print(f"L2 error : {l2_err:.2e}") + if t_tn > 0: + print(f"Speedup : {t_ref/t_tn:.2f}x") if __name__ == "__main__": diff --git a/benchmarks/benchmark_quimb.py b/benchmarks/benchmark_quimb.py new file mode 100644 index 0000000..9566cdc --- /dev/null +++ b/benchmarks/benchmark_quimb.py @@ -0,0 +1,519 @@ +"""Benchmark and profile the qibotn/quimb CPU backend. + +This script is intended to be the stable baseline for quimb backend +optimization work. It supports: + +- multiple circuit families +- MPS or generic TN execution +- statevector, sampling, conversion, and local expectation workloads +- warmup/repeat timing +- optional correctness checks against qibojit +- optional cProfile output +""" + +from __future__ import annotations + +import argparse +import cProfile +import json +import math +import os +import pstats +import time +from pathlib import Path +from statistics import mean, pstdev + +import numpy as np +import qibo +from qibo import Circuit, gates + + +def configure_runtime_env(quimb_num_procs: int | None, blas_threads: int | None): + """Pin process-level thread settings before heavy work starts.""" + if quimb_num_procs is not None: + os.environ["QUIMB_NUM_PROCS"] = str(quimb_num_procs) + if blas_threads is not None: + value = str(blas_threads) + os.environ["OMP_NUM_THREADS"] = value + os.environ["OPENBLAS_NUM_THREADS"] = value + os.environ["MKL_NUM_THREADS"] = value + os.environ["NUMEXPR_NUM_THREADS"] = value + + +def make_circuit( + circuit_type: str, + nqubits: int, + nlayers: int, + seed: int, + add_measurements: bool = False, +) -> Circuit: + """Construct repeatable workloads covering low/high entanglement cases.""" + rng = np.random.default_rng(seed) + circuit = Circuit(nqubits) + + if circuit_type == "qft": + from qibo.models import QFT + + circuit = QFT(nqubits) + elif circuit_type == "variational": + for layer in range(nlayers): + for qubit in range(nqubits): + circuit.add(gates.RY(qubit, theta=rng.uniform(0.0, 2.0 * np.pi))) + offset = layer % 2 + for qubit in range(offset, nqubits - 1, 2): + circuit.add(gates.CZ(qubit, qubit + 1)) + elif circuit_type == "ghz": + circuit.add(gates.H(0)) + for qubit in range(nqubits - 1): + circuit.add(gates.CNOT(qubit, qubit + 1)) + elif circuit_type == "qaoa": + for _ in range(nlayers): + for qubit in range(nqubits): + circuit.add(gates.RZ(qubit, theta=rng.uniform(0.0, 2.0 * np.pi))) + for qubit in range(0, nqubits - 1, 2): + circuit.add(gates.CZ(qubit, qubit + 1)) + for qubit in range(nqubits): + circuit.add(gates.RX(qubit, theta=rng.uniform(0.0, 2.0 * np.pi))) + elif circuit_type == "ising1d": + for _ in range(nlayers): + for qubit in range(nqubits): + circuit.add(gates.RX(qubit, theta=rng.uniform(0.0, 2.0 * np.pi))) + for qubit in range(0, nqubits - 1, 2): + circuit.add(gates.CZ(qubit, qubit + 1)) + for qubit in range(1, nqubits - 1, 2): + circuit.add(gates.CZ(qubit, qubit + 1)) + elif circuit_type == "rcs": + cols = math.ceil(math.sqrt(nqubits)) + rows = math.ceil(nqubits / cols) + single_qubit_gates = [gates.T, gates.X, gates.Y] + for layer in range(nlayers): + for qubit in range(nqubits): + gate_cls = single_qubit_gates[rng.integers(0, len(single_qubit_gates))] + circuit.add(gate_cls(qubit)) + if layer % 2 == 0: + for row in range(rows): + for col in range(0, cols - 1, 2): + q1, q2 = row * cols + col, row * cols + col + 1 + if q2 < nqubits: + circuit.add(gates.CZ(q1, q2)) + else: + for row in range(0, rows - 1, 2): + for col in range(cols): + q1, q2 = row * cols + col, (row + 1) * cols + col + if q2 < nqubits: + circuit.add(gates.CZ(q1, q2)) + else: + raise ValueError(f"Unknown circuit type: {circuit_type}") + + if add_measurements: + circuit.add(gates.M(*range(nqubits))) + return circuit + + +def prepare_quimb_backend( + ansatz: str, + max_bond: int | None, + svd_cutoff: float, + optimizer: str, + n_most_frequent_states: int, +): + """Create and configure the qibotn/quimb backend once.""" + qibo.set_backend("qibotn", platform="quimb") + backend = qibo.get_backend() + backend.configure_tn_simulation( + ansatz=ansatz, + max_bond_dimension=max_bond, + svd_cutoff=svd_cutoff, + n_most_frequent_states=n_most_frequent_states, + ) + backend.contractions_optimizer = optimizer + return backend + + +def run_qibojit_state(circuit: Circuit): + qibo.set_backend("qibojit", platform="numba") + t0 = time.perf_counter() + result = circuit() + elapsed = time.perf_counter() - t0 + state = np.asarray(result.state(), dtype=complex).reshape(-1) + return state, elapsed + + +def run_qibojit_shots(circuit: Circuit, nshots: int): + qibo.set_backend("qibojit", platform="numba") + t0 = time.perf_counter() + result = circuit(nshots=nshots) + elapsed = time.perf_counter() - t0 + return dict(result.frequencies()), elapsed + + +def z_expectation_from_statevector(statevector: np.ndarray, nqubits: int, qubit: int): + probs = np.abs(np.asarray(statevector).reshape(-1)) ** 2 + bit_index = nqubits - qubit - 1 + bits = (np.arange(len(probs)) >> bit_index) & 1 + return float(np.dot(probs, 1.0 - 2.0 * bits)) + + +def fidelity_and_l2(reference: np.ndarray, candidate: np.ndarray): + ref = np.asarray(reference, dtype=complex).reshape(-1) + cand = np.asarray(candidate, dtype=complex).reshape(-1) + fidelity = abs(np.vdot(ref, cand)) ** 2 + l2_error = np.linalg.norm(ref - cand) + return float(fidelity), float(l2_error) + + +def total_variation_distance(reference: dict[str, int], candidate: dict[str, int], nshots: int): + keys = set(reference) | set(candidate) + return 0.5 * sum(abs(reference.get(key, 0) - candidate.get(key, 0)) for key in keys) / nshots + + +def profile_callable(func, output_path: Path, sort_by: str): + """Profile a single invocation and dump textual stats.""" + profiler = cProfile.Profile() + profiler.enable() + result = func() + profiler.disable() + + output_path.parent.mkdir(parents=True, exist_ok=True) + with output_path.open("w", encoding="utf-8") as stream: + stats = pstats.Stats(profiler, stream=stream) + stats.strip_dirs().sort_stats(sort_by).print_stats(80) + stats.print_callers(30) + return result + + +def time_callable(func, repeats: int, warmup: int, profile_output: Path | None, profile_sort: str): + for _ in range(warmup): + func() + + profiled_payload = None + if profile_output is not None: + profiled_payload = profile_callable(func, profile_output, profile_sort) + + samples = [] + payloads = [] + + for _ in range(repeats): + t0 = time.perf_counter() + payload = func() + elapsed = time.perf_counter() - t0 + samples.append(elapsed) + payloads.append(payload) + + final_payload = payloads[-1] if payloads else profiled_payload + return samples, final_payload + + +def summarize_samples(samples: list[float]): + return { + "min_s": min(samples), + "mean_s": mean(samples), + "max_s": max(samples), + "std_s": pstdev(samples) if len(samples) > 1 else 0.0, + "repeats": len(samples), + } + + +def workload_state(args): + circuit = make_circuit(args.circuit, args.nqubits, args.nlayers, args.seed) + backend = prepare_quimb_backend( + ansatz=args.ansatz, + max_bond=args.max_bond, + svd_cutoff=args.svd_cutoff, + optimizer=args.optimizer, + n_most_frequent_states=args.topk, + ) + + def run_once(): + result = backend.execute_circuit(circuit, return_array=True) + return np.asarray(result.statevector).reshape(-1) + + samples, statevector = time_callable( + run_once, args.repeats, args.warmup, args.profile_output, args.profile_sort + ) + summary = summarize_samples(samples) + + correctness = None + if not args.no_compare: + ref_state, ref_time = run_qibojit_state(circuit) + fidelity, l2_error = fidelity_and_l2(ref_state, statevector) + correctness = { + "qibojit_time_s": ref_time, + "fidelity": fidelity, + "l2_error": l2_error, + } + + return summary, correctness + + +def workload_shots(args): + circuit = make_circuit( + args.circuit, args.nqubits, args.nlayers, args.seed, add_measurements=True + ) + backend = prepare_quimb_backend( + ansatz=args.ansatz, + max_bond=args.max_bond, + svd_cutoff=args.svd_cutoff, + optimizer=args.optimizer, + n_most_frequent_states=args.topk, + ) + + def run_once(): + result = backend.execute_circuit(circuit, nshots=args.nshots) + return dict(result.frequencies()) + + samples, frequencies = time_callable( + run_once, args.repeats, args.warmup, args.profile_output, args.profile_sort + ) + summary = summarize_samples(samples) + + correctness = None + if not args.no_compare: + ref_freq, ref_time = run_qibojit_shots(circuit, args.nshots) + correctness = { + "qibojit_time_s": ref_time, + "tvd": total_variation_distance(ref_freq, frequencies, args.nshots), + } + + return summary, correctness + + +def workload_convert(args): + circuit = make_circuit(args.circuit, args.nqubits, args.nlayers, args.seed) + backend = prepare_quimb_backend( + ansatz=args.ansatz, + max_bond=args.max_bond, + svd_cutoff=args.svd_cutoff, + optimizer=args.optimizer, + n_most_frequent_states=args.topk, + ) + + def run_once(): + quimb_circuit = backend._qibo_circuit_to_quimb( # pylint: disable=protected-access + circuit, + quimb_circuit_type=backend.circuit_ansatz, + gate_opts={"max_bond": backend.max_bond_dimension, "cutoff": backend.svd_cutoff}, + ) + return len(quimb_circuit.gates) + + samples, gate_count = time_callable( + run_once, args.repeats, args.warmup, args.profile_output, args.profile_sort + ) + summary = summarize_samples(samples) + summary["gate_count"] = gate_count + return summary, None + + +def workload_expectation(args): + circuit = make_circuit(args.circuit, args.nqubits, args.nlayers, args.seed) + backend = prepare_quimb_backend( + ansatz=args.ansatz, + max_bond=args.max_bond, + svd_cutoff=args.svd_cutoff, + optimizer=args.optimizer, + n_most_frequent_states=args.topk, + ) + operators = ["z"] + sites = [(args.observable_qubit,)] + coeffs = [1.0] + + def run_once(): + return float( + backend.exp_value_observable_symbolic( + circuit, operators, sites, coeffs, args.nqubits + ) + ) + + samples, expval = time_callable( + run_once, args.repeats, args.warmup, args.profile_output, args.profile_sort + ) + summary = summarize_samples(samples) + + correctness = None + if not args.no_compare: + ref_state, ref_time = run_qibojit_state(circuit) + correctness = { + "qibojit_time_s": ref_time, + "reference_expval": z_expectation_from_statevector( + ref_state, args.nqubits, args.observable_qubit + ), + "abs_error": abs( + z_expectation_from_statevector(ref_state, args.nqubits, args.observable_qubit) + - expval + ), + } + + return summary, correctness + + +def workload_raw_local_exp(args): + circuit = make_circuit(args.circuit, args.nqubits, args.nlayers, args.seed) + backend = prepare_quimb_backend( + ansatz=args.ansatz, + max_bond=args.max_bond, + svd_cutoff=args.svd_cutoff, + optimizer=args.optimizer, + n_most_frequent_states=args.topk, + ) + + def run_once(): + metrics = {} + t0 = time.perf_counter() + quimb_circuit = backend._qibo_circuit_to_quimb( # pylint: disable=protected-access + circuit, + quimb_circuit_type=backend.circuit_ansatz, + gate_opts={"max_bond": backend.max_bond_dimension, "cutoff": backend.svd_cutoff}, + ) + metrics["convert_s"] = time.perf_counter() - t0 + + operator = backend._string_to_quimb_operator("z") # pylint: disable=protected-access + if args.rehearse: + t1 = time.perf_counter() + rehearsal = quimb_circuit.local_expectation( + operator, + where=(args.observable_qubit,), + backend=backend.backend, + optimize=backend.contractions_optimizer, + simplify_sequence="R", + rehearse=True, + ) + metrics["rehearse_s"] = time.perf_counter() - t1 + optimize = rehearsal["tree"] + else: + metrics["rehearse_s"] = 0.0 + optimize = backend.contractions_optimizer + + t2 = time.perf_counter() + expval = quimb_circuit.local_expectation( + operator, + where=(args.observable_qubit,), + backend=backend.backend, + optimize=optimize, + simplify_sequence="R", + ) + metrics["contract_s"] = time.perf_counter() - t2 + metrics["total_inner_s"] = ( + metrics["convert_s"] + metrics["rehearse_s"] + metrics["contract_s"] + ) + metrics["expval"] = float(np.real(expval)) + return metrics + + samples, metrics = time_callable( + run_once, args.repeats, args.warmup, args.profile_output, args.profile_sort + ) + summary = summarize_samples(samples) + summary.update( + { + "convert_s": metrics["convert_s"], + "rehearse_s": metrics["rehearse_s"], + "contract_s": metrics["contract_s"], + "total_inner_s": metrics["total_inner_s"], + } + ) + + correctness = None + if not args.no_compare: + ref_state, ref_time = run_qibojit_state(circuit) + ref_expval = z_expectation_from_statevector( + ref_state, args.nqubits, args.observable_qubit + ) + correctness = { + "qibojit_time_s": ref_time, + "reference_expval": ref_expval, + "abs_error": abs(ref_expval - metrics["expval"]), + } + + return summary, correctness + + +WORKLOADS = { + "state": workload_state, + "shots": workload_shots, + "convert": workload_convert, + "expectation": workload_expectation, + "raw-local-exp": workload_raw_local_exp, +} + + +def build_parser(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--mode", + choices=sorted(WORKLOADS), + default="raw-local-exp", + help="Workload to benchmark.", + ) + parser.add_argument( + "--circuit", + choices=["ghz", "ising1d", "qaoa", "qft", "rcs", "variational"], + default="variational", + ) + parser.add_argument("--nqubits", type=int, default=10) + parser.add_argument("--nlayers", type=int, default=3) + parser.add_argument("--ansatz", choices=["mps", "tn"], default="tn") + parser.add_argument("--max-bond", type=int, default=None) + parser.add_argument("--svd-cutoff", type=float, default=1e-10) + parser.add_argument("--optimizer", type=str, default="auto-hq") + parser.add_argument("--observable-qubit", type=int, default=0) + parser.add_argument("--nshots", type=int, default=1024) + parser.add_argument("--topk", type=int, default=100) + parser.add_argument("--warmup", type=int, default=1) + parser.add_argument("--repeats", type=int, default=3) + parser.add_argument("--seed", type=int, default=42) + parser.add_argument("--quimb-num-procs", type=int, default=None) + parser.add_argument("--blas-threads", type=int, default=None) + parser.add_argument("--rehearse", action="store_true") + parser.add_argument("--no-compare", action="store_true") + parser.add_argument("--profile-output", type=Path, default=None) + parser.add_argument("--profile-sort", type=str, default="cumulative") + parser.add_argument("--json-output", type=Path, default=None) + return parser + + +def main(): + parser = build_parser() + args = parser.parse_args() + + configure_runtime_env(args.quimb_num_procs, args.blas_threads) + + print( + f"mode={args.mode} circuit={args.circuit} nqubits={args.nqubits} " + f"nlayers={args.nlayers} ansatz={args.ansatz} optimizer={args.optimizer}" + ) + if args.quimb_num_procs is not None or args.blas_threads is not None: + print( + "threads:" + f" QUIMB_NUM_PROCS={os.environ.get('QUIMB_NUM_PROCS')}" + f" OMP_NUM_THREADS={os.environ.get('OMP_NUM_THREADS')}" + ) + + workload = WORKLOADS[args.mode] + summary, correctness = workload(args) + + print("\nTiming") + for key, value in summary.items(): + if isinstance(value, float): + print(f"{key:>16}: {value:.6f}") + else: + print(f"{key:>16}: {value}") + + if correctness is not None: + print("\nCorrectness") + for key, value in correctness.items(): + if isinstance(value, float): + print(f"{key:>16}: {value:.6e}") + else: + print(f"{key:>16}: {value}") + + if args.profile_output is not None: + print(f"\nProfile written to: {args.profile_output}") + + if args.json_output is not None: + payload = {"timing": summary, "correctness": correctness, "args": vars(args)} + args.json_output.parent.mkdir(parents=True, exist_ok=True) + args.json_output.write_text(json.dumps(payload, indent=2, default=str), encoding="utf-8") + print(f"JSON written to: {args.json_output}") + + +if __name__ == "__main__": + main() diff --git a/src/qibotn/backends/quimb.py b/src/qibotn/backends/quimb.py index c378aef..5ece54f 100644 --- a/src/qibotn/backends/quimb.py +++ b/src/qibotn/backends/quimb.py @@ -186,7 +186,16 @@ def execute_circuit( else: frequencies = None measured_probabilities = None - + ''' + if return_array: + if self.ansatz == "mps": + psi = circ_quimb.psi + statevector = psi.to_dense().reshape(-1) + else: + statevector = circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer) + else: + statevector = None + ''' statevector = ( circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer) if return_array @@ -291,6 +300,15 @@ def _qibo_circuit_to_quimb( quimb_gate_name = GATE_MAP.get(gate_name, None) if quimb_gate_name == "measure": continue + if gate_name == "cu1": + theta = gate.parameters[0] + c, t = gate.qubits + circ.apply_gate("RZ", theta / 2, c) + circ.apply_gate("RZ", theta / 2, t) + circ.apply_gate("CNOT", c, t) + circ.apply_gate("RZ", -theta / 2, t) + circ.apply_gate("CNOT", c, t) + continue if quimb_gate_name is None: raise_error(ValueError, f"Gate {gate_name} not supported in Quimb backend.") diff --git a/src/qibotn/result.py b/src/qibotn/result.py index 6548ebe..2748cb0 100644 --- a/src/qibotn/result.py +++ b/src/qibotn/result.py @@ -57,10 +57,10 @@ class TensorNetworkResult: return self.measures def state(self): - """Return the statevector if the number of qubits is less than 20.""" + """Return the statevector if the number of qubits is less than 35.""" if self.nqubits < 35: return self.statevector raise_error( NotImplementedError, - f"Tensor network simulation cannot be used to reconstruct statevector for >= 20 .", + f"Tensor network simulation cannot be used to reconstruct statevector for >= 35 .", )