diff --git a/bench_profile.py b/bench_profile.py deleted file mode 100644 index 26f8ac7..0000000 --- a/bench_profile.py +++ /dev/null @@ -1,460 +0,0 @@ -"""Benchmark: qibotn/quimb generic TN — single-process torch profiling version.""" - -import os -import pickle -import time -import argparse -import numpy as np -import cotengra as ctg -import qibo -from qibo import Circuit, gates - - -def make_circuit(circuit_type, nqubits, nlayers=1): - c = Circuit(nqubits) - - if circuit_type == "qft": - from qibo.models import QFT - return QFT(nqubits) - - elif circuit_type == "variational": - for layer in range(nlayers): - for q in range(nqubits): - c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi))) - - offset = layer % 2 - for q in range(offset, nqubits - 1, 2): - c.add(gates.CZ(q, q + 1)) - - elif circuit_type == "ghz": - c.add(gates.H(0)) - for q in range(nqubits - 1): - c.add(gates.CNOT(q, q + 1)) - - elif circuit_type == "brickwork": - for q in range(nqubits): - c.add(gates.H(q)) - - for layer in range(nlayers): - offset = layer % 2 - for q in range(offset, nqubits - 1, 2): - c.add(gates.CNOT(q, q + 1)) - c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi))) - c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi))) - - else: - raise ValueError(f"Unknown circuit: {circuit_type}") - - return c - - -def make_z_observable(nqubits): - """Z on qubit 0 only — single contraction for benchmarking.""" - return ["z"], [(0,)], [1.0] - - -def export_profiler_outputs(prof, trace_path): - """Export Chrome trace and text table.""" - prof.export_chrome_trace(trace_path) - - table_path = trace_path.replace(".json", ".txt") - with open(table_path, "w") as f: - f.write( - prof.key_averages().table( - sort_by="self_cpu_time_total", - row_limit=200, - ) - ) - - print(f" [torch profiler trace] {trace_path}") - print(f" [torch profiler table] {table_path}") - - -def run_quimb_tn( - circuit, - nqubits, - num_slices, - load_path=None, - save_path=None, -): - """Mode: expval — compute via local_expectation.""" - qibo.set_backend("qibotn", platform="quimb") - b = qibo.get_backend() - b.configure_tn_simulation(ansatz="tn") - - operators, sites, coeffs = make_z_observable(nqubits) - ops = b._string_to_quimb_operator(operators[0]) - - qc = b._qibo_circuit_to_quimb( - circuit, - quimb_circuit_type=b.circuit_ansatz, - gate_opts={"max_bond": None, "cutoff": 1e-10}, - ) - - if load_path: - with open(load_path, "rb") as f: - saved = pickle.load(f) - tree = saved["tree"] - t_search = 0.0 - print(f" [path loaded] {load_path}") - else: - opt = ctg.HyperOptimizer( - methods=["kahypar", "random-greedy", "spinglass"], - max_repeats=16, - parallel=True, - max_time=60, - slicing_opts={"target_slices": num_slices}, - progbar=True, - ) - - t0 = time.time() - rehearsal = qc.local_expectation( - ops, - where=sites[0], - optimize=opt, - simplify_sequence="R", - rehearse=True, - ) - t_search = time.time() - t0 - tree = rehearsal["tree"] - - print( - f" [path search] {t_search:.3f}s " - f"flops~2^{tree.contraction_cost():.2f} " - f"size~2^{tree.contraction_width():.2f} " - f"slices={tree.multiplicity}" - ) - - if save_path: - with open(save_path, "wb") as f: - pickle.dump({"tree": tree}, f) - print(f" [path saved] {save_path}") - - t0 = time.time() - expval = qc.local_expectation( - ops, - where=sites[0], - optimize=tree, - simplify_sequence="R", - ) - t_contract = time.time() - t0 - - print(f" [contraction] {t_contract:.3f}s") - - return float(expval.real), t_search + t_contract - - -def run_quimb_tn_statevector( - circuit, - nqubits, - num_slices, - load_path=None, - save_path=None, - profile=False, - profile_dir="profiles", -): - """Mode: statevector — contract full TN to dense vector, single process.""" - qibo.set_backend("qibotn", platform="quimb") - b = qibo.get_backend() - b.configure_tn_simulation(ansatz="tn") - - import torch - - qc = b._qibo_circuit_to_quimb( - circuit, - quimb_circuit_type=b.circuit_ansatz, - gate_opts={"max_bond": None, "cutoff": 1e-10}, - ) - - # 让 quimb 生成 torch tensor,这样 torch.profiler 能抓到 aten op。 - qc.to_backend = torch.from_numpy - - if load_path: - with open(load_path, "rb") as f: - saved = pickle.load(f) - tree = saved["tree"] - t_search = 0.0 - print(f" [path loaded] {load_path}") - else: - opt = ctg.HyperOptimizer( - methods=["kahypar", "random-greedy", "spinglass"], - max_repeats=500, - parallel=48, - max_time=100, - minimize="size", - slicing_opts={"target_slices": num_slices}, - progbar=True, - ) - - t0 = time.time() - rehearsal = qc.to_dense(optimize=opt, rehearse=True) - t_search = time.time() - t0 - tree = rehearsal["tree"] - - print( - f" [path search] {t_search:.3f}s " - f"flops~2^{tree.contraction_cost():.2f} " - f"size~2^{tree.contraction_width():.2f} " - f"slices={tree.multiplicity}" - ) - - if save_path: - with open(save_path, "wb") as f: - pickle.dump({"tree": tree}, f) - print(f" [path saved] {save_path}") - - os.makedirs(profile_dir, exist_ok=True) - - if profile: - from torch.profiler import profile as torch_profile - from torch.profiler import ProfilerActivity, record_function - - trace_path = os.path.join( - profile_dir, - ( - f"trace_statevector_" - f"{circuit.nqubits}q_" - f"slices{tree.multiplicity}_" - f"{int(time.time())}.json" - ), - ) - - t0 = time.time() - - with torch_profile( - activities=[ProfilerActivity.CPU], - record_shapes=True, - profile_memory=True, - with_stack=True, - ) as prof: - with record_function("qibotn_to_dense_contraction"): - sv = qc.to_dense(optimize=tree).reshape(-1) - - with record_function("torch_to_numpy_view_or_copy"): - if type(sv).__module__.startswith("torch"): - sv_tn = sv.detach().cpu().numpy() - else: - sv_tn = np.asarray(sv) - - t_contract = time.time() - t0 - - export_profiler_outputs(prof, trace_path) - - else: - t0 = time.time() - sv = qc.to_dense(optimize=tree).reshape(-1) - t_contract = time.time() - t0 - - if type(sv).__module__.startswith("torch"): - sv_tn = sv.detach().cpu().numpy() - else: - sv_tn = np.asarray(sv) - - print(f" [contraction] {t_contract:.3f}s") - - return sv_tn, t_search + t_contract - - -def run_quimb_tn_samples(circuit, nshots=1024): - """Mode: samples — sample from circuit output distribution.""" - qibo.set_backend("qibotn", platform="quimb") - b = qibo.get_backend() - b.configure_tn_simulation(ansatz="tn") - - t0 = time.time() - result = b.execute_circuit(circuit, nshots=nshots) - t_total = time.time() - t0 - - print(f" [sampling] {t_total:.3f}s nshots={nshots}") - - try: - freqs = result.frequencies() - print(f" top states: {dict(list(freqs.items())[:5])}") - except Exception: - pass - - return result, t_total - - -def qibojit_expval(circuit, nqubits): - """Compute via qibojit statevector.""" - qibo.set_backend("qibojit", platform="numba") - - t0 = time.time() - result = circuit() - elapsed = time.time() - t0 - - sv = np.array(result.state(), dtype=complex).flatten() - probs = np.abs(sv) ** 2 - - bits = (np.arange(len(probs)) >> (nqubits - 1)) & 1 - expval = float(np.dot(probs, 1 - 2 * bits)) - - return expval, elapsed - - -def run_qibojit(circuit): - """Compute full statevector via qibojit.""" - qibo.set_backend("qibojit", platform="numba") - - t0 = time.time() - result = circuit() - elapsed = time.time() - t0 - - sv = np.array(result.state(), dtype=complex).flatten() - - return sv, elapsed - - -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument("--nqubits", type=int, default=10) - parser.add_argument( - "--circuit", - type=str, - default="qft", - choices=["qft", "variational", "ghz", "brickwork"], - ) - parser.add_argument("--nlayers", type=int, default=3) - parser.add_argument("--num-slices", type=int, default=1) - parser.add_argument("--nshots", type=int, default=1024) - - parser.add_argument( - "--mode", - type=str, - default="statevector", - choices=["expval", "statevector", "samples"], - help="expval: local_expectation; statevector: to_dense; samples: sampling", - ) - - parser.add_argument( - "--no-compare", - action="store_true", - help="Skip qibojit reference run", - ) - - parser.add_argument( - "--save-path", - type=str, - default=None, - help="Save contraction tree to a pickle file", - ) - - parser.add_argument( - "--load-path", - type=str, - default=None, - help="Load contraction tree from a pickle file and skip path search", - ) - - parser.add_argument( - "--profile", - action="store_true", - help="Enable torch profiler for statevector contraction stage", - ) - - parser.add_argument( - "--profile-dir", - type=str, - default="profiles", - help="Directory to save torch profiler traces", - ) - - parser.add_argument( - "--save-statevector", - action="store_true", - help="Save TN statevector to data/sv_tn_*.npy", - ) - - args = parser.parse_args() - - print( - f"Circuit: {args.circuit}, " - f"nqubits={args.nqubits}, " - f"nlayers={args.nlayers}, " - f"mode={args.mode}, " - f"profile={args.profile}" - ) - - np.random.seed(42) - circuit_tn = make_circuit(args.circuit, args.nqubits, args.nlayers) - - try: - if args.mode == "expval": - expval_tn, t_tn = run_quimb_tn( - circuit_tn, - args.nqubits, - args.num_slices, - load_path=args.load_path, - save_path=args.save_path, - ) - - print(f"\n[quimb TN] time={t_tn:.4f}s ={expval_tn:.8f}") - - elif args.mode == "statevector": - sv_tn, t_tn = run_quimb_tn_statevector( - circuit_tn, - args.nqubits, - args.num_slices, - load_path=args.load_path, - save_path=args.save_path, - profile=args.profile, - profile_dir=args.profile_dir, - ) - - print( - f"\n[quimb TN] time={t_tn:.4f}s " - f"statevector shape={sv_tn.shape}" - ) - - if args.save_statevector: - os.makedirs("data", exist_ok=True) - out_path = f"data/sv_tn_{args.circuit}{args.nqubits}.npy" - np.save(out_path, sv_tn) - print(f"[saved statevector] {out_path}") - - else: - _, t_tn = run_quimb_tn_samples( - circuit_tn, - nshots=args.nshots, - ) - - print(f"\n[quimb TN] time={t_tn:.4f}s") - args.no_compare = True - - except Exception as e: - print(f"[quimb TN] FAILED: {e}") - raise - - if not args.no_compare and args.mode != "statevector": - np.random.seed(42) - circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) - - expval_ref, t_ref = qibojit_expval(circuit_ref, args.nqubits) - - print(f"[qibojit] time={t_ref:.4f}s ={expval_ref:.8f}") - print(f"\nDiff : {abs(expval_tn - expval_ref):.2e}") - - if t_tn > 0: - print(f"Speedup : {t_ref / t_tn:.2f}x") - - elif not args.no_compare and args.mode == "statevector" and sv_tn is not None: - np.random.seed(42) - circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) - - sv_ref, t_ref = run_qibojit(circuit_ref) - - fid = abs(np.dot(sv_ref.conj(), sv_tn)) ** 2 - l2_err = np.linalg.norm(sv_ref - sv_tn) - - print(f"[qibojit] time={t_ref:.4f}s") - print(f"Fidelity : {fid:.8f} (1=perfect)") - print(f"L2 error : {l2_err:.2e}") - - if t_tn > 0: - print(f"Speedup : {t_ref / t_tn:.2f}x") - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/benchmark_expectation.py b/benchmark_expectation.py new file mode 100644 index 0000000..4c70ecf --- /dev/null +++ b/benchmark_expectation.py @@ -0,0 +1,56 @@ +"""Expectation value benchmark with timing breakdown.""" +import time +import numpy as np +import torch +import qibo +from qibo import Circuit, gates, hamiltonians +from qibo.symbols import X, Z +import quimb as qu +from qibotn.parallel import parallel_path_search + +# Config +NQUBITS = 25 +NLAYERS = 10 +WORKERS = 96 + +# Build circuit +np.random.seed(42) +circuit = Circuit(NQUBITS) +for _ in range(NLAYERS): + for q in range(NQUBITS): + circuit.add(gates.RY(q, theta=np.random.uniform(0, 2*np.pi))) + circuit.add(gates.RZ(q, theta=np.random.uniform(0, 2*np.pi))) + for q in range(NQUBITS): + circuit.add(gates.CNOT(q % NQUBITS, (q + 1) % NQUBITS)) + +# Setup backend +qibo.set_backend("qibotn", platform="quimb") +backend = qibo.get_backend() +backend.configure_tn_simulation(ansatz="tn") +torch.set_num_threads(WORKERS) + +# Prepare TN +t0 = time.time() +qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz) +op = qu.pauli('x') & qu.pauli('z') +tn = qc.local_expectation(op, (0, 1), rehearse='tn') +t_prepare = time.time() - t0 + +# Search path +t0 = time.time() +tree = parallel_path_search(tn, tn.outer_inds(), 'processpool', 1024, 300, WORKERS) +t_search = time.time() - t0 + +# Contract +t0 = time.time() +for tensor in tn.tensors: + tensor._data = torch.from_numpy(np.asarray(tensor._data)).to(torch.complex128) +val = tn.contract(all, output_inds=(), optimize=tree) +t_contract = time.time() - t0 + +# Results +print(f"Prepare: {t_prepare:.4f}s") +print(f"Search: {t_search:.4f}s") +print(f"Contract: {t_contract:.4f}s") +print(f"Total: {t_prepare+t_search+t_contract:.4f}s") +print(f"Expectation: {0.5 * complex(val).real:.10f}") diff --git a/benchmark_mps.py b/benchmark_mps.py deleted file mode 100644 index 827fd77..0000000 --- a/benchmark_mps.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Benchmark: qibojit (reference) vs qibotn/quimb MPS, with error comparison.""" -import time -import argparse -import os -import numpy as np -import qibo -import quimb.tensor as qtn -from qibo import Circuit, gates - -DATA_DIR = os.path.join(os.path.dirname(__file__), "data") - - -def make_circuit(circuit_type, nqubits, nlayers=1, add_measurements=False): - c = Circuit(nqubits) - if circuit_type == "qft": - from qibo.models import QFT - c = QFT(nqubits) - elif circuit_type == "variational": - for layer in range(nlayers): - for q in range(nqubits): - c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi))) - offset = layer % 2 - for q in range(offset, nqubits - 1, 2): - c.add(gates.CZ(q, q + 1)) - elif circuit_type == "ghz": - c.add(gates.H(0)) - for q in range(nqubits - 1): - c.add(gates.CNOT(q, q + 1)) - else: - raise ValueError(f"Unknown circuit: {circuit_type}") - if add_measurements: - c.add(gates.M(*range(nqubits))) - return c - - -def run_qibojit(circuit): - qibo.set_backend("qibojit", platform="numba") - t0 = time.time() - result = circuit() - elapsed = time.time() - t0 - sv = result.state() - return sv, elapsed - - -def run_quimb_mps(circuit, max_bond, svd_cutoff, optimizer, nshots=None): - qibo.set_backend("qibotn", platform="quimb") - b = qibo.get_backend() - b.configure_tn_simulation(ansatz="mps", max_bond_dimension=max_bond, svd_cutoff=svd_cutoff) - b.contractions_optimizer = optimizer - - t0 = time.time() - if nshots: - result = b.execute_circuit(circuit, nshots=nshots) - elapsed = time.time() - t0 - return result.frequencies(), elapsed, 0.0 - else: - # MPS simulation - circ_quimb = qtn.CircuitMPS.from_openqasm2_str( - circuit.to_qasm(), - gate_opts={"max_bond": max_bond, "cutoff": svd_cutoff}, - ) - t_mps = time.time() - t0 - # to_dense separately - t1 = time.time() - #sv = circ_quimb.psi.to_dense().reshape(-1) - sv = None - t_dense = time.time() - t1 - return sv, t_mps, t_dense - - -def run_quimb_permmps(circuit, max_bond, svd_cutoff, nshots=None): - gates_list = [ - qtn.Gate(g.name, params=list(g.parameters), qubits=list(g.qubits)) - for g in circuit.queue - if g.name.lower() != "measure" - ] - t0 = time.time() - circ = qtn.CircuitPermMPS.from_gates( - gates_list, - N=circuit.nqubits, - max_bond=max_bond, - cutoff=svd_cutoff, - ) - if nshots: - from collections import Counter - result = Counter(circ.sample(nshots)) - elapsed = time.time() - t0 - return dict(result), elapsed - else: - mps = circ.get_psi_unordered() - sv = mps.to_dense().reshape(-1) - elapsed = time.time() - t0 - return sv, elapsed - - -def compare_statevector(sv_ref, sv_mps): - sv_ref = np.array(sv_ref, dtype=complex).flatten() - sv_mps = np.array(sv_mps, dtype=complex).flatten() - fidelity = abs(np.dot(sv_ref.conj(), sv_mps)) ** 2 - l2_err = np.linalg.norm(sv_ref - sv_mps) - return fidelity, l2_err - - -def compare_frequencies(freq_ref, freq_mps, nshots): - all_keys = set(freq_ref) | set(freq_mps) - tvd = 0.5 * sum(abs(freq_ref.get(k, 0) - freq_mps.get(k, 0)) for k in all_keys) / nshots - return tvd - - -def jit_cache_path(circuit_type, nqubits, nlayers): - os.makedirs(DATA_DIR, exist_ok=True) - return os.path.join(DATA_DIR, f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy") - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--nqubits", type=int, default=10) - parser.add_argument("--circuit", type=str, default="ghz", - choices=["qft", "variational", "ghz"]) - parser.add_argument("--nlayers", type=int, default=3) - parser.add_argument("--max-bond", type=int, default=None, - help="Max bond dimension for MPS (None = unlimited)") - parser.add_argument("--svd-cutoff", type=float, default=1e-6) - parser.add_argument("--optimizer", type=str, default="eager") - parser.add_argument("--nshots", type=int, default=None, - help="Use sampling mode with given number of shots instead of statevector") - parser.add_argument("--permmps", action="store_true", - help="Use CircuitPermMPS directly instead of qibotn backend") - parser.add_argument("--skip-jit", action="store_true", - help="Skip qibojit run, load cached statevector if available") - parser.add_argument("--no-compare", action="store_true", - help="Skip correctness comparison entirely") - args = parser.parse_args() - - print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}") - print(f"MPS config: max_bond={args.max_bond}, svd_cutoff={args.svd_cutoff}, optimizer={args.optimizer}") - - ref = None - t_ref = None - - if not args.no_compare: - cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers) - if args.nshots: - # frequency mode: run qibojit with same nshots - np.random.seed(42) - circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers, add_measurements=True) - qibo.set_backend("qibojit", platform="numba") - t0 = time.time() - result_ref = circuit_ref(nshots=args.nshots) - t_ref = time.time() - t0 - ref = dict(result_ref.frequencies()) - print(f"\n[qibojit] time={t_ref:.4f}s") - elif args.skip_jit and os.path.exists(cache_path): - ref = np.load(cache_path) - print(f"\n[qibojit] loaded from cache: {cache_path}") - else: - np.random.seed(42) - circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) - ref, t_ref = run_qibojit(circuit_ref) - np.save(cache_path, ref) - print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})") - - np.random.seed(42) - circuit_mps = make_circuit(args.circuit, args.nqubits, args.nlayers) - label = "quimb PermMPS" if args.permmps else "quimb MPS" - try: - if args.permmps: - out, t_mps = run_quimb_permmps(circuit_mps, args.max_bond, args.svd_cutoff, args.nshots) - t_dense = 0.0 - else: - out, t_mps, t_dense = run_quimb_mps(circuit_mps, args.max_bond, args.svd_cutoff, args.optimizer, args.nshots) - print(f"[{label}] MPS sim={t_mps:.4f}s to_dense={t_dense:.4f}s total={t_mps+t_dense:.4f}s") - if not args.no_compare: - if args.nshots: - tvd = compare_frequencies(ref, out, args.nshots) - print(f"\nTVD : {tvd:.6f} (0=perfect)") - else: - fidelity, l2_err = compare_statevector(ref, out) - print(f"\nFidelity : {fidelity:.8f} (1=perfect)") - print(f"L2 error : {l2_err:.2e}") - if t_ref is not None and t_mps > 0: - print(f"Speedup : {t_ref/t_mps:.2f}x") - except Exception as e: - print(f"[quimb MPS] FAILED: {e}") - raise - - -if __name__ == "__main__": - main() diff --git a/benchmark_qmatchatea.py b/benchmark_qmatchatea.py deleted file mode 100644 index 9e7923b..0000000 --- a/benchmark_qmatchatea.py +++ /dev/null @@ -1,126 +0,0 @@ -"""Benchmark: qibojit (reference) vs qibotn/qmatchatea MPS.""" -import time -import argparse -import os -import numpy as np -import qibo -from qibo import Circuit, gates -from qibo.backends import construct_backend - -DATA_DIR = os.path.join(os.path.dirname(__file__), "data") - - -def make_circuit(circuit_type, nqubits, nlayers=1): - c = Circuit(nqubits) - if circuit_type == "qft": - from qibo.models import QFT - return QFT(nqubits) - elif circuit_type == "variational": - for layer in range(nlayers): - for q in range(nqubits): - c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi))) - offset = layer % 2 - for q in range(offset, nqubits - 1, 2): - c.add(gates.CZ(q, q + 1)) - elif circuit_type == "ghz": - c.add(gates.H(0)) - for q in range(nqubits - 1): - c.add(gates.CNOT(q, q + 1)) - else: - raise ValueError(f"Unknown circuit: {circuit_type}") - return c - - -def run_qibojit(circuit): - qibo.set_backend("qibojit", platform="numba") - t0 = time.time() - result = circuit() - elapsed = time.time() - t0 - return result.state(), elapsed - - -def run_qmatchatea(circuit, max_bond, cut_ratio): - import qmatchatea, qtealeaves.observables - from qibo.backends import construct_backend as _cb - b = _cb(backend="qibotn", platform="qmatchatea") - b.configure_tn_simulation(ansatz="MPS", max_bond_dimension=max_bond, cut_ratio=cut_ratio) - - qk_circuit = b._qibocirc_to_qiskitcirc(circuit) - run_qk_params = qmatchatea.preprocessing.qk_transpilation_params(False) - observables = qtealeaves.observables.TNObservables() - observables += qtealeaves.observables.TNState2File(name="temp", formatting="D") - - t0 = time.time() - results = qmatchatea.run_simulation( - circ=qk_circuit, - convergence_parameters=b.convergence_params, - transpilation_parameters=run_qk_params, - backend=b.qmatchatea_backend, - observables=observables, - ) - elapsed = time.time() - t0 - tn_state = results.observables.get("tn_state") - if tn_state is None: - results.load_state() - tn_state = results.observables["tn_state"] - sv_obj = tn_state.to_statevector(qiskit_order=False, max_qubit_equivalent=40) - sv = np.array(sv_obj.elem, dtype=complex).flatten() - return sv, elapsed - - -def compare(sv_ref, sv_mps): - sv_ref = np.array(sv_ref, dtype=complex).flatten() - fidelity = abs(np.dot(sv_ref.conj(), sv_mps)) ** 2 - l2_err = np.linalg.norm(sv_ref - sv_mps) - return fidelity, l2_err - - -def jit_cache_path(circuit_type, nqubits, nlayers): - os.makedirs(DATA_DIR, exist_ok=True) - return os.path.join(DATA_DIR, f"jit_{circuit_type}_n{nqubits}_l{nlayers}.npy") - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--nqubits", type=int, default=10) - parser.add_argument("--circuit", type=str, default="ghz", - choices=["qft", "variational", "ghz"]) - parser.add_argument("--nlayers", type=int, default=3) - parser.add_argument("--max-bond", type=int, default=64) - parser.add_argument("--cut-ratio", type=float, default=1e-6) - parser.add_argument("--skip-jit", action="store_true") - args = parser.parse_args() - - print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}") - print(f"MPS config: max_bond={args.max_bond}, cut_ratio={args.cut_ratio}") - - cache_path = jit_cache_path(args.circuit, args.nqubits, args.nlayers) - t_ref = None - - if args.skip_jit and os.path.exists(cache_path): - sv_ref = np.load(cache_path) - print(f"\n[qibojit] loaded from cache: {cache_path}") - else: - np.random.seed(42) - circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) - sv_ref, t_ref = run_qibojit(circuit_ref) - np.save(cache_path, sv_ref) - print(f"\n[qibojit] time={t_ref:.4f}s (saved to {cache_path})") - - np.random.seed(42) - circuit_mps = make_circuit(args.circuit, args.nqubits, args.nlayers) - try: - sv_mps, t_mps = run_qmatchatea(circuit_mps, args.max_bond, args.cut_ratio) - fidelity, l2_err = compare(sv_ref, sv_mps) - print(f"[qmatchatea] time={t_mps:.4f}s") - print(f"\nFidelity : {fidelity:.8f} (1=perfect)") - print(f"L2 error : {l2_err:.2e}") - if t_ref is not None and t_mps > 0: - print(f"Speedup : {t_ref/t_mps:.2f}x") - except Exception as e: - print(f"[qmatchatea] FAILED: {e}") - raise - - -if __name__ == "__main__": - main() diff --git a/benchmark_tn.py b/benchmark_tn.py deleted file mode 100644 index e0e9848..0000000 --- a/benchmark_tn.py +++ /dev/null @@ -1,386 +0,0 @@ -"""Benchmark: qibotn/quimb generic TN — expectation values.""" -import multiprocessing -multiprocessing.set_start_method("spawn", force=True) -import pickle -import time -import threading -import argparse -import numpy as np -import cotengra as ctg -import qibo -from qibo import Circuit, gates - -class TimedTrialFn: - def __init__(self, trial_fn, timeout=15): - self.trial_fn = trial_fn - self.timeout = timeout - - def __call__(self, *args, **kwargs): - result = [None] - exc = [None] - - def _run(): - try: - result[0] = self.trial_fn(*args, **kwargs) - except Exception as e: - exc[0] = e - - t = threading.Thread(target=_run, daemon=True) - t.start() - t.join(self.timeout) - if t.is_alive(): - raise TimeoutError(f"trial exceeded {self.timeout}s") - if exc[0] is not None: - raise exc[0] - return result[0] - -def make_circuit(circuit_type, nqubits, nlayers=1): - c = Circuit(nqubits) - if circuit_type == "qft": - from qibo.models import QFT - return QFT(nqubits) - elif circuit_type == "variational": - for layer in range(nlayers): - for q in range(nqubits): - c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi))) - offset = layer % 2 - for q in range(offset, nqubits - 1, 2): - c.add(gates.CZ(q, q + 1)) - elif circuit_type == "ghz": - c.add(gates.H(0)) - for q in range(nqubits - 1): - c.add(gates.CNOT(q, q + 1)) - elif circuit_type == "brickwork": - for q in range(nqubits): - c.add(gates.H(q)) - for layer in range(nlayers): - offset = layer % 2 - for q in range(offset, nqubits - 1, 2): - c.add(gates.CNOT(q, q + 1)) - c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi))) - c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi))) - else: - raise ValueError(f"Unknown circuit: {circuit_type}") - return c - - - -def make_z_observable(nqubits): - """Z on qubit 0 only — single contraction for benchmarking""" - return ["z"], [(0,)], [1.0] - - -def run_quimb_tn(circuit, nqubits, num_slices, load_path=None, save_path=None): - """Mode: expval — compute via local_expectation (lightcone pruning).""" - qibo.set_backend("qibotn", platform="quimb") - b = qibo.get_backend() - b.configure_tn_simulation(ansatz="tn") - - operators, sites, coeffs = make_z_observable(nqubits) - ops = b._string_to_quimb_operator(operators[0]) - qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz, - gate_opts={"max_bond": None, "cutoff": 1e-10}) - - if load_path: - with open(load_path, "rb") as f: - saved = pickle.load(f) - tree = saved["tree"] - t_search = 0.0 - print(f" [path loaded] {load_path}") - else: - opt = ctg.HyperOptimizer( - methods=['kahypar', 'random-greedy', 'spinglass'], - max_repeats=16, - parallel=True, - max_time=60, - slicing_opts={'target_slices': num_slices}, - progbar=True, - ) - t0 = time.time() - rehearsal = qc.local_expectation(ops, where=sites[0], optimize=opt, - simplify_sequence="R", rehearse=True) - t_search = time.time() - t0 - tree = rehearsal['tree'] - print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}") - - if save_path: - with open(save_path, "wb") as f: - pickle.dump({"tree": tree}, f) - print(f" [path saved] {save_path}") - - t0 = time.time() - expval = qc.local_expectation(ops, where=sites[0], optimize=tree, simplify_sequence="R") - t_contract = time.time() - t0 - print(f" [contraction] {t_contract:.3f}s") - - return float(expval.real), t_search + t_contract - - -def run_quimb_tn_statevector(circuit, nqubits, num_slices, load_path=None, save_path=None): - """Mode: statevector — contract full TN to dense vector.""" - qibo.set_backend("qibotn", platform="quimb") - b = qibo.get_backend() - b.configure_tn_simulation(ansatz="tn") - - import torch - qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz, - gate_opts={"max_bond": None, "cutoff": 1e-10}) - qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex64) - if load_path: - with open(load_path, "rb") as f: - saved = pickle.load(f) - tree = saved["tree"] - t_search = 0.0 - print(f" [path loaded] {load_path}") - else: - opt = ctg.HyperOptimizer( - #methods=['kahypar', 'random-greedy', 'spinglass'], - max_repeats=1024, - #parallel="concurrent.futures", - parallel=64, - max_time=60, - minimize='size', - slicing_opts={'target_slices': num_slices}, - #slicing_opts={'target_size': 2**30}, - progbar=True, - on_trial_error='ignore' - ) - - t0 = time.time() - rehearsal = qc.to_dense(optimize=opt, rehearse=True) - t_search = time.time() - t0 - tree = rehearsal['tree'] - #print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}") - - if save_path: - with open(save_path, "wb") as f: - pickle.dump({"tree": tree}, f) - print(f" [path saved] {save_path}") - print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f}") - return None, t_search - - t0 = time.time() - sv = qc.to_dense(optimize=tree).reshape(-1) - t_contract = time.time() - t0 - print(f" [contraction] {t_contract:.3f}s") - sv_tn = np.array(sv) - return sv_tn, t_search + t_contract - - -def _contract_mpi(tree, arrays, comm, root=0): - """Contract slices via MPI, returning result as the same array type as input. - - Unlike ``cotengra.ContractionTree.contract_mpi``, this works with any - array backend (numpy, torch, etc.) — it only converts to numpy at the - MPI-reduce boundary and converts back. - """ - size = comm.Get_size() - rank = comm.Get_rank() - - result_np = None - is_torch = type(arrays[0]).__module__.startswith("torch") - - for i in range(rank, tree.multiplicity, size): - x = tree.contract_slice(arrays, i) - x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x)) - - if result_np is None: - result_np = x_np - else: - result_np += x_np - - if result_np is None: - result_np = np.zeros(1, dtype=np.complex64) - - if rank == root: - result = np.zeros_like(result_np) - else: - result = None - comm.Reduce(result_np, result, root=root) - - if rank == root: - import torch - return torch.from_numpy(np.asarray(result)) if is_torch else result - return None - - -def run_quimb_tn_statevector_mpi(circuit, nqubits, num_slices, load_path=None, save_path=None): - """MPI-parallel statevector via custom MPI contraction (supports torch backend).""" - from mpi4py import MPI - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - size = comm.Get_size() - - qibo.set_backend("qibotn", platform="quimb") - b = qibo.get_backend() - b.configure_tn_simulation(ansatz="tn") - - import torch - qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz, - gate_opts={"max_bond": None, "cutoff": 1e-10}) - qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex64) - - if load_path: - if rank == 0: - with open(load_path, "rb") as f: - saved = pickle.load(f) - tree, psi, t_search = saved["tree"], saved["psi"], 0.0 - print(f" [path loaded] {load_path}") - else: - tree = psi = None - t_search = 0.0 - else: - # each rank runs serial search over its share of trials - total_repeats = 1024 - rank_repeats = max(1, total_repeats // size) - opt = ctg.HyperOptimizer( - methods=['kahypar', 'random-greedy', 'spinglass'], - max_repeats=rank_repeats, - parallel=False, - max_time=100, - minimize='size', - slicing_opts={'target_slices': max(num_slices, size), 'allow_outer': False}, - progbar=(rank == 0), - ) - t0 = time.time() - rehearsal = qc.to_dense(optimize=opt, rehearse=True) - t_search = time.time() - t0 - local_tree = rehearsal['tree'] - local_psi = rehearsal['tn'] - local_size = local_tree.contraction_width() - - # gather all trees to rank 0, pick best by contraction_width - all_results = comm.gather((local_size, local_tree, local_psi), root=0) - if rank == 0: - _, tree, psi = min(all_results, key=lambda x: x[0]) - print(f" [path search] {t_search:.3f}s flops~2^{tree.contraction_cost():.2f} size~2^{tree.contraction_width():.2f} slices={tree.multiplicity}") - if save_path: - with open(save_path, "wb") as f: - pickle.dump({"tree": tree, "psi": psi}, f) - print(f" [path saved] {save_path}") - else: - tree = psi = None - - tree = comm.bcast(tree, root=0) - psi = comm.bcast(psi, root=0) - t_search = comm.bcast(t_search, root=0) - - arrays = psi.arrays - t0 = time.time() - sv = _contract_mpi(tree, arrays, comm, root=0) - t_contract = time.time() - t0 - - if rank == 0: - print(f" [contraction] {t_contract:.3f}s") - return np.array(sv).reshape(-1), t_search + t_contract - return None, t_search + t_contract - - -def run_quimb_tn_samples(circuit, nshots=1024): - """Mode: samples — sample from circuit output distribution.""" - qibo.set_backend("qibotn", platform="quimb") - b = qibo.get_backend() - b.configure_tn_simulation(ansatz="tn") - - t0 = time.time() - result = b.execute_circuit(circuit, nshots=nshots) - t_total = time.time() - t0 - print(f" [sampling] {t_total:.3f}s nshots={nshots}") - print(f" top states: {dict(list(result.frequencies().items())[:5])}") - return result, t_total - - -def qibojit_expval(circuit, nqubits): - """Compute via qibojit statevector.""" - qibo.set_backend("qibojit", platform="numba") - t0 = time.time() - result = circuit() - elapsed = time.time() - t0 - sv = np.array(result.state(), dtype=complex).flatten() - probs = np.abs(sv) ** 2 - bits = (np.arange(len(probs)) >> (nqubits - 1)) & 1 - expval = float(np.dot(probs, 1 - 2 * bits)) - return expval, elapsed - - -def run_qibojit(circuit): - """Compute full statevector via qibojit.""" - qibo.set_backend("qibojit", platform="numba") - t0 = time.time() - result = circuit() - elapsed = time.time() - t0 - sv = np.array(result.state(), dtype=complex).flatten() - return sv, elapsed - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--nqubits", type=int, default=10) - parser.add_argument("--circuit", type=str, default="qft", - choices=["qft", "variational", "ghz", "brickwork"]) - parser.add_argument("--nlayers", type=int, default=3) - parser.add_argument("--num-slices", type=int, default=1) - parser.add_argument("--nshots", type=int, default=1024) - parser.add_argument("--mode", type=str, default="statevector", - choices=["expval", "statevector", "samples"], - help="expval: local_expectation; statevector: to_dense; samples: sampling") - parser.add_argument("--mpi", action="store_true", - help="Use MPI-parallel contraction (run with mpirun -n N)") - parser.add_argument("--no-compare", action="store_true", - help="Skip qibojit reference run") - parser.add_argument("--save-path", type=str, default=None, - help="Save contraction tree to a pickle file") - parser.add_argument("--load-path", type=str, default=None, - help="Load contraction tree from a pickle file (skip path search)") - args = parser.parse_args() - - print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, nlayers={args.nlayers}, mode={args.mode}") - - np.random.seed(42) - circuit_tn = make_circuit(args.circuit, args.nqubits, args.nlayers) - try: - if args.mode == "expval": - expval_tn, t_tn = run_quimb_tn(circuit_tn, args.nqubits, args.num_slices, - load_path=args.load_path, save_path=args.save_path) - print(f"\n[quimb TN] time={t_tn:.4f}s ={expval_tn:.8f}") - elif args.mode == "statevector": - if args.mpi: - sv_tn, t_tn = run_quimb_tn_statevector_mpi(circuit_tn, args.nqubits, args.num_slices, - load_path=args.load_path, save_path=args.save_path) - else: - sv_tn, t_tn = run_quimb_tn_statevector(circuit_tn, args.nqubits, args.num_slices, - load_path=args.load_path, save_path=args.save_path) - if sv_tn is not None: - print(f"\n[quimb TN] time={t_tn:.4f}s statevector shape={sv_tn.shape}") - np.save(f"data/sv_tn_{args.circuit}{args.nqubits}.npy", sv_tn) - else: - _, t_tn = run_quimb_tn_samples(circuit_tn, args.nqubits, args.nshots) - print(f"\n[quimb TN] time={t_tn:.4f}s") - args.no_compare = True # samples 模式无法和 qibojit 期望值对比 - except Exception as e: - print(f"[quimb TN] FAILED: {e}") - raise - - if not args.no_compare and args.mode != "statevector": - np.random.seed(42) - circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) - expval_ref, t_ref = qibojit_expval(circuit_ref, args.nqubits) - print(f"[qibojit] time={t_ref:.4f}s ={expval_ref:.8f}") - print(f"\nDiff : {abs(expval_tn - expval_ref):.2e}") - if t_tn > 0: - print(f"Speedup : {t_ref/t_tn:.2f}x") - elif not args.no_compare and args.mode == "statevector" and sv_tn is not None: - np.random.seed(42) - circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) - sv_ref, t_ref = run_qibojit(circuit_ref) - fid = abs(np.dot(sv_ref.conj(), sv_tn)) ** 2 - l2_err = np.linalg.norm(sv_ref - sv_tn) - print(f"[qibojit] time={t_ref:.4f}s") - print(f"Fidelity : {fid:.8f} (1=perfect)") - print(f"L2 error : {l2_err:.2e}") - if t_tn > 0: - print(f"Speedup : {t_ref/t_tn:.2f}x") - - -if __name__ == "__main__": - main() diff --git a/benchmark_tn_mpi.py b/benchmark_tn_mpi.py index ae76eca..8dc80d1 100644 --- a/benchmark_tn_mpi.py +++ b/benchmark_tn_mpi.py @@ -1,4 +1,5 @@ """MPI-parallel TN benchmark: path search + contraction via MPI.""" +import json import pickle import time import argparse @@ -8,9 +9,38 @@ import qibo from qibo import Circuit, gates from mpi4py import MPI from concurrent.futures import ProcessPoolExecutor, as_completed +from qibotn.observables import check_observable, extract_gates_and_qubits -def _run_serial_search(tn_bytes, output_inds, repeats, seed, num_slices, n_ranks, max_time=600): +def _load_observable(observable_file=None, observable_json=None): + if observable_file: + with open(observable_file, "r", encoding="utf8") as f: + return json.load(f) + if observable_json: + return json.loads(observable_json) + return None + + +def _term_to_quimb_operator(term): + """Convert one extracted Hamiltonian term to a quimb operator.""" + import quimb as qu + + coeff = complex(term[0][2]) if term else 1.0 + op = None + where = [] + + for qubit, gate_name, _ in term: + qubit = int(qubit) + gate_name = str(gate_name).upper() + if gate_name == "I": + continue + where.append(qubit) + op = qu.pauli(gate_name.lower()) if op is None else op & qu.pauli(gate_name.lower()) + + return complex(coeff), op, tuple(where) + + +def _run_serial_search(tn_bytes, output_inds, repeats, seed, num_slices, n_ranks, max_time): import pickle, cotengra as ctg, random random.seed(seed) tn = pickle.loads(tn_bytes) @@ -29,10 +59,14 @@ def _run_serial_search(tn_bytes, output_inds, repeats, seed, num_slices, n_ranks def parallel_search(tn, output_inds, total_repeats, n_workers, num_slices, n_ranks, - timeout=60): + timeout): import pickle, os, signal from concurrent.futures import ProcessPoolExecutor, as_completed tn_bytes = pickle.dumps(tn) + if n_workers <= 1: + return _run_serial_search( + tn_bytes, output_inds, total_repeats, 0, num_slices, n_ranks, timeout + )[1] repeats_per = max(1, total_repeats // n_workers) best_cost, best_tree = float('inf'), None @@ -152,7 +186,7 @@ def run_mpi(circuit, nqubits, num_slices, total_repeats=1024, psi_tn = qc.to_dense(rehearse="tn") local_tree = parallel_search( psi_tn, psi_tn.outer_inds(), rank_repeats, n_workers=48, - num_slices=num_slices, n_ranks=size, timeout=60, + num_slices=num_slices, n_ranks=size, timeout=600, ) t_search = time.time() - t0 local_psi = psi_tn @@ -181,7 +215,7 @@ def run_mpi(circuit, nqubits, num_slices, total_repeats=1024, # --- contraction: all ranks work in parallel --- import torch - torch.set_num_threads(max(1, 48 // size)) + torch.set_num_threads(max(1, 96 // size)) arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex128) for a in psi.arrays] t0 = time.time() sv = _contract_mpi(tree, arrays, comm, root=0) @@ -193,6 +227,72 @@ def run_mpi(circuit, nqubits, num_slices, total_repeats=1024, return None, t_search + t_contract +def run_mpi_expval( + circuit, + nqubits, + observable=None, + total_repeats=1024, + search_workers=1, + search_timeout=300, +): + """Compute a Hamiltonian expectation value directly from TN via MPI. + MPI parallelizes over Hamiltonian terms; ProcessPool optionally helps + path search for each term.""" + import torch + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + size = comm.Get_size() + + qibo.set_backend("qibotn", platform="quimb") + b = qibo.get_backend() + b.configure_tn_simulation(ansatz="tn") + + observable = check_observable(observable, nqubits) + ham_gate_map = extract_gates_and_qubits(observable) + + qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz, + gate_opts={"max_bond": None, "cutoff": 1e-10}) + + my_terms = ham_gate_map[rank::size] + torch.set_num_threads(max(1, 96 // size)) + t0 = time.time() + + my_exp = 0.0 + 0.0j + for term in my_terms: + coeff, op, where = _term_to_quimb_operator(term) + if op is None: + my_exp += coeff + continue + tn = qc.local_expectation_tn(op, where=where) + if len(tn.outer_inds()) == 0: + val = complex(tn.contract()) + else: + tree = parallel_search( + tn, + tn.outer_inds(), + total_repeats, + n_workers=search_workers, + num_slices=1, + n_ranks=size, + timeout=search_timeout, + ) + if tree is None: + raise RuntimeError("Failed to find a contraction tree for expectation TN.") + arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex128) for a in tn.arrays] + acc = sum(tree.contract_slice(arrays, i) for i in range(tree.multiplicity)) + val = complex(acc.item() if hasattr(acc, 'item') else acc) + my_exp += coeff * val + + t_total = time.time() - t0 + + all_results = comm.gather(my_exp, root=0) + if rank == 0: + total_exp = sum(all_results) + print(f"\n[TN expval] time={t_total:.4f}s expval={total_exp.real:.12f}") + return np.real_if_close(total_exp), t_total + return None, t_total + + def main(): parser = argparse.ArgumentParser() parser.add_argument("--nqubits", type=int, default=30) @@ -201,9 +301,14 @@ def main(): parser.add_argument("--nlayers", type=int, default=3) parser.add_argument("--num-slices", type=int, default=1) parser.add_argument("--total-repeats", type=int, default=1024) + parser.add_argument("--search-workers", type=int, default=1) + parser.add_argument("--search-timeout", type=int, default=300) + parser.add_argument("--observable-file", type=str, default=None) + parser.add_argument("--observable-json", type=str, default=None) parser.add_argument("--save-path", type=str, default=None) parser.add_argument("--load-path", type=str, default=None) parser.add_argument("--no-compare", action="store_true") + parser.add_argument("--mode", type=str, default="sv", choices=["sv", "expval"]) args = parser.parse_args() comm = MPI.COMM_WORLD @@ -215,6 +320,27 @@ def main(): np.random.seed(42) circuit = make_circuit(args.circuit, args.nqubits, args.nlayers) + observable = _load_observable(args.observable_file, args.observable_json) + + if args.mode == "expval": + try: + expval, t_total = run_mpi_expval( + circuit, + args.nqubits, + observable=observable, + total_repeats=args.total_repeats, + search_workers=args.search_workers, + search_timeout=args.search_timeout, + ) + except Exception as e: + if rank == 0: + print(f"[FAILED] {e}") + raise + if rank == 0: + np.save(f"data/expval_tn_{args.circuit}{args.nqubits}.npy", np.asarray(expval)) + if not args.no_compare: + print("No built-in reference comparison for arbitrary observables.") + return try: sv, t_total = run_mpi(circuit, args.nqubits, args.num_slices, @@ -230,14 +356,20 @@ def main(): np.save(f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy", sv) if not args.no_compare: - from benchmark_tn import run_qibojit + from qibotn.bak.benchmark_tn import run_qibojit + import gc np.random.seed(42) circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers) sv_ref, t_ref = run_qibojit(circuit_ref) - fid = abs(np.dot(sv_ref.conj(), sv)) ** 2 + np.save(f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy", sv_ref) print(f"[qibojit] time={t_ref:.4f}s") - print(f"Fidelity : {fid:.8f}") - print(f"L2 error : {np.linalg.norm(sv_ref - sv):.2e}") + # free memory before loading via mmap for expval comparison + del sv, sv_ref + gc.collect() + from compare_jit_tn_quimb import check_results + ref_path = f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy" + tn_path = f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy" + check_results(ref_path, tn_path, args.nqubits) if t_total > 0: print(f"Speedup : {t_ref/t_total:.2f}x") diff --git a/compare_jit_tn_quimb.py b/compare_jit_tn_quimb.py deleted file mode 100644 index d56132a..0000000 --- a/compare_jit_tn_quimb.py +++ /dev/null @@ -1,51 +0,0 @@ -import numpy as np -import os -import sys - -def check_results(ref_path, tn_path): - # 1. 检查文件是否存在 - if not os.path.exists(ref_path) or not os.path.exists(tn_path): - print(f"Error: 找不到文件!\n参考文件: {ref_path}\n待测文件: {tn_path}") - return - - print(f"正在加载数据并对比: \n [Ref] {ref_path}\n [TN ] {tn_path}\n") - - try: - # 2. 加载状态向量 - # mmap_mode='r' 可以防止大文件直接撑爆内存 - sv_ref = np.load(ref_path, mmap_mode='r') - sv_tn = np.load(tn_path, mmap_mode='r') - - # 3. 计算保真度 (Fidelity) - # fid = ||^2 - inner_product = np.dot(sv_ref.conj(), sv_tn) - fidelity = np.abs(inner_product)**2 - - # 4. 计算 L2 误差 (欧氏距离) - l2_error = np.linalg.norm(sv_ref - sv_tn) - - # 5. 打印结果 - print("-" * 30) - print(f"保真度 (Fidelity): {fidelity:.12f}") - #print(f"L2 范数误差: {l2_error:.2e}") - print("-" * 30) - - # phase-invariant L2: align global phase first - phase = inner_product / np.abs(inner_product) - l2_phase_corrected = np.linalg.norm(sv_ref - sv_tn / phase) - print(f"L2 误差(相位校正后): {l2_phase_corrected:.2e}") - - if fidelity > 0.999999: - print("✅ 验证通过:结果高度一致。") - else: - print("❌ 警告:保真度较低,请检查收缩路径或截断误差。") - - except Exception as e: - print(f"计算过程中发生错误: {e}") - -if __name__ == "__main__": - # 你可以在这里直接修改文件名 - REF_FILE = 'data/sv_qibojit_qft30.npy' - TN_FILE = 'data/sv_tn_qft30_mpi.npy' - - check_results(REF_FILE, TN_FILE) \ No newline at end of file diff --git a/hostfile b/hostfile deleted file mode 100644 index aa46b20..0000000 --- a/hostfile +++ /dev/null @@ -1,2 +0,0 @@ -10.20.6.74:1 -10.20.6.102:1 diff --git a/inspect_path.py b/inspect_path.py deleted file mode 100644 index aa83a22..0000000 --- a/inspect_path.py +++ /dev/null @@ -1,21 +0,0 @@ -import pickle -import sys - -path = sys.argv[1] if len(sys.argv) > 1 else 'path/path.pkl.34.mpi' - -with open(path, 'rb') as f: - d = pickle.load(f) -tree = d['tree'] - -width = tree.contraction_width() -slices = tree.multiplicity -sliced_width = width - (slices.bit_length() - 1) - -print(f"Path: {path}") -print(f"Width (unsliced): {width:.1f}") -print(f"Slices: {slices}") -print(f"Sliced width: {sliced_width:.1f}") -print(f"Peak memory (total): {2**width * 16 / 1e9:.1f} GB") -print(f"Peak per slice: {2**sliced_width * 16 / 1e9:.2f} GB") -print(f"Sliced indices: {len(tree.sliced_inds)}") -print(f"Cost (log2 flops): {tree.contraction_cost(log=True):.2f}") diff --git a/run_qibojit_ref.py b/run_qibojit_ref.py deleted file mode 100644 index 58a9acf..0000000 --- a/run_qibojit_ref.py +++ /dev/null @@ -1,18 +0,0 @@ -"""Run qibojit on 30-qubit QFT and save statevector for comparison.""" -import time -import numpy as np -import qibo -from qibo.models import QFT - -#np.random.seed(42) -circuit = QFT(32) - -qibo.set_backend("qibojit", platform="numba") -t0 = time.time() -result = circuit() -elapsed = time.time() - t0 - -sv = np.array(result.state(), dtype=complex).flatten() -np.save("data/sv_qibojit_qft30.npy", sv) -print(f"[qibojit] time={elapsed:.4f}s shape={sv.shape}") -print(f"Saved to sv_qibojit_qft30.npy") diff --git a/src/qibotn/backends/quimb.py b/src/qibotn/backends/quimb.py index 5ece54f..3868eec 100644 --- a/src/qibotn/backends/quimb.py +++ b/src/qibotn/backends/quimb.py @@ -352,6 +352,149 @@ def _string_to_quimb_operator(self, op_str): return op +def expectation(self, circuit, observable, parallel=None, parallel_opts=None): + """ + Compute expectation value with optional parallel acceleration. + + Parameters + ---------- + circuit : qibo.models.Circuit + The quantum circuit. + observable : qibo.hamiltonians.SymbolicHamiltonian or form + The observable to measure. + parallel : str, optional + Parallelization method: 'mpi', 'processpool', or None (default). + parallel_opts : dict, optional + Options for parallel execution: + - max_repeats: int (default 1024) + - max_time: int (default 300) + - search_workers: int (default 48, processpool only) + - mpi_contract: bool (default False, use MPI for contraction) + + Returns + ------- + float + The expectation value. + """ + from qibotn.observables import check_observable, extract_gates_and_qubits + + if parallel_opts is None: + parallel_opts = {} + + observable = check_observable(observable, circuit.nqubits) + + if parallel is None: + # Use original implementation + from qibotn.observables import extract_gates_and_qubits + all_terms = extract_gates_and_qubits(observable) + + qc = self._qibo_circuit_to_quimb( + circuit, + quimb_circuit_type=self.circuit_ansatz, + gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff}, + ) + + exp_val = 0.0 + for coeff, factors in all_terms: + op = None + where = [] + for qubit, gate_name in factors: + p = qu.pauli(gate_name.lower()) + op = p if op is None else op & p + where.append(qubit) + + val = qc.local_expectation( + op, tuple(where), + backend=self.backend, + optimize=self.contractions_optimizer, + simplify_sequence="ADCRS", + simplify_atol=1e-12, + ) + exp_val += coeff * val + + return self.real(exp_val) + + else: + # Use parallel implementation + return self._expectation_parallel(circuit, observable, parallel, parallel_opts) + + +def _expectation_parallel(self, circuit, observable, method, opts): + """Parallel expectation value computation.""" + from qibotn.observables import extract_gates_and_qubits + from qibotn.parallel import parallel_path_search, parallel_contract + import torch + + try: + from mpi4py import MPI + comm = MPI.COMM_WORLD if method == 'mpi' else None + rank = comm.Get_rank() if comm else 0 + size = comm.Get_size() if comm else 1 + except ImportError: + comm, rank, size = None, 0, 1 + + max_repeats = opts.get('max_repeats', 1024) + max_time = opts.get('max_time', 300) + search_workers = opts.get('search_workers', 48) + mpi_contract = opts.get('mpi_contract', False) + torch_threads = opts.get('torch_threads', None) + + qc = self._qibo_circuit_to_quimb( + circuit, + quimb_circuit_type=self.circuit_ansatz, + gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff}, + ) + + all_terms = extract_gates_and_qubits(observable) + my_terms = all_terms[rank::size] + + if method == 'mpi' and comm: + torch.set_num_threads(max(1, 96 // size)) + elif torch_threads: + torch.set_num_threads(torch_threads) + + my_exp = 0.0 + for coeff, factors in my_terms: + op = None + where = [] + for qubit, gate_name in factors: + p = qu.pauli(gate_name.lower()) + op = p if op is None else op & p + where.append(qubit) + + tn = qc.local_expectation(op, tuple(where), rehearse='tn') + + tree = parallel_path_search( + tn, tn.outer_inds(), + method=method, + total_repeats=max_repeats, + max_time=max_time, + n_workers=search_workers + ) + + if tree is None: + continue + + if mpi_contract and comm and size > 1: + arrays = [self.engine.asarray(a) for a in tn.arrays] + val = parallel_contract(tree, arrays, method='mpi', comm=comm) + else: + for tensor in tn.tensors: + tensor._data = torch.from_numpy(self.engine.asarray(tensor._data)).to(torch.complex128) + val = complex(tn.contract(all, output_inds=(), optimize=tree)) + + my_exp += coeff * complex(val) + + if comm: + all_exp = comm.gather(my_exp, root=0) + if rank == 0: + total_exp = sum(all_exp) + return self.real(total_exp) + return 0.0 + + return self.real(my_exp) + + CLASSES_ROOTS = {"numpy": "Numpy", "torch": "PyTorch", "jax": "Jax"} METHODS = { @@ -362,6 +505,8 @@ METHODS = { "exp_value_observable_symbolic": exp_value_observable_symbolic, "_qibo_circuit_to_quimb": _qibo_circuit_to_quimb, "_string_to_quimb_operator": _string_to_quimb_operator, + "expectation": expectation, + "_expectation_parallel": _expectation_parallel, "circuit_ansatz": circuit_ansatz, } diff --git a/src/qibotn/eval.py b/src/qibotn/eval.py index 680c817..ae36893 100644 --- a/src/qibotn/eval.py +++ b/src/qibotn/eval.py @@ -4,83 +4,16 @@ from cupy.cuda import nccl from cupy.cuda.runtime import getDeviceCount from cuquantum.tensornet import Network, contract from mpi4py import MPI -from qibo import hamiltonians -from qibo.symbols import I, X, Y, Z from qibotn.circuit_convertor import QiboCircuitToEinsum from qibotn.circuit_to_mps import QiboCircuitToMPS from qibotn.mps_contraction_helper import MPSContractionHelper - - -def check_observable(observable, circuit_nqubit): - """Checks the type of observable and returns the appropriate Hamiltonian.""" - if observable is None: - return build_observable(circuit_nqubit) - elif isinstance(observable, dict): - return create_hamiltonian_from_dict(observable, circuit_nqubit) - elif isinstance(observable, hamiltonians.SymbolicHamiltonian): - # TODO: check if the observable is compatible with the circuit - return observable - else: - raise TypeError("Invalid observable type.") - - -def build_observable(circuit_nqubit): - """Helper function to construct a target observable.""" - hamiltonian_form = 0 - for i in range(circuit_nqubit): - hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit) - - hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form) - return hamiltonian - - -def create_hamiltonian_from_dict(data, circuit_nqubit): - """Create a Qibo SymbolicHamiltonian from a dictionary representation. - - Ensures that each Hamiltonian term explicitly acts on all circuit qubits - by adding identity (`I`) gates where needed. - - Args: - data (dict): Dictionary containing Hamiltonian terms. - circuit_nqubit (int): Total number of qubits in the quantum circuit. - - Returns: - hamiltonians.SymbolicHamiltonian: The constructed Hamiltonian. - """ - PAULI_GATES = {"X": X, "Y": Y, "Z": Z} - - terms = [] - - for term in data["terms"]: - coeff = term["coefficient"] - operators = term["operators"] # List of tuples like [("Z", 0), ("X", 1)] - - # Convert the operator list into a dictionary {qubit_index: gate} - operator_dict = {q: PAULI_GATES[g] for g, q in operators} - - # Build the full term ensuring all qubits are covered - full_term_expr = [ - operator_dict[q](q) if q in operator_dict else I(q) - for q in range(circuit_nqubit) - ] - - # Multiply all operators together to form a single term - term_expr = full_term_expr[0] - for op in full_term_expr[1:]: - term_expr *= op - - # Scale by the coefficient - final_term = coeff * term_expr - terms.append(final_term) - - if not terms: - raise ValueError("No valid Hamiltonian terms were added.") - - # Combine all terms - hamiltonian_form = sum(terms) - - return hamiltonians.SymbolicHamiltonian(hamiltonian_form) +from qibotn.observables import ( + build_observable, + check_observable, + create_hamiltonian_from_dict, + extract_gates_and_qubits, +) def get_ham_gates(pauli_map, dtype="complex128", backend=cp): @@ -111,45 +44,6 @@ def get_ham_gates(pauli_map, dtype="complex128", backend=cp): return gates -def extract_gates_and_qubits(hamiltonian): - """ - Extracts the gates and their corresponding qubits from a Qibo Hamiltonian. - - Parameters: - hamiltonian (qibo.hamiltonians.Hamiltonian or qibo.hamiltonians.SymbolicHamiltonian): - A Qibo Hamiltonian object. - - Returns: - list of tuples: [(coefficient, [(gate, qubit), ...]), ...] - - coefficient: The prefactor of the term. - - list of (gate, qubit): Each term's gates and the qubits they act on. - """ - extracted_terms = [] - - if isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian): - for term in hamiltonian.terms: - coeff = term.coefficient # Extract coefficient - gate_qubit_list = [] - - # Extract gate and qubit information - for factor in term.factors: - gate_name = str(factor)[ - 0 - ] # Extract the gate type (X, Y, Z) from 'X0', 'Z1' - qubit = int(str(factor)[1:]) # Extract the qubit index - gate_qubit_list.append((qubit, gate_name, coeff)) - coeff = 1.0 - - extracted_terms.append(gate_qubit_list) - - else: - raise ValueError( - "Unsupported Hamiltonian type. Must be SymbolicHamiltonian or Hamiltonian." - ) - - return extracted_terms - - def initialize_mpi(): """Initialize MPI communication and device selection.""" comm = MPI.COMM_WORLD diff --git a/src/qibotn/observables.py b/src/qibotn/observables.py new file mode 100644 index 0000000..309ace4 --- /dev/null +++ b/src/qibotn/observables.py @@ -0,0 +1,71 @@ +"""Observable helpers shared by tensor-network backends and benchmarks.""" + +from qibo import hamiltonians +from qibo.symbols import I, X, Y, Z + + +def check_observable(observable, circuit_nqubit): + """Checks the type of observable and returns the appropriate Hamiltonian.""" + if observable is None: + return build_observable(circuit_nqubit) + if isinstance(observable, dict): + return create_hamiltonian_from_dict(observable, circuit_nqubit) + if isinstance(observable, hamiltonians.SymbolicHamiltonian): + return observable + raise TypeError("Invalid observable type.") + + +def build_observable(circuit_nqubit): + """Construct the default benchmark observable used by qibotn.""" + hamiltonian_form = 0 + for i in range(circuit_nqubit): + hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit) + + return hamiltonians.SymbolicHamiltonian(form=hamiltonian_form) + + +def create_hamiltonian_from_dict(data, circuit_nqubit): + """Create a Qibo SymbolicHamiltonian from the qibotn dict representation.""" + pauli_gates = {"X": X, "Y": Y, "Z": Z} + terms = [] + + for term in data["terms"]: + coeff = term["coefficient"] + operators = term["operators"] + operator_dict = {q: pauli_gates[g] for g, q in operators} + + full_term_expr = [ + operator_dict[q](q) if q in operator_dict else I(q) + for q in range(circuit_nqubit) + ] + + term_expr = full_term_expr[0] + for op in full_term_expr[1:]: + term_expr *= op + + terms.append(coeff * term_expr) + + if not terms: + raise ValueError("No valid Hamiltonian terms were added.") + + return hamiltonians.SymbolicHamiltonian(sum(terms)) + + +def extract_gates_and_qubits(hamiltonian): + """Extract per-term Pauli factors from a Qibo SymbolicHamiltonian. + + Returns list of terms, where each term is (coefficient, [(qubit, gate_name), ...]). + """ + extracted_terms = [] + + if not isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian): + raise ValueError( + "Unsupported Hamiltonian type. Must be SymbolicHamiltonian or Hamiltonian." + ) + + for term in hamiltonian.terms: + coeff = term.coefficient + factors = [(int(str(f)[1:]), str(f)[0]) for f in term.factors] + extracted_terms.append((coeff, factors)) + + return extracted_terms diff --git a/src/qibotn/parallel.py b/src/qibotn/parallel.py new file mode 100644 index 0000000..eab6b43 --- /dev/null +++ b/src/qibotn/parallel.py @@ -0,0 +1,148 @@ +"""Parallel path search and contraction utilities for tensor networks.""" +import os +import pickle +import signal +import numpy as np +from concurrent.futures import ProcessPoolExecutor, as_completed + +try: + from mpi4py import MPI + _HAVE_MPI = True +except ImportError: + _HAVE_MPI = False + MPI = None + + +def _serial_search(tn_bytes, output_inds, repeats, seed, max_time): + """Single-process path search with cotengra.""" + import random + import cotengra as ctg + random.seed(seed) + tn = pickle.loads(tn_bytes) + opt = ctg.HyperOptimizer( + methods=["kahypar", "kahypar-agglom", "spinglass"], + max_repeats=repeats, + parallel=False, + minimize="combo-256", + max_time=max_time, + optlib="random", + progbar=False, + ) + tree = tn.contraction_tree(optimize=opt, output_inds=output_inds) + return tree.combo_cost(factor=256), tree + + +def parallel_path_search(tn, output_inds, method='processpool', total_repeats=1024, + max_time=300, n_workers=48): + """Parallel contraction path search. + + Args: + tn: Tensor network (quimb TensorNetwork) + output_inds: Output indices for contraction + method: 'processpool' or 'mpi' + total_repeats: Total optimization repeats + max_time: Timeout per worker (seconds) + n_workers: Number of workers (processpool only) + + Returns: + Best contraction tree found + """ + if method == 'mpi': + if not _HAVE_MPI: + raise ImportError("mpi4py not available") + return _mpi_search(tn, output_inds, total_repeats, max_time) + elif method == 'processpool': + return _processpool_search(tn, output_inds, total_repeats, n_workers, max_time) + else: + raise ValueError(f"Unknown method: {method}") + + +def _processpool_search(tn, output_inds, total_repeats, n_workers, max_time): + """ProcessPool-based parallel search.""" + tn_bytes = pickle.dumps(tn) + repeats_per = max(1, total_repeats // n_workers) + pool = ProcessPoolExecutor(max_workers=n_workers) + futures = [ + pool.submit(_serial_search, tn_bytes, output_inds, repeats_per, seed, max_time) + for seed in range(n_workers) + ] + best_cost, best_tree = float("inf"), None + try: + for fut in as_completed(futures, timeout=max_time + 5): + try: + cost, tree = fut.result() + if cost < best_cost: + best_cost, best_tree = cost, tree + except Exception: + pass + except TimeoutError: + pass + finally: + for fut in futures: + fut.cancel() + for pid in list(pool._processes.keys()): + try: + os.kill(pid, signal.SIGKILL) + except ProcessLookupError: + pass + pool.shutdown(wait=False) + return best_tree + + +def _mpi_search(tn, output_inds, total_repeats, max_time): + """MPI-based parallel search across ranks.""" + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + size = comm.Get_size() + tn_bytes = pickle.dumps(tn) + repeats_per = max(1, total_repeats // size) + local_cost, local_tree = _serial_search(tn_bytes, output_inds, repeats_per, rank, max_time) + all_results = comm.gather((local_cost, local_tree), root=0) + best_tree = None + if rank == 0: + best_cost = float("inf") + for cost, tree in all_results: + if tree is not None and cost < best_cost: + best_cost, best_tree = cost, tree + best_tree = comm.bcast(best_tree, root=0) + return best_tree + + +def parallel_contract(tree, arrays, method='mpi', comm=None): + """Parallel sliced contraction. + + Args: + tree: Contraction tree + arrays: List of tensor arrays + method: 'mpi' (only MPI supported for now) + comm: MPI communicator (required for MPI) + + Returns: + Contracted result (on root rank for MPI) + """ + if method == 'mpi': + if not _HAVE_MPI or comm is None: + raise ValueError("MPI method requires mpi4py and comm") + return _contract_mpi(tree, arrays, comm) + else: + raise ValueError(f"Unknown method: {method}") + + +def _contract_mpi(tree, arrays, comm, root=0): + """Distribute contraction slices across MPI ranks.""" + rank = comm.Get_rank() + size = comm.Get_size() + is_torch = type(arrays[0]).__module__.startswith("torch") + + result_np = None + for i in range(rank, tree.multiplicity, size): + x = tree.contract_slice(arrays, i) + x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x)) + result_np = x_np if result_np is None else result_np + x_np + + if result_np is None: + result_np = np.zeros(1, dtype=np.complex128) + + result = np.zeros_like(result_np) if rank == root else None + comm.Reduce(result_np, result, root=root) + return result diff --git a/sweep_bond_32q.py b/sweep_bond_32q.py deleted file mode 100644 index 5aae456..0000000 --- a/sweep_bond_32q.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Bond dimension sweep for 32-qubit variational circuit.""" -import os -import sys -import numpy as np - -sys.path.insert(0, os.path.dirname(__file__)) -from benchmark_mps import make_circuit, run_qibojit, run_quimb_mps, compare, jit_cache_path, DATA_DIR - -NQUBITS = 32 -NLAYERS = 5 -BOND_VALUES = [1, 8, 16, 32, 64, 128, 256] -SVD_CUTOFF = 1e-6 -OPTIMIZER = "auto-hq" - -if __name__ == "__main__": - cache_path = jit_cache_path("variational", NQUBITS, NLAYERS) - - if os.path.exists(cache_path): - sv_ref = np.load(cache_path) - print(f"[qibojit] loaded from cache: {cache_path}\n") - else: - np.random.seed(42) - circuit_ref = make_circuit("variational", NQUBITS, NLAYERS) - sv_ref, t_ref = run_qibojit(circuit_ref) - np.save(cache_path, sv_ref) - print(f"[qibojit] time={t_ref:.4f}s (saved to {cache_path})\n") - - print(f"{'bond':>6} {'time(s)':>10} {'fidelity':>12} {'l2_err':>10}") - print("-" * 46) - - for bond in BOND_VALUES: - np.random.seed(42) - circuit_mps = make_circuit("variational", NQUBITS, NLAYERS) - try: - sv_mps, t_mps = run_quimb_mps(circuit_mps, bond, SVD_CUTOFF, OPTIMIZER) - fidelity, l2_err = compare(sv_ref, sv_mps) - print(f"{bond:>6} {t_mps:>10.4f} {fidelity:>12.8f} {l2_err:>10.2e}") - except Exception as e: - print(f"{bond:>6} FAILED: {e}") diff --git a/tests/test_expectation.py b/tests/test_expectation.py index d89c6f9..0cd6ac2 100644 --- a/tests/test_expectation.py +++ b/tests/test_expectation.py @@ -35,7 +35,7 @@ def test_observable_expval(backend, nqubits): numpy_backend = construct_backend("numpy") ham, ham_form = build_observable(nqubits) circ = build_circuit(nqubits=nqubits, nlayers=1) - + exact_expval = numpy_backend.calculate_expectation_state( hamiltonian=ham, state=circ().state(),