完善mps的vidal机制,多节点并行;补充tn搜索时dask集群搜索的方式
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
This commit is contained in:
18
tools/README.md
Normal file
18
tools/README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Tools
|
||||
|
||||
Auxiliary scripts for profiling, legacy comparisons, and scale probes.
|
||||
|
||||
The main CPU expectation entrypoint is `../benchmark_cpu_expectation.py`.
|
||||
For the current Vidal/MPS 1D-chain tests, prefer `../run_vidal_mps_cases.sh`.
|
||||
|
||||
Files here are intentionally secondary:
|
||||
|
||||
- `compare_vidal_backend_qmatchatea.py`: diagnostic comparison against QMatchaTea.
|
||||
- `profile_vidal_chrome.py`: PyTorch CPU profiler for the Vidal path.
|
||||
- `run_cpu_single_cases.sh`: single-node scale probes.
|
||||
- `run_cpu_large_cases.sh`: two-node MPI scale probes.
|
||||
- `run_vidal_segment_mpi_scan.sh`: rank/thread scaling scan for Vidal segmented MPI.
|
||||
- `baseline_mps_expectation.py`: legacy MPS comparison CLI kept for old commands.
|
||||
- `benchmark_tn_mpi.py`, `benchmark_search.py`, `benchmark_slice.py`, `benchmark_contract_sliced.py`, `check_tree.py`: old TN path-search/slicing experiments.
|
||||
- `qibojit_reference_expectation.py`: state-vector reference helper.
|
||||
- `validate_vidal_mpi_correctness.py`: focused Vidal MPI correctness helper.
|
||||
183
tools/baseline_mps_expectation.py
Normal file
183
tools/baseline_mps_expectation.py
Normal file
@@ -0,0 +1,183 @@
|
||||
"""MPS expectation benchmark for qmatchatea and Vidal backends."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
from qibotn.benchmark_cases import (
|
||||
build_circuit as build_benchmark_circuit,
|
||||
exact_pauli_sum,
|
||||
observable_terms,
|
||||
terms_to_dict,
|
||||
)
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal_tebd import run_vidal_ring_xz
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
    """Build the ``brickwall_cnot`` benchmark circuit.

    Forwards ``nqubits``, ``nlayers`` and ``seed`` unchanged to
    ``qibotn.benchmark_cases.build_circuit`` with the circuit kind fixed.
    """
    circuit_kind = "brickwall_cnot"
    return build_benchmark_circuit(circuit_kind, nqubits, nlayers, seed)
|
||||
|
||||
|
||||
def build_observable(nqubits):
    """Return the ``ring_xz`` observable on ``nqubits`` qubits in dict form."""
    ring_terms = observable_terms("ring_xz", nqubits)
    return terms_to_dict(ring_terms)
|
||||
|
||||
|
||||
def exact_expectation(circuit, nqubits):
    """Evaluate the ``ring_xz`` observable on ``circuit`` via ``exact_pauli_sum``."""
    ring_terms = observable_terms("ring_xz", nqubits)
    return exact_pauli_sum(circuit, ring_terms, nqubits)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the MPS expectation benchmark.

    Parses the CLI options, builds the benchmark circuit and ``ring_xz``
    observable, optionally obtains a reference value (from ``--reference-file``
    or from ``--exact``), runs the selected executor and prints the result
    line ``expval abs_error rel_error seconds`` on rank 0.

    Raises:
        ValueError: if ``--executor vidal-mpi`` is used without ``--mpi-ct``,
            or if ``--exact`` is requested for more qubits than
            ``--exact-max-qubits``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=40)
    parser.add_argument("--nlayers", type=int, default=30)
    parser.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--tensor-module", choices=("numpy", "torch"), default="torch")
    parser.add_argument("--torch-threads", type=int, default=32)
    parser.add_argument(
        "--executor",
        choices=("qmatchatea", "vidal", "vidal-mpi"),
        default="qmatchatea",
    )
    parser.add_argument("--mpi-ct", action="store_true")
    parser.add_argument("--mpi-barriers", type=int, default=-1)
    parser.add_argument("--mpi-isometrization", type=int, default=-1)
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    parser.add_argument("--reference-file")
    parser.add_argument(
        "--mpi-rank-map",
        action="store_true",
        help="Print MPI rank, host, pid, and torch thread placement metadata.",
    )
    args = parser.parse_args()

    # Reject the inconsistent option pair before any expensive work (torch
    # import, circuit construction, exact reference) is started.
    if args.executor == "vidal-mpi" and not args.mpi_ct:
        raise ValueError("--executor vidal-mpi requires --mpi-ct.")
    use_vidal = args.executor in ("vidal", "vidal-mpi")

    logging.getLogger("qibo.config").setLevel(logging.ERROR)
    logging.getLogger("qtealeaves").setLevel(logging.ERROR)
    import torch

    torch.set_num_threads(args.torch_threads)
    rank = 0
    size = 1
    if args.mpi_ct:
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()
        size = MPI.COMM_WORLD.Get_size()
        if args.mpi_rank_map:
            rank_info = {
                "rank": rank,
                "size": size,
                "host": socket.gethostname(),
                "pid": os.getpid(),
                "torch_threads": args.torch_threads,
                "omp_num_threads": os.environ.get("OMP_NUM_THREADS", ""),
                "mkl_num_threads": os.environ.get("MKL_NUM_THREADS", ""),
            }
            # Collective call: every rank contributes its placement record.
            rank_infos = MPI.COMM_WORLD.gather(rank_info, root=0)
            if rank == 0:
                print("mpi_rank_map")
                for item in sorted(rank_infos, key=lambda row: row["rank"]):
                    print(
                        "rank={rank} size={size} host={host} pid={pid} "
                        "torch_threads={torch_threads} "
                        "OMP_NUM_THREADS={omp_num_threads} "
                        "MKL_NUM_THREADS={mkl_num_threads}".format(**item)
                    )

    circuit = build_circuit(args.nqubits, args.nlayers, args.seed)
    observable = build_observable(args.nqubits)
    exact = None
    if args.reference_file:
        # A stored reference takes precedence over recomputing with --exact.
        with open(args.reference_file, "r", encoding="utf-8") as f:
            exact = float(json.load(f)["expectation"])
    elif args.exact:
        if args.nqubits > args.exact_max_qubits:
            raise ValueError(
                f"--exact is limited to {args.exact_max_qubits} qubits by default."
            )
        exact = exact_expectation(circuit, args.nqubits)

    if rank == 0:
        if args.mpi_ct and use_vidal:
            mpi_label = f"VidalSegment/{size}"
        else:
            mpi_label = f"MPIMPS/{size}" if args.mpi_ct else "SR"
        print(
            f"nqubits={args.nqubits} nlayers={args.nlayers} "
            f"bond={args.bond} seed={args.seed} "
            f"tensor_module={args.tensor_module} svd_control=E! "
            f"compile_circuit=True mpi={mpi_label} executor={args.executor}"
        )
        if exact is not None:
            print(f"exact={exact:.16e}")
        print("expval abs_error rel_error seconds")

    start = time.perf_counter()
    timings = None
    if use_vidal:
        if args.mpi_ct:
            from qibotn.backends.vidal_mpi_segment import run_segment_vidal_mpi_ring_xz

            value, timings = run_segment_vidal_mpi_ring_xz(
                circuit,
                max_bond=args.bond,
                cut_ratio=1e-12,
                tensor_module=args.tensor_module,
                comm=MPI.COMM_WORLD,
            )
        else:
            value = run_vidal_ring_xz(
                circuit,
                max_bond=args.bond,
                cut_ratio=1e-12,
                tensor_module=args.tensor_module,
            )
    else:
        backend = QMatchaTeaBackend()
        backend.configure_tn_simulation(
            ansatz="MPS",
            max_bond_dimension=args.bond,
            cut_ratio=1e-12,
            svd_control="E!",
            tensor_module=args.tensor_module,
            compile_circuit=True,
            track_memory=False,
            mpi_approach="CT" if args.mpi_ct else "SR",
            mpi_num_procs=size,
            mpi_where_barriers=args.mpi_barriers if args.mpi_ct else -1,
            mpi_isometrization=args.mpi_isometrization,
        )
        value = backend.expectation(
            circuit,
            observable,
            preprocess=False,
            compile_circuit=True,
        )

    max_timings = None
    if timings:
        # ``timings`` is only produced by the MPI Vidal path, so ``MPI`` is
        # in scope here; report the slowest rank for every section.
        max_timings = {
            key: MPI.COMM_WORLD.reduce(local_value, op=MPI.MAX, root=0)
            for key, local_value in timings.items()
        }
    if rank != 0:
        return

    value = float(np.real(value))
    elapsed = time.perf_counter() - start
    abs_error = float("nan") if exact is None else abs(value - exact)
    # Guard the division against a reference value of exactly zero.
    rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
    print(f"{value:.16e} {abs_error:.6e} {rel_error:.6e} {elapsed:.3f}")
    if max_timings:
        print("timing_section max_seconds")
        for key, max_value in max_timings.items():
            print(f"{key} {max_value:.6f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
56
tools/benchmark_contract_sliced.py
Normal file
56
tools/benchmark_contract_sliced.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""MPI parallel sliced contraction using pre-sliced tree."""
|
||||
import os
import pickle
import time

NQUBITS, NLAYERS, NCORES = 25, 10, 48

# Thread-count variables are read when the OpenMP/BLAS runtimes are loaded,
# so they must be exported before numpy/torch are imported.
os.environ['OMP_NUM_THREADS'] = str(NCORES)
os.environ['MKL_NUM_THREADS'] = str(NCORES)

import numpy as np
from mpi4py import MPI
import torch
import qibo, quimb as qu
from qibotn.observables import build_random_circuit

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

torch.set_num_threads(NCORES)

# Every rank rebuilds the same expectation tensor network locally.
circuit = build_random_circuit(NQUBITS, NLAYERS)
qibo.set_backend("qibotn", platform="quimb")
backend = qibo.get_backend()
backend.configure_tn_simulation(ansatz="tn")
qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz)
tn = qc.local_expectation(qu.pauli('x') & qu.pauli('z'), (0, 1), rehearse='tn')

# Only rank 0 touches the file system; the pre-sliced tree is then broadcast.
# NOTE: pickle executes arbitrary code on load - only use trusted tree files.
if rank == 0:
    with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'rb') as f:
        tree = pickle.load(f)
else:
    tree = None
tree = comm.bcast(tree, root=0)

arrays = [torch.from_numpy(np.asarray(t._data)) for t in tn.tensors]
n_slices = tree.multiplicity

if rank == 0:
    print(f"Slices: {n_slices}, Ranks: {size}, "
          f"Peak: {tree.max_size() * 16 / 1e9:.2f} GB, "
          f"Threads/rank: {NCORES}, Backend: torch")

t0 = time.time()
result = None
# Round-robin distribution of slices over ranks.
for i in range(rank, n_slices, size):
    val = tree.contract_slice(arrays, i, backend='torch')
    val_np = val.cpu().numpy().reshape(-1)
    result = val_np if result is None else result + val_np

# A rank without any slice still has to take part in the reduction.
if result is None:
    result = np.zeros(1, dtype=np.complex128)

total = np.zeros_like(result) if rank == 0 else None
comm.Reduce(result, total, root=0)

if rank == 0:
    print(f"Contract: {time.time() - t0:.4f}s  Expectation: {0.5 * total[0].real:.10f}")
|
||||
34
tools/benchmark_search.py
Normal file
34
tools/benchmark_search.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""Search contraction path and save."""
|
||||
import time
import os
import pickle
from qibotn.parallel import parallel_path_search
from qibotn.observables import build_random_circuit
import qibo
import quimb as qu

from mpi4py import MPI

# NOTE(review): this script searches a 20-qubit tree, while the slice and
# contract scripts read ``tree_q25_l10*.pkl`` - confirm which size is intended.
NQUBITS, NLAYERS, WORKERS = 20, 10, 96

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
# Multi-rank launches search through MPI, single-rank ones use a process pool.
if size > 1:
    method = 'mpi'
else:
    method = 'processpool'

circuit = build_random_circuit(NQUBITS, NLAYERS)
qibo.set_backend("qibotn", platform="quimb")
backend = qibo.get_backend()
backend.configure_tn_simulation(ansatz="tn")
qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz)
xz_operator = qu.pauli('x') & qu.pauli('z')
tn = qc.local_expectation(xz_operator, (0, 1), rehearse='tn')

if rank == 0:
    print(f"Searching {NQUBITS}q {NLAYERS}l, method={method}, ranks={size}, workers/rank={WORKERS}...")
search_started = time.time()
tree = parallel_path_search(
    tn,
    tn.outer_inds(),
    method=method,
    total_repeats=1024,
    max_time=300,
    n_workers=WORKERS,
    trial_timeout=60,
)
t_search = time.time() - search_started

# Only rank 0 writes the resulting tree to disk.
if rank == 0:
    os.makedirs('data', exist_ok=True)
    path = f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl"
    with open(path, 'wb') as out_file:
        pickle.dump(tree, out_file)
    print(f"Search: {t_search:.2f}s  Peak: {tree.max_size() * 16 / 1e9:.2f} GB  Saved: {path}")
|
||||
16
tools/benchmark_slice.py
Normal file
16
tools/benchmark_slice.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""Slice saved tree and save."""
|
||||
import pickle

NQUBITS, NLAYERS = 25, 10

# Load the unsliced tree that the search step wrote.
with open(f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl", 'rb') as src:
    tree = pickle.load(src)

original_peak_gb = tree.max_size() * 16 / 1e9
print(f"Original peak: {original_peak_gb:.2f} GB")

# Slice until the largest intermediate holds at most 2**28 elements.
tree_sliced = tree.slice_and_reconfigure(target_size=2**28)

with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'wb') as dst:
    pickle.dump(tree_sliced, dst)

sliced_peak_gb = tree_sliced.max_size() * 16 / 1e9
print(f"Sliced peak: {sliced_peak_gb:.2f} GB  Slices: {tree_sliced.multiplicity}")
|
||||
378
tools/benchmark_tn_mpi.py
Normal file
378
tools/benchmark_tn_mpi.py
Normal file
@@ -0,0 +1,378 @@
|
||||
"""MPI-parallel TN benchmark: path search + contraction via MPI."""
|
||||
import json
|
||||
import pickle
|
||||
import time
|
||||
import argparse
|
||||
import numpy as np
|
||||
import cotengra as ctg
|
||||
import qibo
|
||||
from qibo import Circuit, gates
|
||||
from mpi4py import MPI
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from qibotn.observables import check_observable, extract_gates_and_qubits
|
||||
|
||||
|
||||
def _load_observable(observable_file=None, observable_json=None):
|
||||
if observable_file:
|
||||
with open(observable_file, "r", encoding="utf8") as f:
|
||||
return json.load(f)
|
||||
if observable_json:
|
||||
return json.loads(observable_json)
|
||||
return None
|
||||
|
||||
|
||||
def _term_to_quimb_operator(term):
    """Translate one extracted Hamiltonian term into quimb form.

    ``term`` is iterated as ``(qubit, gate_name, coefficient)`` triples.
    The coefficient is taken from the first triple (``1.0`` for an empty
    term); identity factors are skipped.

    Returns a ``(coeff, op, where)`` tuple: the complex coefficient, the
    ``&``-combined Pauli operator (``None`` when no non-identity factor is
    present) and the tuple of qubits it acts on.
    """
    import quimb as qu

    coeff = complex(term[0][2]) if term else 1.0
    sites = []
    operator = None

    for qubit, gate_name, _ in term:
        site = int(qubit)
        label = str(gate_name).upper()
        if label != "I":
            sites.append(site)
            if operator is None:
                operator = qu.pauli(label.lower())
            else:
                operator = operator & qu.pauli(label.lower())

    return complex(coeff), operator, tuple(sites)
|
||||
|
||||
|
||||
def _run_serial_search(tn_bytes, output_inds, repeats, seed, num_slices, n_ranks, max_time):
    """Run one serial cotengra hyper-optimisation on a pickled network.

    Seeds Python's ``random`` module with ``seed``, unpickles the tensor
    network and searches for a contraction tree with at most ``repeats``
    trials and ``max_time`` seconds. ``num_slices`` and ``n_ranks`` are
    accepted but not used by this function.

    Returns ``(combo_cost, tree)``.
    """
    import pickle
    import cotengra as ctg
    import random

    random.seed(seed)
    network = pickle.loads(tn_bytes)
    optimizer_options = dict(
        methods=['kahypar', 'kahypar-agglom', 'spinglass'],
        max_repeats=repeats,
        parallel=False,
        minimize='combo-256',
        max_time=max_time,
        optlib="random",
        slicing_opts={'target_size': 2**29, 'allow_outer': True},
        progbar=False,
    )
    optimizer = ctg.HyperOptimizer(**optimizer_options)
    found = network.contraction_tree(optimize=optimizer, output_inds=output_inds)
    cost = found.combo_cost(factor=256)
    return cost, found
|
||||
|
||||
|
||||
def parallel_search(tn, output_inds, total_repeats, n_workers, num_slices, n_ranks,
                    timeout):
    """Search for a contraction tree with a pool of worker processes.

    The tensor network is pickled once and handed to ``n_workers``
    processes, each running ``_run_serial_search`` with its own seed and
    ``total_repeats // n_workers`` (at least one) trials. The tree with the
    lowest cost among the workers that finished in time is returned.

    With ``n_workers <= 1`` the search runs serially in this process.

    Args:
        tn: tensor network to search a contraction tree for.
        output_inds: output indices forwarded to the search.
        total_repeats: total number of optimiser trials across all workers.
        n_workers: number of worker processes.
        num_slices, n_ranks: forwarded unchanged to ``_run_serial_search``.
        timeout: per-worker search time in seconds; the pool is given
            ``timeout + 5`` seconds in total before workers are killed.

    Returns:
        The best tree found, or ``None`` if no worker produced one.
    """
    import os
    import pickle
    import signal
    from concurrent.futures import ProcessPoolExecutor, as_completed
    # Before Python 3.11 this is a different class from the builtin
    # ``TimeoutError``, so it has to be caught by its own name.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    tn_bytes = pickle.dumps(tn)
    if n_workers <= 1:
        return _run_serial_search(
            tn_bytes, output_inds, total_repeats, 0, num_slices, n_ranks, timeout
        )[1]
    repeats_per = max(1, total_repeats // n_workers)
    best_cost, best_tree = float('inf'), None

    pool = ProcessPoolExecutor(max_workers=n_workers)
    futures = [
        pool.submit(_run_serial_search, tn_bytes, output_inds,
                    repeats_per, seed, num_slices, n_ranks, timeout)
        for seed in range(n_workers)
    ]
    try:
        for fut in as_completed(futures, timeout=timeout + 5):
            try:
                cost, tree = fut.result()
            except Exception as e:
                # Best effort: one failing worker must not abort the search.
                print(f"  [worker failed] {e}")
                continue
            if cost < best_cost:
                best_cost, best_tree = cost, tree
    except (FuturesTimeoutError, TimeoutError):
        # Keep whatever finished in time, but make the truncation visible.
        unfinished = sum(1 for fut in futures if not fut.done())
        print(f"  [search timeout] {unfinished} worker(s) did not finish in time")
    finally:
        for fut in futures:
            fut.cancel()
        # Workers stuck in a long trial ignore cancellation, so they are
        # killed outright. ``_processes`` is a private attribute that may be
        # ``None`` once the pool has been torn down.
        for pid in list((getattr(pool, "_processes", None) or {}).keys()):
            try:
                os.kill(pid, signal.SIGKILL)
            except ProcessLookupError:
                pass
        pool.shutdown(wait=False)

    return best_tree
|
||||
|
||||
|
||||
def make_circuit(circuit_type, nqubits, nlayers=1):
    """Build one of the benchmark circuits.

    Supported ``circuit_type`` values are ``"qft"``, ``"variational"``,
    ``"ghz"`` and ``"brickwork"``. Rotation angles are drawn from the global
    ``np.random`` state, so seed it beforehand for reproducible circuits.

    Raises:
        ValueError: for any other ``circuit_type``.
    """
    c = Circuit(nqubits)

    if circuit_type not in ("qft", "variational", "ghz", "brickwork"):
        raise ValueError(f"Unknown circuit: {circuit_type}")

    if circuit_type == "qft":
        from qibo.models import QFT
        return QFT(nqubits)

    def random_angle():
        return np.random.uniform(0, 2 * np.pi)

    def staggered_pairs(layer):
        # Even layers start at qubit 0, odd layers at qubit 1.
        return range(layer % 2, nqubits - 1, 2)

    if circuit_type == "ghz":
        c.add(gates.H(0))
        for q in range(nqubits - 1):
            c.add(gates.CNOT(q, q + 1))
        return c

    if circuit_type == "variational":
        for layer in range(nlayers):
            for q in range(nqubits):
                c.add(gates.RY(q, theta=random_angle()))
            for q in staggered_pairs(layer):
                c.add(gates.CZ(q, q + 1))
        return c

    # Remaining case: "brickwork".
    for q in range(nqubits):
        c.add(gates.H(q))
    for layer in range(nlayers):
        for q in staggered_pairs(layer):
            c.add(gates.CNOT(q, q + 1))
            c.add(gates.RZ(q, theta=random_angle()))
            c.add(gates.RZ(q + 1, theta=random_angle()))
    return c
|
||||
|
||||
|
||||
def _contract_mpi(tree, arrays, comm, root=0):
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
is_torch = type(arrays[0]).__module__.startswith("torch")
|
||||
|
||||
result_np = None
|
||||
for i in range(rank, tree.multiplicity, size):
|
||||
x = tree.contract_slice(arrays, i)
|
||||
x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x))
|
||||
result_np = x_np if result_np is None else result_np + x_np
|
||||
|
||||
if result_np is None:
|
||||
result_np = np.zeros(1, dtype=np.complex128)
|
||||
|
||||
result = np.zeros_like(result_np) if rank == root else None
|
||||
comm.Reduce(result_np, result, root=root)
|
||||
|
||||
if rank == root:
|
||||
import torch
|
||||
return torch.from_numpy(np.asarray(result)) if is_torch else result
|
||||
return None
|
||||
|
||||
|
||||
def run_mpi(circuit, nqubits, num_slices, total_repeats=1024,
            load_path=None, save_path=None, search_workers=48,
            search_timeout=600, total_threads=96):
    """Compute the dense state of ``circuit`` with an MPI-parallel TN contraction.

    Unless ``load_path`` is given, every rank searches for a contraction
    tree over ``total_repeats // size`` trials, rank 0 keeps the cheapest
    tree, and all ranks then contract its slices in parallel.

    Args:
        circuit: qibo circuit to simulate.
        nqubits: number of qubits (not used by this function).
        num_slices: forwarded to ``parallel_search``.
        total_repeats: total number of search trials across all ranks.
        load_path: pickle file with a previously saved ``{"tree", "psi"}``.
        save_path: when set, the tree is written by rank 0 and the function
            returns without contracting.
        search_workers: worker processes per rank for the path search.
        search_timeout: search time limit in seconds.
        total_threads: thread budget that is divided by the number of ranks
            for the torch contraction.

    Returns:
        ``(state_vector, seconds)`` on rank 0 and ``(None, seconds)`` on the
        other ranks; ``(None, search_seconds)`` when ``save_path`` is set.

    Raises:
        RuntimeError: if no rank found a contraction tree.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    qibo.set_backend("qibotn", platform="quimb")
    b = qibo.get_backend()
    b.configure_tn_simulation(ansatz="tn")

    import torch
    qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
                                  gate_opts={"max_bond": None, "cutoff": 1e-10})
    qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex128)

    # --- path search: each rank serial, gather best to rank 0 ---
    if load_path:
        if rank == 0:
            # NOTE: pickle executes arbitrary code on load - only use
            # trusted path files.
            with open(load_path, "rb") as f:
                saved = pickle.load(f)
            tree, psi, t_search = saved["tree"], saved["psi"], 0.0
            print(f"  [path loaded] {load_path}")
        else:
            tree = psi = None
            t_search = 0.0
    else:
        rank_repeats = max(1, total_repeats // size)
        t0 = time.time()
        # get TN object first (no contraction), then run parallel search
        psi_tn = qc.to_dense(rehearse="tn")
        local_tree = parallel_search(
            psi_tn, psi_tn.outer_inds(), rank_repeats, n_workers=search_workers,
            num_slices=num_slices, n_ranks=size, timeout=search_timeout,
        )
        t_search = time.time() - t0

        # A rank whose search failed still has to join the collective, so it
        # reports an infinite cost instead of dereferencing ``None``.
        local_cost = (
            float("inf") if local_tree is None else local_tree.combo_cost(factor=256)
        )
        all_results = comm.gather((local_cost, local_tree, psi_tn), root=0)
        tree = psi = None
        if rank == 0:
            candidates = [item for item in all_results if item[1] is not None]
            if candidates:
                _, tree, psi = min(candidates, key=lambda x: x[0])
        # Raise on every rank together so that no rank is left waiting.
        if not comm.bcast(tree is not None, root=0):
            raise RuntimeError("Failed to find a contraction tree on any rank.")
        if rank == 0:
            print(f"  [path search] {t_search:.3f}s  "
                  f"flops~2^{tree.contraction_cost(log=2):.2f}  "
                  f"size~2^{tree.contraction_width():.2f}  "
                  f"slices={tree.multiplicity}")
            if save_path:
                with open(save_path, "wb") as f:
                    pickle.dump({"tree": tree, "psi": psi}, f)
                print(f"  [path saved] {save_path}")

    if save_path:
        # Search-only mode: report the search time and skip the contraction.
        t_search = comm.bcast(t_search, root=0)
        return None, t_search

    tree = comm.bcast(tree, root=0)
    psi = comm.bcast(psi, root=0)
    t_search = comm.bcast(t_search, root=0)

    # --- contraction: all ranks work in parallel ---
    torch.set_num_threads(max(1, total_threads // size))
    arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex128) for a in psi.arrays]
    t0 = time.time()
    sv = _contract_mpi(tree, arrays, comm, root=0)
    t_contract = time.time() - t0

    if rank == 0:
        print(f"  [contraction] {t_contract:.3f}s")
        return np.array(sv).reshape(-1), t_search + t_contract
    return None, t_search + t_contract
|
||||
|
||||
|
||||
def run_mpi_expval(
    circuit,
    nqubits,
    observable=None,
    total_repeats=1024,
    search_workers=1,
    search_timeout=300,
):
    """Evaluate a Hamiltonian expectation value term by term over MPI.

    The extracted Hamiltonian terms are dealt round-robin to the ranks.
    For each term a rank builds the local expectation tensor network,
    searches a contraction tree (optionally with a process pool) and
    contracts all of its slices. Per-rank sums are gathered on rank 0.

    Returns ``(expectation, seconds)`` on rank 0 and ``(None, seconds)`` on
    the other ranks.

    Raises:
        RuntimeError: if no contraction tree is found for a term.
    """
    import torch

    world = MPI.COMM_WORLD
    rank, size = world.Get_rank(), world.Get_size()

    qibo.set_backend("qibotn", platform="quimb")
    tn_backend = qibo.get_backend()
    tn_backend.configure_tn_simulation(ansatz="tn")

    checked = check_observable(observable, nqubits)
    term_map = extract_gates_and_qubits(checked)

    qc = tn_backend._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=tn_backend.circuit_ansatz,
        gate_opts={"max_bond": None, "cutoff": 1e-10},
    )

    local_terms = term_map[rank::size]
    torch.set_num_threads(max(1, 96 // size))
    started = time.time()

    partial = 0.0 + 0.0j
    for term in local_terms:
        coeff, op, where = _term_to_quimb_operator(term)
        if op is None:
            # Pure identity term: contributes its coefficient directly.
            partial += coeff
            continue

        network = qc.local_expectation_tn(op, where=where)
        if len(network.outer_inds()) == 0:
            term_value = complex(network.contract())
        else:
            tree = parallel_search(
                network,
                network.outer_inds(),
                total_repeats,
                n_workers=search_workers,
                num_slices=1,
                n_ranks=size,
                timeout=search_timeout,
            )
            if tree is None:
                raise RuntimeError("Failed to find a contraction tree for expectation TN.")
            arrays = [
                torch.from_numpy(np.asarray(a)).to(torch.complex128)
                for a in network.arrays
            ]
            acc = 0
            for index in range(tree.multiplicity):
                acc = acc + tree.contract_slice(arrays, index)
            term_value = complex(acc.item() if hasattr(acc, 'item') else acc)
        partial += coeff * term_value

    t_total = time.time() - started

    gathered = world.gather(partial, root=0)
    if rank != 0:
        return None, t_total

    total_exp = sum(gathered)
    print(f"\n[TN expval] time={t_total:.4f}s  expval={total_exp.real:.12f}")
    return np.real_if_close(total_exp), t_total
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for the MPI tensor-network benchmark.

    In ``--mode expval`` the expectation value of the loaded observable is
    computed and saved by rank 0. In the default ``sv`` mode the state
    vector is computed, saved and, unless ``--no-compare`` is given,
    compared against a qibojit reference on rank 0.

    Output arrays are written below ``data/``, which is created on demand.
    """
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=30)
    parser.add_argument("--circuit", type=str, default="qft",
                        choices=["qft", "variational", "ghz", "brickwork"])
    parser.add_argument("--nlayers", type=int, default=3)
    parser.add_argument("--num-slices", type=int, default=1)
    parser.add_argument("--total-repeats", type=int, default=1024)
    parser.add_argument("--search-workers", type=int, default=1)
    parser.add_argument("--search-timeout", type=int, default=300)
    parser.add_argument("--observable-file", type=str, default=None)
    parser.add_argument("--observable-json", type=str, default=None)
    parser.add_argument("--save-path", type=str, default=None)
    parser.add_argument("--load-path", type=str, default=None)
    parser.add_argument("--no-compare", action="store_true")
    parser.add_argument("--mode", type=str, default="sv", choices=["sv", "expval"])
    args = parser.parse_args()

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if rank == 0:
        print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, "
              f"nlayers={args.nlayers}, ranks={comm.Get_size()}")

    # Same seed on every rank so that all ranks build the same circuit.
    np.random.seed(42)
    circuit = make_circuit(args.circuit, args.nqubits, args.nlayers)
    observable = _load_observable(args.observable_file, args.observable_json)

    if args.mode == "expval":
        try:
            expval, t_total = run_mpi_expval(
                circuit,
                args.nqubits,
                observable=observable,
                total_repeats=args.total_repeats,
                search_workers=args.search_workers,
                search_timeout=args.search_timeout,
            )
        except Exception as e:
            if rank == 0:
                print(f"[FAILED] {e}")
            raise
        if rank == 0:
            # np.save does not create missing directories.
            os.makedirs("data", exist_ok=True)
            np.save(f"data/expval_tn_{args.circuit}{args.nqubits}.npy", np.asarray(expval))
            if not args.no_compare:
                print("No built-in reference comparison for arbitrary observables.")
        return

    try:
        sv, t_total = run_mpi(circuit, args.nqubits, args.num_slices,
                              total_repeats=args.total_repeats,
                              load_path=args.load_path, save_path=args.save_path)
    except Exception as e:
        if rank == 0:
            print(f"[FAILED] {e}")
        raise

    if rank == 0 and sv is not None:
        print(f"\n[quimb TN MPI] time={t_total:.4f}s  shape={sv.shape}")
        # np.save does not create missing directories.
        os.makedirs("data", exist_ok=True)
        np.save(f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy", sv)

        if not args.no_compare:
            from qibotn.bak.benchmark_tn import run_qibojit
            import gc
            # Re-seed so the reference circuit uses the same random angles.
            np.random.seed(42)
            circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
            sv_ref, t_ref = run_qibojit(circuit_ref)
            np.save(f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy", sv_ref)
            print(f"[qibojit]  time={t_ref:.4f}s")
            # free memory before loading via mmap for expval comparison
            del sv, sv_ref
            gc.collect()
            from compare_jit_tn_quimb import check_results
            ref_path = f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy"
            tn_path = f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy"
            check_results(ref_path, tn_path, args.nqubits)
            if t_total > 0:
                print(f"Speedup    : {t_ref/t_total:.2f}x")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
25
tools/check_tree.py
Normal file
25
tools/check_tree.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""Check contraction tree statistics."""
|
||||
import pickle
import sys

# First CLI argument selects the tree file; otherwise the default is used.
path = sys.argv[1] if sys.argv[1:] else "data/tree_q25_l10.pkl"
with open(path, 'rb') as tree_file:
    tree = pickle.load(tree_file)

# Intel 8558P: 96 cores, 2.1GHz, AVX-512 (16 FP64/cycle), FMA x2
# complex128 multiply-add = 6 real FLOPs
CORES = 96
FREQ = 2.1e9
AVX512_FP64 = 16
TFLOPS = CORES * FREQ * AVX512_FP64 * 2 / 1e12  # ~6.45 TFLOPS real FP64
COMPLEX_FLOPS = TFLOPS / 6  # complex128 effective

flops = tree.total_flops()
slices = tree.multiplicity
# NOTE(review): ``total_flops()`` may already include the slice multiplicity,
# in which case multiplying by ``slices`` double counts - confirm against the
# cotengra documentation.
total_work = flops * slices
est_seconds = total_work / (COMPLEX_FLOPS * 1e12)

report = [
    f"File: {path}",
    f"Peak memory (GB): {tree.max_size() * 16 / 1e9:.2f}",
    f"Total FLOPs: {flops:.2e} x{slices} slices = {flops*slices:.2e}",
    f"Contraction width: {tree.contraction_width()}",
    f"Multiplicity (slices): {slices}",
    f"Estimated time (96 cores): {est_seconds:.1f}s ({est_seconds/3600:.2f}h)",
]
for line in report:
    print(line)
|
||||
137
tools/compare_vidal_backend_qmatchatea.py
Normal file
137
tools/compare_vidal_backend_qmatchatea.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Compare QMatchaTeaBackend with the VidalBackend fast path."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed, kind):
    """Build a layered test circuit with seeded random single-qubit rotations.

    Every layer applies ``RY`` then ``RZ`` with random angles to each qubit,
    followed by the entangling pattern selected by ``kind``
    (``"brickwall"``, ``"shifted-cz"`` or ``"reversed-cnot"``).

    Raises:
        ValueError: for an unknown ``kind`` (only checked once a layer is
            actually built, i.e. when ``nlayers > 0``).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    even_sites = range(0, nqubits - 1, 2)
    odd_sites = range(1, nqubits - 1, 2)

    def entangling_gates(layer):
        # Gates of the two-qubit pattern of one layer, in application order.
        if kind == "brickwall":
            return [gates.CNOT(q, q + 1) for q in even_sites] + [
                gates.CNOT(q, q + 1) for q in odd_sites
            ]
        if kind == "shifted-cz":
            return [gates.CZ(q, q + 1) for q in range(layer % 2, nqubits - 1, 2)]
        if kind == "reversed-cnot":
            return [gates.CNOT(q + 1, q) for q in even_sites] + [
                gates.CNOT(q, q + 1) for q in odd_sites
            ]
        raise ValueError(f"Unknown circuit kind {kind!r}.")

    for layer in range(nlayers):
        for q in range(nqubits):
            for rotation in (gates.RY, gates.RZ):
                circuit.add(rotation(q, theta=rng.uniform(-math.pi, math.pi)))
        for gate in entangling_gates(layer):
            circuit.add(gate)
    return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits, kind):
    """Build the symbolic Hamiltonian selected by ``kind``.

    * ``"ring-xz"``: ``0.5 * X(q) * Z(q + 1)`` on a closed ring.
    * ``"open-zz"``: nearest-neighbour ``Z Z`` on an open chain, divided by
      the number of bonds.
    * ``"mixed"``: ``0.25 * X(0) - 0.5 * Z(n - 1)`` plus ``0.125 * Y Y`` on
      every third bond.

    Raises:
        ValueError: for an unknown ``kind``.
    """
    if kind == "ring-xz":
        terms = [0.5 * X(q) * Z((q + 1) % nqubits) for q in range(nqubits)]
    elif kind == "open-zz":
        terms = [Z(q) * Z(q + 1) / (nqubits - 1) for q in range(nqubits - 1)]
    elif kind == "mixed":
        terms = [0.25 * X(0) - 0.5 * Z(nqubits - 1)]
        terms.extend(0.125 * Y(q) * Y(q + 1) for q in range(0, nqubits - 1, 3))
    else:
        raise ValueError(f"Unknown observable kind {kind!r}.")

    # Accumulate from 0 exactly like a running ``form += term``.
    form = 0
    for term in terms:
        form += term
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def run_backend(backend, circuit, observable):
    """Time one ``backend.expectation`` call.

    The backend is called with ``preprocess=False`` and
    ``compile_circuit=True``. Returns ``(value, seconds)``, where ``value``
    is the real part of the result as a ``float``.
    """
    t_begin = time.perf_counter()
    raw = backend.expectation(
        circuit,
        observable,
        preprocess=False,
        compile_circuit=True,
    )
    real_value = float(np.real(raw))
    return real_value, time.perf_counter() - t_begin
|
||||
|
||||
|
||||
def main():
    """Compare QMatchaTea and Vidal MPS expectation values from the command line.

    Builds one circuit and one observable from the CLI options, optionally
    loads a reference value from the ``"expectation"`` key of a JSON file, and
    prints one result line per backend with the value, the absolute error
    against the reference (``nan`` when no reference is given) and the elapsed
    seconds. The QMatchaTea run is skipped with ``--skip-qmatchatea``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--nqubits", type=int, default=34)
    cli.add_argument("--nlayers", type=int, default=20)
    cli.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument("--tensor-module", choices=("torch", "numpy"), default="torch")
    cli.add_argument("--torch-threads", type=int, default=32)
    cli.add_argument(
        "--circuit-kind",
        choices=("brickwall", "shifted-cz", "reversed-cnot"),
        default="brickwall",
    )
    cli.add_argument(
        "--observable-kind",
        choices=("ring-xz", "open-zz", "mixed"),
        default="ring-xz",
    )
    cli.add_argument("--reference-file")
    cli.add_argument("--skip-qmatchatea", action="store_true")
    opts = cli.parse_args()

    torch.set_num_threads(opts.torch_threads)
    circuit = build_circuit(opts.nqubits, opts.nlayers, opts.seed, opts.circuit_kind)
    observable = build_observable(opts.nqubits, opts.observable_kind)

    reference = None
    if opts.reference_file:
        with open(opts.reference_file, "r", encoding="utf-8") as handle:
            reference = float(json.load(handle)["expectation"])

    print(
        f"nqubits={opts.nqubits} nlayers={opts.nlayers} bond={opts.bond} "
        f"circuit={opts.circuit_kind} observable={opts.observable_kind} "
        f"tensor_module={opts.tensor_module} torch_threads={opts.torch_threads}"
    )
    if reference is not None:
        print(f"exact={reference:.16e}")
    print("backend value abs_error seconds")

    # Settings shared by both backends; each adds its own extras below.
    shared = {
        "ansatz": "MPS",
        "max_bond_dimension": opts.bond,
        "cut_ratio": 1e-12,
        "tensor_module": opts.tensor_module,
        "compile_circuit": True,
    }

    def report(label, backend):
        """Run one configured backend and print its result line."""
        value, seconds = run_backend(backend, circuit, observable)
        error = abs(value - reference) if reference is not None else float("nan")
        print(f"{label} {value:.16e} {error:.6e} {seconds:.3f}")

    if not opts.skip_qmatchatea:
        qmt = QMatchaTeaBackend()
        qmt.configure_tn_simulation(**shared, svd_control="E!", track_memory=False)
        report("qmatchatea", qmt)

    vidal = VidalBackend()
    vidal.configure_tn_simulation(**shared, fallback=True)
    report("vidal", vidal)


if __name__ == "__main__":
    main()
|
||||
72
tools/profile_vidal_chrome.py
Normal file
72
tools/profile_vidal_chrome.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Chrome trace profiler for the VidalBackend fast path."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from torch.profiler import ProfilerActivity, profile
|
||||
|
||||
from qibotn.benchmark_cases import build_circuit, terms_to_dict, observable_terms
|
||||
from qibotn.expectation_runner import ExpectationConfig, run_cpu_expectation
|
||||
|
||||
|
||||
def main():
    """Profile one Vidal MPS expectation run with the PyTorch CPU profiler.

    Runs ``run_cpu_expectation`` on a ``"brickwall_cnot"`` circuit with the
    ``"ring_xz"`` observable under ``torch.profiler.profile`` and writes two
    files under ``profiles/`` (relative to the current working directory):

    * ``<prefix>.json``: Chrome trace of the run.
    * ``<prefix>.txt``: the expectation value plus two operator tables, sorted
      by self CPU time and by total CPU time.

    ``--profile-memory`` also switches on shape recording and stack capture,
    since the same flag feeds ``record_shapes``, ``profile_memory`` and
    ``with_stack``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=34)
    parser.add_argument("--nlayers", type=int, default=20)
    parser.add_argument("--bond", type=int, default=512)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--torch-threads", type=int, default=32)
    parser.add_argument("--cut-ratio", type=float, default=1e-12)
    parser.add_argument("--profile-memory", action="store_true")
    parser.add_argument("--rows", type=int, default=60)
    args = parser.parse_args()

    torch.set_num_threads(args.torch_threads)

    prefix = f"profiles/vidal_n{args.nqubits}_l{args.nlayers}_b{args.bond}_t{args.torch_threads}"
    trace_path = Path(f"{prefix}.json")
    table_path = Path(f"{prefix}.txt")
    trace_path.parent.mkdir(parents=True, exist_ok=True)

    circuit = build_circuit("brickwall_cnot", args.nqubits, args.nlayers, args.seed)
    observable = terms_to_dict(observable_terms("ring_xz", args.nqubits))
    config = ExpectationConfig(
        ansatz="mps",
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        torch_threads=args.torch_threads,
    )

    print(
        f"profile vidal nqubits={args.nqubits} nlayers={args.nlayers} "
        f"bond={args.bond} threads={args.torch_threads}"
    )

    with profile(
        activities=[ProfilerActivity.CPU],
        record_shapes=args.profile_memory,
        profile_memory=args.profile_memory,
        with_stack=args.profile_memory,
    ) as prof:
        result = run_cpu_expectation(circuit, observable, config)

    # Aggregate the recorded events once; both tables are different sort
    # orders of the same averages.
    averages = prof.key_averages()
    table = (
        f"expval={result.value:.16e}\n\n"
        "# sorted by self_cpu_time_total\n"
        f"{averages.table(sort_by='self_cpu_time_total', row_limit=args.rows)}\n\n"
        "# sorted by cpu_time_total\n"
        f"{averages.table(sort_by='cpu_time_total', row_limit=args.rows)}\n"
    )

    print(table, end="")
    table_path.write_text(table, encoding="utf-8")
    prof.export_chrome_trace(str(trace_path))
    print(f"trace={trace_path}\ntable={table_path}")


if __name__ == "__main__":
    main()
|
||||
109
tools/qibojit_reference_expectation.py
Normal file
109
tools/qibojit_reference_expectation.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""Compute and cache a qibojit state-vector reference for the ring-XZ observable."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import qibo
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
    """Build the brickwall-CNOT reference circuit.

    Each layer applies ``RY`` then ``RZ`` with freshly drawn angles to every
    qubit, then ``CNOT(q, q + 1)`` on the even bonds followed by the odd bonds.

    Args:
        nqubits: Number of qubits.
        nlayers: Number of layers.
        seed: Seed of the NumPy generator that draws the rotation angles.

    Returns:
        The assembled ``Circuit``.
    """
    generator = np.random.default_rng(seed)
    reference = Circuit(nqubits)
    last = nqubits - 1

    for _layer in range(nlayers):
        # RY-then-RZ per qubit: this order fixes the random angle stream.
        for wire in range(nqubits):
            for rotation in (gates.RY, gates.RZ):
                angle = generator.uniform(-math.pi, math.pi)
                reference.add(rotation(wire, theta=angle))

        for first_bond in (0, 1):
            for control in range(first_bond, last, 2):
                reference.add(gates.CNOT(control, control + 1))

    return reference
|
||||
|
||||
|
||||
def ring_xz_expectation(state, nqubits, chunk_size):
    """Evaluate ``0.5 * sum_q <X_q Z_{(q + 1) mod n}>`` on a state vector.

    The state is processed in index chunks so that the temporary index and
    phase arrays never exceed ``chunk_size`` entries. Qubit 0 is the most
    significant bit of the basis index (bit position ``nqubits - 1 - q``).

    Args:
        state: One-dimensional complex amplitude array of length
            ``2 ** nqubits``.
        nqubits: Number of qubits.
        chunk_size: Number of amplitudes handled per chunk; must be positive.

    Returns:
        The expectation value as a Python float.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``state`` does not
            hold exactly ``2 ** nqubits`` amplitudes.
    """
    # A negative step would make the chunk loop run zero times and silently
    # report 0.0, so reject it up front together with a zero step.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}.")
    # A mismatched length would make ``indices ^ x_flip`` point outside the
    # array or pair up the wrong amplitudes.
    if state.size != 1 << nqubits:
        raise ValueError(
            f"state has {state.size} amplitudes, expected {1 << nqubits} "
            f"for {nqubits} qubits."
        )

    value = 0.0
    for qubit in range(nqubits):
        next_qubit = (qubit + 1) % nqubits
        # X flips the bit of ``qubit``; Z reads the bit of ``next_qubit``.
        x_flip = 1 << (nqubits - 1 - qubit)
        z_shift = nqubits - 1 - next_qubit
        term = 0.0
        for start in range(0, state.size, chunk_size):
            stop = min(start + chunk_size, state.size)
            indices = np.arange(start, stop, dtype=np.int64)
            z_bit = (indices >> z_shift) & 1
            z_phase = 1 - 2 * z_bit  # +1 for bit 0, -1 for bit 1
            term += np.vdot(state[indices ^ x_flip], z_phase * state[start:stop]).real
        value += 0.5 * term
    return float(value)
|
||||
|
||||
|
||||
def default_output_path(nqubits, nlayers, seed):
    """Return the default cache file path for a reference run.

    Args:
        nqubits: Number of qubits of the reference circuit.
        nlayers: Number of circuit layers.
        seed: Seed used to draw the circuit angles.

    Returns:
        ``Path`` of the form
        ``references/qibojit_ring_xz_n<nqubits>_l<nlayers>_seed<seed>.json``.
    """
    filename = f"qibojit_ring_xz_n{nqubits}_l{nlayers}_seed{seed}.json"
    return Path("references", filename)
|
||||
|
||||
|
||||
def main():
    """Compute, or load from cache, the qibojit ring-XZ reference expectation.

    If the output file already exists and ``--force`` is not given, the cached
    value is printed and nothing is simulated. Otherwise the state vector is
    simulated with the ``qibojit`` backend, the expectation is evaluated in
    chunks, and the result is saved as JSON.

    Raises:
        MemoryError: If the estimated state-vector size exceeds
            ``--max-state-gb`` and ``--allow-large`` was not passed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=32)
    parser.add_argument("--nlayers", type=int, default=3)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--output")
    parser.add_argument("--force", action="store_true")
    parser.add_argument("--allow-large", action="store_true")
    parser.add_argument("--max-state-gb", type=float, default=32.0)
    parser.add_argument("--chunk-size", type=int, default=1 << 20)
    opts = parser.parse_args()

    if opts.output:
        target = Path(opts.output)
    else:
        target = default_output_path(opts.nqubits, opts.nlayers, opts.seed)

    # Cache hit: report the stored value without simulating anything.
    if target.exists() and not opts.force:
        cached = json.loads(target.read_text(encoding="utf-8"))
        print(f"loaded {target}")
        print(f"expectation={float(cached['expectation']):.16e}")
        return

    # Size of the complex128 state vector alone, in GiB.
    bytes_per_amplitude = np.dtype(np.complex128).itemsize
    state_gb = (2**opts.nqubits) * bytes_per_amplitude / (1024**3)
    too_large = state_gb > opts.max_state_gb
    if too_large and not opts.allow_large:
        raise MemoryError(
            f"Estimated state vector alone is {state_gb:.1f} GiB. "
            "Pass --allow-large after confirming the node has enough memory."
        )

    qibo.set_backend("qibojit")
    circuit = build_circuit(opts.nqubits, opts.nlayers, opts.seed)

    began = time.perf_counter()
    state = circuit().state(numpy=True).reshape(-1)
    expectation = ring_xz_expectation(state, opts.nqubits, opts.chunk_size)
    elapsed = time.perf_counter() - began

    record = {
        "backend": "qibojit",
        "observable": "0.5 * sum_i X_i Z_((i+1) mod n)",
        "nqubits": opts.nqubits,
        "nlayers": opts.nlayers,
        "seed": opts.seed,
        "expectation": expectation,
        "seconds": elapsed,
        "state_vector_gib_estimate": state_gb,
    }
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(
        json.dumps(record, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )

    print(f"saved {target}")
    print(f"expectation={expectation:.16e}")
    print(f"seconds={elapsed:.3f}")


if __name__ == "__main__":
    main()
|
||||
127
tools/run_cpu_large_cases.sh
Executable file
127
tools/run_cpu_large_cases.sh
Executable file
@@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Large CPU expectation benchmarks for two-server runs.
#
# The defaults target two Intel Xeon Platinum 8558P servers with roughly
# 500 GiB of RAM each. HOSTFILE, PYTHON_BIN, MPIEXEC and the per-case knobs
# below can all be overridden from the environment.

# Run from the parent directory of the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Launcher settings (kept when already set and non-empty).
: "${PYTHON_BIN:=.venv/bin/python}"
: "${MPIEXEC:=mpiexec}"
: "${HOSTFILE:=hostfile}"

# Rank and torch-thread counts for the MPS and TN cases.
: "${MPS_RANKS:=8}"
: "${MPS_THREADS:=12}"
: "${TN_RANKS:=12}"
: "${TN_THREADS:=8}"

# Exported so the launched Python processes inherit them.
: "${OMP_NUM_THREADS:=1}"
: "${MKL_NUM_THREADS:=1}"
export OMP_NUM_THREADS MKL_NUM_THREADS
|
||||
|
||||
# Launch "$PYTHON_BIN" under MPI across the hosts in HOSTFILE.
#   $1   number of MPI ranks
#   $2.. script path and arguments passed to the Python interpreter
run_mpi() {
  local -r nprocs="$1"
  shift
  local -a launcher=("$MPIEXEC" -hostfile "$HOSTFILE" -n "$nprocs" "$PYTHON_BIN")
  "${launcher[@]}" "$@"
}
|
||||
|
||||
# Print a banner describing one benchmark case, then run it.
#   $1   human-readable case title
#   $2.. command (and arguments) to execute
run_case() {
  local -r title="$1"
  shift
  local -r rule="================================================================================"
  # One line per argument: blank line, framed title, settings, then the command.
  printf '%s\n' \
    "" \
    "$rule" \
    "$title" \
    "$rule" \
    "HOSTFILE=$HOSTFILE PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC" \
    "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS" \
    "$*"
  "$@"
}
|
||||
|
||||
# Run one benchmark mode (smoke, mps-long, mps-pressure, tn-long, all).
# Any other value prints the usage text to stderr and exits with status 2.
#
# "all" calls this function again instead of re-executing "$0": the script has
# already changed directory to ROOT_DIR, so a relative "$0" such as
# ./run_cpu_large_cases.sh would no longer resolve.
run_mode() {
  case "$1" in
    smoke)
      run_case "MPS MPI smoke: n=40 layers=30 bond=2048" \
        run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
        --mpi --mps \
        --nqubits "${MPS_SMOKE_NQ:-40}" \
        --nlayers "${MPS_SMOKE_LAYERS:-30}" \
        --bond "${MPS_SMOKE_BOND:-2048}" \
        --torch-threads "$MPS_THREADS" \
        --circuits brickwall_cnot reversed_cnot shifted_cz \
        --observables ring_xz open_zz range2_xx

      run_case "TN MPI smoke: n=32 layers=16 target_slices=12" \
        run_mpi "$TN_RANKS" benchmark_cpu_expectation.py \
        --mpi \
        --nqubits "${TN_SMOKE_NQ:-32}" \
        --nlayers "${TN_SMOKE_LAYERS:-16}" \
        --torch-threads "$TN_THREADS" \
        --circuits brickwall_cnot shifted_cz rxx_rzz \
        --observables ring_xz open_zz range2_xx \
        --tn-target-slices "${TN_SMOKE_SLICES:-12}"
      ;;

    mps-long)
      run_case "MPS MPI long: n=64 layers=48 bond=4096" \
        run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
        --mpi --mps \
        --nqubits "${MPS_LONG_NQ:-64}" \
        --nlayers "${MPS_LONG_LAYERS:-48}" \
        --bond "${MPS_LONG_BOND:-4096}" \
        --torch-threads "$MPS_THREADS" \
        --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
        --observables ring_xz open_zz mixed_local range2_xx
      ;;

    mps-pressure)
      run_case "MPS MPI pressure: n=80 layers=64 bond=4096" \
        run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
        --mpi --mps \
        --nqubits "${MPS_PRESSURE_NQ:-80}" \
        --nlayers "${MPS_PRESSURE_LAYERS:-64}" \
        --bond "${MPS_PRESSURE_BOND:-4096}" \
        --torch-threads "$MPS_THREADS" \
        --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz swap_scramble \
        --observables ring_xz open_zz mixed_local range2_xx long_z_string
      ;;

    tn-long)
      run_case "TN MPI long: n=36 layers=20 target_slices=24" \
        run_mpi "$TN_RANKS" benchmark_cpu_expectation.py \
        --mpi \
        --nqubits "${TN_LONG_NQ:-36}" \
        --nlayers "${TN_LONG_LAYERS:-20}" \
        --torch-threads "$TN_THREADS" \
        --circuits brickwall_cnot shifted_cz rxx_rzz \
        --observables ring_xz open_zz range2_xx \
        --tn-target-slices "${TN_LONG_SLICES:-24}"
      ;;

    all)
      # mps-pressure is not part of "all"; run it explicitly when wanted.
      run_mode smoke
      run_mode mps-long
      run_mode tn-long
      ;;

    help|*)
      cat >&2 <<'EOF'
Usage: tools/run_cpu_large_cases.sh [smoke|mps-long|mps-pressure|tn-long|all]

Common overrides:
HOSTFILE=hostfile
PYTHON_BIN=.venv/bin/python
MPIEXEC=mpiexec
MPS_RANKS=8 MPS_THREADS=12
TN_RANKS=12 TN_THREADS=8

Scale overrides:
MPS_LONG_NQ=64 MPS_LONG_LAYERS=48 MPS_LONG_BOND=4096
MPS_PRESSURE_NQ=80 MPS_PRESSURE_LAYERS=64 MPS_PRESSURE_BOND=4096
TN_LONG_NQ=36 TN_LONG_LAYERS=20 TN_LONG_SLICES=24
EOF
      exit 2
      ;;
  esac
}

run_mode "${1:-help}"
|
||||
148
tools/run_cpu_single_cases.sh
Executable file
148
tools/run_cpu_single_cases.sh
Executable file
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Single-node CPU scale probes for expectation benchmarks.
#
# Meant for one 96-core / ~500 GiB RAM node. The default "probe" mode runs
# moderate MPS and TN cases first; the larger modes are intended for use after
# checking runtime and memory in the probe output.

# Run from the parent directory of the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Launcher settings (kept when already set and non-empty).
: "${PYTHON_BIN:=.venv/bin/python}"
: "${PYTHON_FLAGS:=-u}"
: "${MPIEXEC:=mpiexec}"
: "${TIME_BIN:=/usr/bin/time}"

# Rank and torch-thread counts for the MPS and TN cases.
: "${MPS_RANKS:=8}"
: "${MPS_THREADS:=12}"
: "${TN_RANKS:=8}"
: "${TN_THREADS:=12}"

# Exported so the launched Python processes inherit them.
: "${OMP_NUM_THREADS:=1}"
: "${MKL_NUM_THREADS:=1}"
export OMP_NUM_THREADS MKL_NUM_THREADS
|
||||
|
||||
# Print a rough resident-memory estimate for an MPS run.
#   $1 number of qubits
#   $2 bond dimension
# The estimate is n * 2 * chi^2 entries of 16 bytes each (presumably
# complex128 site tensors; confirm against the executor), split evenly over
# MPS_RANKS for the per-rank figure.
estimate_mps_memory() {
  local -r sites="$1"
  local -r chi="$2"
  "$PYTHON_BIN" - "$sites" "$chi" "$MPS_RANKS" <<'PY'
import sys

n, chi, ranks = (int(arg) for arg in sys.argv[1:4])
total_bytes = n * 2 * chi * chi * 16
gib = 1024**3
print(
    "MPS rough resident memory: "
    f"total={total_bytes / gib:.1f} GiB "
    f"per_rank={total_bytes / ranks / gib:.1f} GiB "
    "(temporary eig/SVD workspaces are additional)"
)
PY
}
|
||||
|
||||
# Echo a command between two rules, then run it under "$TIME_BIN -v".
#   $@ command and arguments to execute
run_timed() {
  local -r rule="--------------------------------------------------------------------------------"
  printf '%s\n' "" "$rule" "$*" "$rule"
  "$TIME_BIN" -v "$@"
}
|
||||
|
||||
# Run one MPS benchmark case under MPI with timing.
#   $1   banner label
#   $2   number of qubits
#   $3   number of layers
#   $4   bond dimension
#   $5.. extra arguments appended to benchmark_cpu_expectation.py
run_mps_case() {
  local -r label="$1" nqubits="$2" nlayers="$3" bond="$4"
  shift 4
  local -r rule="================================================================================"
  printf '%s\n' \
    "" \
    "$rule" \
    "$label" \
    "$rule" \
    "PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC" \
    "MPS_RANKS=$MPS_RANKS MPS_THREADS=$MPS_THREADS" \
    "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS"
  estimate_mps_memory "$nqubits" "$bond"

  local -a benchmark_args=(
    --mpi --mps
    --nqubits "$nqubits"
    --nlayers "$nlayers"
    --bond "$bond"
    --torch-threads "$MPS_THREADS"
  )
  # PYTHON_FLAGS is deliberately unquoted so it can carry several flags.
  run_timed "$MPIEXEC" -n "$MPS_RANKS" "$PYTHON_BIN" $PYTHON_FLAGS benchmark_cpu_expectation.py \
    "${benchmark_args[@]}" "$@"
}
|
||||
|
||||
# Run one TN benchmark case under MPI with timing.
#   $1   banner label
#   $2   number of qubits
#   $3   number of layers
#   $4.. extra arguments appended to benchmark_cpu_expectation.py
run_tn_case() {
  local -r label="$1" nqubits="$2" nlayers="$3"
  shift 3
  local -r rule="================================================================================"
  printf '%s\n' \
    "" \
    "$rule" \
    "$label" \
    "$rule" \
    "PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC" \
    "TN_RANKS=$TN_RANKS TN_THREADS=$TN_THREADS" \
    "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS" \
    "TN memory is contraction-tree dependent; increase --tn-target-slices if RSS is high."

  local -a benchmark_args=(
    --mpi
    --nqubits "$nqubits"
    --nlayers "$nlayers"
    --torch-threads "$TN_THREADS"
  )
  # PYTHON_FLAGS is deliberately unquoted so it can carry several flags.
  run_timed "$MPIEXEC" -n "$TN_RANKS" "$PYTHON_BIN" $PYTHON_FLAGS benchmark_cpu_expectation.py \
    "${benchmark_args[@]}" "$@"
}
|
||||
|
||||
# Select the benchmark mode from the first argument. "help", a missing
# argument and any unknown mode all print the usage text to stderr and exit
# with status 2.
mode="${1:-help}"

if [[ "$mode" == "probe" ]]; then
  run_mps_case "MPS probe: n=40 layers=30 bond=2048" 40 30 2048 \
    --circuits brickwall_cnot \
    --observables ring_xz

  run_tn_case "TN probe: n=28 layers=12 target_slices=8" 28 12 \
    --circuits brickwall_cnot \
    --observables ring_xz \
    --tn-target-slices 8

elif [[ "$mode" == "mps-medium" ]]; then
  run_mps_case "MPS medium: n=56 layers=40 bond=3072" 56 40 3072 \
    --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
    --observables ring_xz open_zz mixed_local range2_xx

elif [[ "$mode" == "mps-long" ]]; then
  run_mps_case "MPS long: n=64 layers=48 bond=4096" 64 48 4096 \
    --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
    --observables ring_xz open_zz mixed_local range2_xx

elif [[ "$mode" == "tn-medium" ]]; then
  run_tn_case "TN medium: n=32 layers=16 target_slices=16" 32 16 \
    --circuits brickwall_cnot shifted_cz rxx_rzz \
    --observables ring_xz open_zz range2_xx \
    --tn-target-slices 16

elif [[ "$mode" == "tn-long" ]]; then
  run_tn_case "TN long: n=36 layers=20 target_slices=32" 36 20 \
    --circuits brickwall_cnot shifted_cz rxx_rzz \
    --observables ring_xz open_zz range2_xx \
    --tn-target-slices 32

else
  cat >&2 <<'EOF'
Usage: tools/run_cpu_single_cases.sh [probe|mps-medium|mps-long|tn-medium|tn-long]

Common overrides:
PYTHON_BIN=.venv/bin/python
MPIEXEC=mpiexec
MPS_RANKS=8 MPS_THREADS=12
TN_RANKS=8 TN_THREADS=12
OMP_NUM_THREADS=1 MKL_NUM_THREADS=1
EOF
  exit 2
fi
|
||||
70
tools/run_vidal_segment_mpi_scan.sh
Executable file
70
tools/run_vidal_segment_mpi_scan.sh
Executable file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Rank/thread scaling scan for the Vidal MPS benchmark. RANKS and THREADS are
# paired element by element: a rank count of 1 runs the benchmark directly,
# any other count runs it under MPI with --mpi.

# Problem size and seed.
: "${NQ:=34}"
: "${LAYERS:=20}"
: "${BOND:=512}"
: "${SEED:=42}"
# Space-separated scan lists; both must have the same number of entries.
: "${RANKS:=1 2 4}"
: "${THREADS:=32 32 16}"
# Launcher settings.
: "${PYTHON_BIN:=.venv/bin/python}"
: "${MPIEXEC:=mpiexec}"
# Benchmark selection; EXACT=1 appends --exact.
: "${CIRCUIT:=brickwall_cnot}"
: "${OBSERVABLE:=ring_xz}"
: "${EXACT:=0}"

# Run from the parent directory of the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Print the usage text to stderr.
usage() {
  cat >&2 <<'EOF'
Usage: tools/run_vidal_segment_mpi_scan.sh run

Overrides:
NQ=34 LAYERS=20 BOND=512 SEED=42
RANKS="1 2 4" THREADS="32 32 16"
CIRCUIT=brickwall_cnot OBSERVABLE=ring_xz
EXACT=1
PYTHON_BIN=.venv/bin/python MPIEXEC=mpiexec
EOF
}

# Only "run" starts the scan. "help" (or no argument) is a successful usage
# request; anything else is an error.
case "${1:-help}" in
  run)
    ;;
  help)
    usage
    exit 0
    ;;
  *)
    usage
    exit 2
    ;;
esac

read -r -a ranks <<< "$RANKS"
read -r -a threads <<< "$THREADS"

if (( ${#ranks[@]} != ${#threads[@]} )); then
  echo "RANKS and THREADS must have the same number of entries." >&2
  exit 2
fi

# Arguments shared by every benchmark invocation of the scan.
common=(
  --nqubits "$NQ"
  --nlayers "$LAYERS"
  --bond "$BOND"
  --seed "$SEED"
  --mps
  --circuits "$CIRCUIT"
  --observables "$OBSERVABLE"
)
if [[ "$EXACT" == "1" ]]; then
  common+=(--exact)
fi

for (( pos = 0; pos < ${#ranks[@]}; pos++ )); do
  nrank="${ranks[pos]}"
  nthr="${threads[pos]}"
  if [[ "$nrank" == "1" ]]; then
    echo "== Vidal serial ranks=1 torch_threads=$nthr =="
    "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      "${common[@]}" --torch-threads "$nthr"
  else
    echo "== Vidal segmented MPI ranks=$nrank torch_threads=$nthr =="
    "$MPIEXEC" -n "$nrank" "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      "${common[@]}" --torch-threads "$nthr" --mpi
  fi
done
|
||||
202
tools/validate_vidal_mpi_correctness.py
Normal file
202
tools/validate_vidal_mpi_correctness.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""Correctness checks for the Vidal/TEBD MPS fast path.
|
||||
|
||||
The cases here intentionally cover more than the benchmark ring-XZ observable:
|
||||
different nearest-neighbor gate orientations and several Pauli-sum observables.
|
||||
Run serially to compare qibojit/statevector vs Vidal, or under MPI to compare
|
||||
the segmented Vidal executor.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from qibo import Circuit, gates
|
||||
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build a layered test circuit with nearest-neighbour entanglers.

    Every layer applies ``RY`` then ``RZ`` with freshly drawn angles to each
    qubit (plus an ``RX`` for ``"rx_ry_cz"``), followed by the entanglers:

    * ``"brickwall"``: ``CNOT(q, q + 1)`` on even bonds, then on odd bonds.
    * ``"reversed_cnot"``: as brickwall, but with control and target swapped
      on the even bonds in odd layers and on the odd bonds in even layers.
    * ``"rx_ry_cz"``: ``CZ(q, q + 1)`` on the bonds starting at ``layer % 2``.

    Args:
        kind: Name of the circuit family.
        nqubits: Number of qubits.
        nlayers: Number of layers.
        seed: Seed of the NumPy generator that draws the rotation angles.

    Returns:
        The assembled ``Circuit``.

    Raises:
        ValueError: If ``kind`` is not recognised. The check happens while
            building a layer, so it is never raised when ``nlayers`` is 0.
    """
    generator = np.random.default_rng(seed)
    ansatz = Circuit(nqubits)
    with_rx = kind == "rx_ry_cz"
    swap_some = kind == "reversed_cnot"
    last = nqubits - 1

    for depth in range(nlayers):
        odd_layer = bool(depth % 2)

        # Rotation order per qubit (RY, RZ, optional RX) fixes the angle stream.
        rotations = (gates.RY, gates.RZ, gates.RX) if with_rx else (gates.RY, gates.RZ)
        for wire in range(nqubits):
            for rotation in rotations:
                angle = generator.uniform(-math.pi, math.pi)
                ansatz.add(rotation(wire, theta=angle))

        if kind in ("brickwall", "reversed_cnot"):
            for first_bond in (0, 1):
                # Even bonds are reversed in odd layers, odd bonds in even ones.
                if first_bond == 0:
                    reverse = swap_some and odd_layer
                else:
                    reverse = swap_some and not odd_layer
                for left in range(first_bond, last, 2):
                    control, target = (left + 1, left) if reverse else (left, left + 1)
                    ansatz.add(gates.CNOT(control, target))
        elif with_rx:
            for left in range(depth % 2, last, 2):
                ansatz.add(gates.CZ(left, left + 1))
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")

    return ansatz
|
||||
|
||||
|
||||
def observable_terms(kind, nqubits):
    """Return the Pauli-sum terms of the observable named by ``kind``.

    Each term is ``(coefficient, ops)`` where ``ops`` is a tuple of
    ``(pauli_name, site)`` pairs.

    * ``"ring_xz"``: ``0.5 * X(s) Z((s + 1) % nqubits)`` for every site.
    * ``"open_zz"``: ``Z(s) Z(s + 1) / (nqubits - 1)`` over the open chain.
    * ``"mixed_local"``: ``0.25 * X(0)``, ``-0.5 * Z(nqubits - 1)`` and
      ``0.125 * Y(s) Y(s + 1)`` for every third bond.

    Args:
        kind: Name of the observable family.
        nqubits: Number of qubits.

    Returns:
        List of ``(coefficient, ops)`` tuples.

    Raises:
        ValueError: If ``kind`` is not one of the supported names.
    """
    collected = []
    if kind == "ring_xz":
        for site in range(nqubits):
            neighbour = (site + 1) % nqubits
            collected.append((0.5, (("X", site), ("Z", neighbour))))
    elif kind == "open_zz":
        for site in range(nqubits - 1):
            # Computed per term so an empty chain never divides by zero.
            weight = 1.0 / (nqubits - 1)
            collected.append((weight, (("Z", site), ("Z", site + 1))))
    elif kind == "mixed_local":
        collected.append((0.25, (("X", 0),)))
        collected.append((-0.5, (("Z", nqubits - 1),)))
        for site in range(0, nqubits - 1, 3):
            collected.append((0.125, (("Y", site), ("Y", site + 1))))
    else:
        raise ValueError(f"Unknown observable kind {kind!r}.")
    return collected
|
||||
|
||||
|
||||
def exact_pauli_sum(circuit, terms, nqubits):
    """Evaluate a Pauli-sum expectation value on the full state vector.

    The circuit is executed once via ``circuit().state(numpy=True)``. For each
    term the Pauli string is applied to the computational basis: ``X`` and
    ``Y`` flip the site's bit, while ``Y`` and ``Z`` contribute a bit-dependent
    phase. Qubit 0 is the most significant bit of the basis index.

    Args:
        circuit: Callable whose result exposes ``state(numpy=True)``.
        terms: Iterable of ``(coefficient, ops)`` with ``ops`` a sequence of
            ``(pauli_name, site)`` pairs; names are case-insensitive.
        nqubits: Number of qubits.

    Returns:
        Real part of the expectation value as a Python float.

    Raises:
        ValueError: If a Pauli name is not one of ``I``, ``X``, ``Y``, ``Z``.
    """
    psi = circuit().state(numpy=True).reshape(-1)
    basis = np.arange(psi.size, dtype=np.int64)
    total = 0.0 + 0.0j

    for weight, paulis in terms:
        partner = basis.copy()  # basis index each amplitude is mapped to
        sign = np.ones(psi.size, dtype=np.complex128)
        for label, site in paulis:
            offset = nqubits - 1 - site
            bit = (basis >> offset) & 1
            label = label.upper()
            if label not in ("I", "X", "Y", "Z"):
                raise ValueError(f"Unsupported Pauli {label!r}.")
            if label in ("X", "Y"):
                partner ^= 1 << offset
            if label == "Y":
                sign *= 1j * (1 - 2 * bit)  # Y|0> = i|1>, Y|1> = -i|0>
            elif label == "Z":
                sign *= 1 - 2 * bit
        total += weight * np.vdot(psi[partner], sign * psi)

    return float(total.real)
|
||||
|
||||
|
||||
def run_vidal(circuit, terms, nqubits, bond, tensor_module):
    """Evaluate a Pauli sum with the serial ``VidalTEBDExecutor``.

    Args:
        circuit: Circuit to run.
        terms: Pauli-sum terms passed to ``expectation_pauli_sum``.
        nqubits: Number of qubits.
        bond: Maximum bond dimension.
        tensor_module: Tensor backend name forwarded to the executor.

    Returns:
        The expectation value converted to a Python float.
    """
    settings = {
        "nqubits": nqubits,
        "max_bond": bond,
        "cut_ratio": 1e-12,
        "tensor_module": tensor_module,
    }
    runner = VidalTEBDExecutor(**settings)
    runner.run_circuit(circuit)
    result = runner.expectation_pauli_sum(terms)
    return float(result)
|
||||
|
||||
|
||||
def run_segment_mpi(circuit, terms, nqubits, bond, tensor_module, comm):
    """Evaluate a Pauli sum with the ``SegmentVidalMPIExecutor``.

    Args:
        circuit: Circuit to run.
        terms: Pauli-sum terms passed to ``expectation_pauli_sum_root``.
        nqubits: Number of qubits.
        bond: Maximum bond dimension.
        tensor_module: Tensor backend name forwarded to the executor.
        comm: MPI communicator handed to the executor.

    Returns:
        Whatever ``expectation_pauli_sum_root`` returns, unconverted.
        NOTE(review): presumably only meaningful on rank 0; confirm against
        the executor.
    """
    settings = {
        "nqubits": nqubits,
        "max_bond": bond,
        "cut_ratio": 1e-12,
        "tensor_module": tensor_module,
        "comm": comm,
    }
    runner = SegmentVidalMPIExecutor(**settings)
    runner.run_circuit(circuit)
    return runner.expectation_pauli_sum_root(terms)
|
||||
|
||||
|
||||
def main():
    """Check Vidal expectation values against an exact state-vector reference.

    For every requested circuit kind, rank 0 computes the exact value of each
    requested observable with ``exact_pauli_sum``. With ``--mpi`` those values
    are broadcast from rank 0 and the segmented MPI executor is used;
    otherwise the serial executor is used. Rank 0 prints one line per
    (circuit, observable) pair with the exact value, the computed value, the
    absolute error and the elapsed seconds.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=16)
    parser.add_argument("--nlayers", type=int, default=6)
    parser.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--tensor-module", choices=("torch", "numpy"), default="torch")
    parser.add_argument("--torch-threads", type=int, default=32)
    parser.add_argument("--mpi", action="store_true")
    parser.add_argument(
        "--circuits",
        nargs="+",
        default=("brickwall", "reversed_cnot", "rx_ry_cz"),
    )
    parser.add_argument(
        "--observables",
        nargs="+",
        default=("ring_xz", "open_zz", "mixed_local"),
    )
    opts = parser.parse_args()

    torch.set_num_threads(opts.torch_threads)

    comm, rank, size = None, 0, 1
    if opts.mpi:
        # Imported lazily so serial runs do not need mpi4py.
        from mpi4py import MPI

        comm = MPI.COMM_WORLD
        rank, size = comm.Get_rank(), comm.Get_size()
    is_root = rank == 0

    if is_root:
        mode = "vidal" if not opts.mpi else f"vidal-segment-mpi/{size}"
        print(
            f"mode={mode} nqubits={opts.nqubits} nlayers={opts.nlayers} "
            f"bond={opts.bond} tensor_module={opts.tensor_module}"
        )
        print("circuit observable exact value abs_error seconds")

    def evaluate(circuit, terms):
        """Run the executor selected by ``--mpi`` on one observable."""
        if opts.mpi:
            return run_segment_mpi(
                circuit, terms, opts.nqubits, opts.bond, opts.tensor_module, comm
            )
        return run_vidal(circuit, terms, opts.nqubits, opts.bond, opts.tensor_module)

    for circuit_kind in opts.circuits:
        circuit = build_circuit(circuit_kind, opts.nqubits, opts.nlayers, opts.seed)

        # Only rank 0 simulates the full state vector.
        references = None
        if is_root:
            references = {}
            for obs_kind in opts.observables:
                references[obs_kind] = exact_pauli_sum(
                    circuit, observable_terms(obs_kind, opts.nqubits), opts.nqubits
                )
        if comm is not None:
            references = comm.bcast(references, root=0)

        for obs_kind in opts.observables:
            terms = observable_terms(obs_kind, opts.nqubits)
            began = time.perf_counter()
            value = evaluate(circuit, terms)
            if not is_root:
                continue
            elapsed = time.perf_counter() - began
            exact = references[obs_kind]
            print(
                f"{circuit_kind} {obs_kind} {exact:.16e} {value:.16e} "
                f"{abs(value - exact):.6e} {elapsed:.3f}"
            )


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user