Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f93c95b3a1 | |||
| eed42dcfa9 | |||
|
|
28080dff1d | ||
|
|
ef3d7e9ee6 |
1288
.venv/lib/python3.12/site-packages/cotengra/contract.py
Normal file
1288
.venv/lib/python3.12/site-packages/cotengra/contract.py
Normal file
File diff suppressed because it is too large
Load Diff
4130
.venv/lib/python3.12/site-packages/cotengra/core.py
Normal file
4130
.venv/lib/python3.12/site-packages/cotengra/core.py
Normal file
File diff suppressed because it is too large
Load Diff
1168
.venv/lib/python3.12/site-packages/cotengra/hyperoptimizers/hyper.py
Normal file
1168
.venv/lib/python3.12/site-packages/cotengra/hyperoptimizers/hyper.py
Normal file
File diff suppressed because it is too large
Load Diff
583
.venv/lib/python3.12/site-packages/cotengra/parallel.py
Normal file
583
.venv/lib/python3.12/site-packages/cotengra/parallel.py
Normal file
@@ -0,0 +1,583 @@
|
||||
"""Interface for parallelism."""
|
||||
|
||||
import atexit
|
||||
import collections
|
||||
import functools
|
||||
import importlib
|
||||
import inspect
|
||||
import numbers
|
||||
import operator
|
||||
import warnings
|
||||
|
||||
# ``import importlib`` does not guarantee that the ``util`` submodule is
# loaded, so import it explicitly before calling ``importlib.util.find_spec``
import importlib.util

# backend handed to ``get_pool`` for ``parallel="auto"``; ``None`` means
# "fall back to ``_DEFAULT_BACKEND``"
_AUTO_BACKEND = None

# check for loky, joblib (vendors loky), then default to concurrent.futures
have_loky = importlib.util.find_spec("loky") is not None
have_joblib = importlib.util.find_spec("joblib") is not None
if have_loky or have_joblib:
    _DEFAULT_BACKEND = "loky"
else:
    _DEFAULT_BACKEND = "concurrent.futures"
|
||||
|
||||
|
||||
@functools.lru_cache(None)
def choose_default_num_workers():
    """Pick the default worker count, computed once and then cached.

    The environment variables ``COTENGRA_NUM_WORKERS`` and then
    ``OMP_NUM_THREADS`` are consulted in that order; the first one that is set
    is converted with ``int``. If neither is set, ``os.cpu_count()`` is
    returned.
    """
    import os

    for variable in ("COTENGRA_NUM_WORKERS", "OMP_NUM_THREADS"):
        requested = os.environ.get(variable)
        if requested is not None:
            return int(requested)

    return os.cpu_count()
|
||||
|
||||
|
||||
def get_pool(n_workers=None, maybe_create=False, backend=None):
    """Get a parallel pool for ``backend``.

    Parameters
    ----------
    n_workers : None or int, optional
        Requested number of workers. For the non-distributed backends
        ``None`` is replaced by ``choose_default_num_workers()``.
    maybe_create : bool, optional
        Forwarded to the ``"dask"`` and ``"ray"`` getters only.
    backend : None or str, optional
        One of ``"dask"``, ``"ray"``, ``"loky"``, ``"concurrent.futures"`` or
        ``"threads"``. ``None`` selects ``_DEFAULT_BACKEND``.

    Returns
    -------
    The pool for the backend, or ``None`` if ``backend`` is not one of the
    names above.
    """
    backend = _DEFAULT_BACKEND if backend is None else backend

    # distributed backends: don't fill in a default n_workers
    if backend == "dask":
        return _get_pool_dask(n_workers=n_workers, maybe_create=maybe_create)
    elif backend == "ray":
        return _get_pool_ray(n_workers=n_workers, maybe_create=maybe_create)

    if n_workers is None:
        n_workers = choose_default_num_workers()

    if backend == "loky":
        return get_loky_get_reusable_executor()(max_workers=n_workers)
    elif backend == "concurrent.futures":
        return _get_process_pool_cf(n_workers=n_workers)
    elif backend == "threads":
        return _get_thread_pool_cf(n_workers=n_workers)

    return None
|
||||
|
||||
|
||||
@functools.lru_cache(None)
def _infer_backed_cached(pool_class):
    """Translate a pool *class* into a backend name (memoized per class).

    A class named ``RayExecutor`` maps to ``"ray"``. Otherwise the top-level
    package of the class's module decides: ``concurrent`` ->
    ``"concurrent.futures"``, ``joblib`` -> ``"loky"``, ``distributed`` ->
    ``"dask"``, and anything else is returned as is.
    """
    if pool_class.__name__ == "RayExecutor":
        return "ray"

    top_level = pool_class.__module__.split(".")[0]
    renamed = {
        "concurrent": "concurrent.futures",
        "joblib": "loky",
        "distributed": "dask",
    }
    return renamed.get(top_level, top_level)


def _infer_backend(pool):
    """Return the backend name of the instance ``pool`` - cached by class."""
    pool_class = pool.__class__
    return _infer_backed_cached(pool_class)
|
||||
|
||||
|
||||
def get_n_workers(pool=None):
    """Work out how many workers ``pool`` has (mostly used to decide how many
    tasks to pre-dispatch).

    Parameters
    ----------
    pool : optional
        The pool to inspect; ``None`` means ``get_pool()``.

    Returns
    -------
    ``pool._max_workers`` when that attribute exists. Otherwise an ``int``:
    for dask the summed ``nthreads`` of the scheduler's workers, for ray the
    ``"CPU"`` entry of ``available_resources()``, for mpi4py the size of
    ``MPI.COMM_WORLD``.

    Raises
    ------
    ValueError
        If none of the above applies.
    """
    if pool is None:
        pool = get_pool()

    not_found = object()
    max_workers = getattr(pool, "_max_workers", not_found)
    if max_workers is not not_found:
        return max_workers

    backend = _infer_backend(pool)

    if backend == "dask":
        worker_specs = pool.scheduler_info(n_workers=-1)["workers"]
        return sum(
            int(spec.get("nthreads", 1) or 1) for spec in worker_specs.values()
        )

    if backend == "ray":
        import time

        ray = get_ray()
        # keep polling until ray reports a "CPU" resource entry
        while True:
            try:
                return int(ray.available_resources()["CPU"])
            except KeyError:
                time.sleep(1e-3)

    if backend == "mpi4py":
        from mpi4py import MPI

        return MPI.COMM_WORLD.size

    raise ValueError(f"Can't find number of workers in pool {pool}.")
|
||||
|
||||
|
||||
def parse_parallel_arg(parallel):
    """Turn the user facing ``parallel`` argument into a pool (or ``None``).

    Parameters
    ----------
    parallel : 'auto', bool, int, str or pool
        - ``'auto'``: ``get_pool`` for the current auto backend, with
          ``maybe_create=False``.
        - ``False``: no parallelism, ``None`` is returned.
        - ``True``: use the auto backend, first setting it to the default
          backend if it is still unset.
        - integer: a pool of the default backend with that many workers; this
          also resets the auto backend to the default.
        - ``'loky'``, ``'concurrent.futures'``, ``'threads'``: a pool of that
          backend.
        - ``'dask'``, ``'ray'``: a pool of that backend, which is also
          recorded as the auto backend.
        - anything else is returned unchanged.

    Returns
    -------
    A pool, ``None``, or the untouched ``parallel`` argument.
    """
    global _AUTO_BACKEND

    if parallel == "auto":
        return get_pool(maybe_create=False, backend=_AUTO_BACKEND)

    if parallel is False:
        return None

    if parallel is True:
        if _AUTO_BACKEND is None:
            _AUTO_BACKEND = _DEFAULT_BACKEND
        # continue below as if the backend name had been given
        parallel = _AUTO_BACKEND

    if isinstance(parallel, numbers.Integral):
        _AUTO_BACKEND = _DEFAULT_BACKEND
        return get_pool(
            n_workers=parallel, maybe_create=True, backend=_DEFAULT_BACKEND
        )

    # local backends: hand out a pool without touching the auto backend
    for name in ("loky", "concurrent.futures", "threads"):
        if parallel == name:
            return get_pool(maybe_create=True, backend=name)

    # distributed backends: additionally remember them for 'auto'
    for name in ("dask", "ray"):
        if parallel == name:
            _AUTO_BACKEND = name
            return get_pool(maybe_create=True, backend=name)

    return parallel
|
||||
|
||||
|
||||
def set_parallel_backend(backend):
    """Create (or fetch) a parallel pool of type ``backend`` by delegating to
    ``parse_parallel_arg``, and return it.

    NOTE(review): ``parse_parallel_arg`` only records the backend for
    ``'auto'`` parallel when given ``'dask'``, ``'ray'``, ``True`` or an
    integer - confirm whether the other backend names should register too.
    """
    pool = parse_parallel_arg(backend)
    return pool
|
||||
|
||||
|
||||
def maybe_leave_pool(pool):
    """Logic required for nested parallelism in dask.distributed.

    For a dask ``pool`` returns the result of ``_maybe_leave_pool_dask()``,
    for every other pool returns ``None``.
    """
    if _infer_backend(pool) != "dask":
        return None
    return _maybe_leave_pool_dask()
|
||||
|
||||
|
||||
def maybe_rejoin_pool(is_worker, pool):
    """Logic required for nested parallelism in dask.distributed.

    Calls ``_rejoin_pool_dask()`` only if ``is_worker`` is truthy and ``pool``
    is a dask pool; the pool is not inspected when ``is_worker`` is falsy.
    """
    if not is_worker:
        return
    if _infer_backend(pool) == "dask":
        _rejoin_pool_dask()
|
||||
|
||||
|
||||
def submit(pool, fn, *args, **kwargs):
    """Interface for submitting ``fn(*args, **kwargs)`` to ``pool``.

    For dask pools ``pure=False`` is added to the keyword arguments unless the
    caller supplied ``pure`` explicitly. Returns whatever ``pool.submit``
    returns.
    """
    is_dask = _infer_backend(pool) == "dask"
    if is_dask and "pure" not in kwargs:
        kwargs["pure"] = False
    return pool.submit(fn, *args, **kwargs)
|
||||
|
||||
|
||||
def scatter(pool, data):
    """Interface for maybe turning ``data`` into a remote object or reference.

    Dask and ray pools get ``pool.scatter(data)``; for any other pool ``data``
    is handed back untouched.
    """
    backend = _infer_backend(pool)
    if backend == "dask" or backend == "ray":
        return pool.scatter(data)
    return data
|
||||
|
||||
|
||||
def can_scatter(pool):
    """Whether ``pool`` can make objects remote (true for dask and ray)."""
    backend = _infer_backend(pool)
    return backend == "dask" or backend == "ray"
|
||||
|
||||
|
||||
def should_nest(pool):
    """Given argument ``pool`` should we try nested parallelism.

    Returns the backend name (``"ray"`` or ``"dask"``) if so, else ``False``;
    ``pool=None`` always gives ``False``.
    """
    if pool is not None:
        backend = _infer_backend(pool)
        if backend == "ray" or backend == "dask":
            return backend
    return False
|
||||
|
||||
|
||||
# ---------------------------------- loky ----------------------------------- #
|
||||
|
||||
|
||||
@functools.lru_cache(1)
def get_loky_get_reusable_executor():
    """Return loky's ``get_reusable_executor`` (result cached).

    The standalone ``loky`` package is tried first, falling back to the copy
    vendored at ``joblib.externals.loky``.
    """
    try:
        from loky import get_reusable_executor as factory
    except ImportError:
        from joblib.externals.loky import get_reusable_executor as factory
    return factory
|
||||
|
||||
|
||||
# --------------------------- concurrent.futures ---------------------------- #
|
||||
|
||||
|
||||
class CachedProcessPoolExecutor:
    """Callable that hands out a single cached ``ProcessPoolExecutor``.

    Calling the instance with ``n_workers`` returns the cached pool if it is
    alive and was created with the same ``n_workers``; otherwise any old pool
    is shut down and a new one is created. ``shutdown`` is registered with
    ``atexit`` on construction.
    """

    def __init__(self):
        self._pool = None
        self._n_workers = -1
        atexit.register(self.shutdown)

    def __call__(self, n_workers=None):
        """Return a pool with ``n_workers`` workers, (re)creating if needed."""
        # Also rebuild when the pool was shut down: comparing only the size
        # would otherwise return ``None`` for a repeated request.
        if self._pool is None or n_workers != self._n_workers:
            from concurrent.futures import ProcessPoolExecutor

            self.shutdown()
            self._pool = ProcessPoolExecutor(n_workers)
            self._n_workers = n_workers
        return self._pool

    def is_initialized(self):
        """Whether a pool is currently cached."""
        return self._pool is not None

    def shutdown(self):
        """Shut down and forget the cached pool, if any."""
        if self._pool is not None:
            self._pool.shutdown()
            self._pool = None

    def __del__(self):
        self.shutdown()


# module wide cache instance
ProcessPoolHandler = CachedProcessPoolExecutor()


def _get_process_pool_cf(n_workers=None):
    """Get the shared ``concurrent.futures`` process pool."""
    return ProcessPoolHandler(n_workers)
|
||||
|
||||
|
||||
class CachedThreadPoolExecutor:
    """Callable that hands out a single cached ``ThreadPoolExecutor``.

    Calling the instance with ``n_workers`` returns the cached pool if it is
    alive and was created with the same ``n_workers``; otherwise any old pool
    is shut down and a new one is created. ``shutdown`` is registered with
    ``atexit`` on construction.
    """

    def __init__(self):
        self._pool = None
        self._n_workers = -1
        atexit.register(self.shutdown)

    def __call__(self, n_workers=None):
        """Return a pool with ``n_workers`` workers, (re)creating if needed."""
        # Also rebuild when the pool was shut down: comparing only the size
        # would otherwise return ``None`` for a repeated request.
        if self._pool is None or n_workers != self._n_workers:
            from concurrent.futures import ThreadPoolExecutor

            self.shutdown()
            self._pool = ThreadPoolExecutor(n_workers)
            self._n_workers = n_workers
        return self._pool

    def is_initialized(self):
        """Whether a pool is currently cached."""
        return self._pool is not None

    def shutdown(self):
        """Shut down and forget the cached pool, if any."""
        if self._pool is not None:
            self._pool.shutdown()
            self._pool = None

    def __del__(self):
        self.shutdown()


# module wide cache instance
ThreadPoolHandler = CachedThreadPoolExecutor()


def _get_thread_pool_cf(n_workers=None):
    """Get the shared ``concurrent.futures`` thread pool."""
    return ThreadPoolHandler(n_workers)
|
||||
|
||||
|
||||
# ---------------------------------- DASK ----------------------------------- #
|
||||
|
||||
|
||||
def _get_pool_dask(n_workers=None, maybe_create=False):
    """Maybe get an existing or create a new dask.distributed client.

    Parameters
    ----------
    n_workers : None or int, optional
        The number of workers to request if creating a new client.
    maybe_create : bool, optional
        Whether to create a new local cluster and client if no existing
        client is found.

    Returns
    -------
    None or dask.distributed.Client
        ``None`` is returned when ``maybe_create`` is false and either dask
        cannot be imported or no client exists.

    Raises
    ------
    ImportError
        If dask cannot be imported and ``maybe_create`` is true.
    """
    try:
        from dask.distributed import get_client
    except ImportError:
        if maybe_create:
            raise
        return None

    try:
        client = get_client()
    except ValueError:
        # no client found
        if not maybe_create:
            return None

        import shutil
        import tempfile

        from dask.distributed import Client, LocalCluster

        local_directory = tempfile.mkdtemp()
        cluster = LocalCluster(
            n_workers=n_workers,
            threads_per_worker=1,
            local_directory=local_directory,
            memory_limit=0,
        )
        client = Client(cluster)

        n_created = get_n_workers(client)
        warnings.warn(
            "Parallel specified but no existing global dask client found... "
            f"created one (with {n_created} workers)."
        )

        def delete_local_dask_directory():
            # the scratch directory is removed when the interpreter exits
            shutil.rmtree(local_directory, ignore_errors=True)

        atexit.register(delete_local_dask_directory)

    if n_workers is not None:
        current_n_workers = get_n_workers(client)
        if current_n_workers != n_workers:
            warnings.warn(
                f"Found existing client (with {current_n_workers} workers "
                "which) doesn't match "
                f"the requested {n_workers}... using it instead."
            )

    return client
|
||||
|
||||
|
||||
def _maybe_leave_pool_dask():
    """Try to ``secede`` from the dask worker thread pool.

    Returns
    -------
    bool
        ``True`` if ``secede()`` succeeded, ``False`` if dask could not be
        imported or ``secede()`` raised ``ValueError``.
    """
    try:
        from dask.distributed import secede

        secede()  # for nested parallelism
    except (ImportError, ValueError):
        return False
    return True
|
||||
|
||||
|
||||
def _rejoin_pool_dask():
    """Call ``dask.distributed.rejoin`` (imported lazily)."""
    from dask.distributed import rejoin as rejoin_pool

    rejoin_pool()
|
||||
|
||||
|
||||
# ----------------------------------- RAY ----------------------------------- #
|
||||
|
||||
|
||||
@functools.lru_cache(None)
def get_ray():
    """Import ``ray`` lazily and return the module (cached after first call)."""
    import ray as ray_module

    return ray_module
|
||||
|
||||
|
||||
class RayFuture:
    """Basic ``concurrent.futures`` like future wrapping a ray ``ObjectRef``."""

    __slots__ = ("_obj", "_cancelled")

    def __init__(self, obj):
        self._cancelled = False
        self._obj = obj

    def result(self, timeout=None):
        """Fetch the value via ``ray.get``, forwarding ``timeout``."""
        ray = get_ray()
        return ray.get(self._obj, timeout=timeout)

    def done(self):
        """True once cancelled, or once ``ray.wait`` reports the ref ready."""
        if self._cancelled:
            return True
        ready = get_ray().wait([self._obj], timeout=0)[0]
        return bool(ready)

    def cancel(self):
        """Ask ray to cancel the task and mark this future as cancelled."""
        ray = get_ray()
        ray.cancel(self._obj)
        self._cancelled = True
|
||||
|
||||
|
||||
def _unpack_futures_tuple(x):
    """Unpack every element of a tuple, returning a new tuple."""
    return tuple(_unpack_futures(item) for item in x)


def _unpack_futures_list(x):
    """Unpack every element of a list, returning a new list."""
    return [_unpack_futures(item) for item in x]


def _unpack_futures_dict(x):
    """Unpack every value of a dict, returning a new dict with equal keys."""
    return {key: _unpack_futures(value) for key, value in x.items()}


def _unpack_futures_identity(x):
    """Fallback handler: leave the object as it is."""
    return x


# exact class -> handler; classes not listed fall back to the identity
# handler (and are added to the table by the defaultdict on first lookup)
_unpack_dispatch = collections.defaultdict(
    lambda: _unpack_futures_identity,
    {
        RayFuture: operator.attrgetter("_obj"),
        tuple: _unpack_futures_tuple,
        list: _unpack_futures_list,
        dict: _unpack_futures_dict,
    },
)


def _unpack_futures(x):
    """Allows passing futures by reference - takes e.g. args and kwargs and
    replaces all ``RayFuture`` objects with their underlying ``ObjectRef``
    within all nested tuples, lists and dicts.

    The handler is chosen by the exact class of ``x``.

    [Subclassing ``ObjectRef`` might avoid needing this.]
    """
    handler = _unpack_dispatch[x.__class__]
    return handler(x)
|
||||
|
||||
|
||||
@functools.lru_cache(2**14)
def get_remote_fn(fn, **remote_opts):
    """Cached retrieval of the ray remote version of ``fn``.

    ``remote_opts``, if any, are passed to ``ray.remote`` as options. Because
    of the cache, ``fn`` and the option values need to be hashable.
    """
    ray = get_ray()
    decorate = ray.remote(**remote_opts) if remote_opts else ray.remote
    return decorate(fn)
|
||||
|
||||
|
||||
@functools.lru_cache(2**14)
def get_fn_as_remote_object(fn):
    """Cached ``ray.put`` of the callable ``fn``, returning its reference."""
    return get_ray().put(fn)
|
||||
|
||||
|
||||
@functools.lru_cache(None)
def get_deploy(**remote_opts):
    """Alternative for 'non-function' callables - e.g. partial
    functions - pass the callable object too.

    Returns a (cached) ray remote function that takes the callable as its
    first argument and calls it with the remaining arguments.
    """

    def deploy(fn, *args, **kwargs):
        return fn(*args, **kwargs)

    ray = get_ray()
    decorate = ray.remote(**remote_opts) if remote_opts else ray.remote
    return decorate(deploy)
|
||||
|
||||
|
||||
class RayExecutor:
    """Basic ``concurrent.futures`` like interface using ``ray``.

    Parameters
    ----------
    *args, **kwargs
        Passed to ``ray.init``, which is only called if ray is not already
        initialized.
    default_remote_opts : None or dict, optional
        Options supplied to ``ray.remote`` for every task unless overridden by
        the ``remote_opts`` of an individual call. The remote functions are
        cached with ``functools.lru_cache``, so values must be hashable.
    """

    def __init__(self, *args, default_remote_opts=None, **kwargs):
        ray = get_ray()
        if not ray.is_initialized():
            ray.init(*args, **kwargs)

        # copy so that later changes to the caller's dict have no effect
        self.default_remote_opts = (
            {} if default_remote_opts is None else dict(default_remote_opts)
        )

    def _maybe_inject_remote_opts(self, remote_opts=None):
        """Return the default remote options, possibly overriding some with
        those supplied by a ``submit call``.
        """
        ropts = self.default_remote_opts
        if remote_opts is not None:
            ropts = {**ropts, **remote_opts}
        return ropts

    def submit(self, fn, *args, pure=False, remote_opts=None, **kwargs):
        """Remotely run ``fn(*args, **kwargs)``, returning a ``RayFuture``.

        ``pure`` is accepted but not used by this method.
        """
        # want to pass futures by reference
        args = _unpack_futures_tuple(args)
        kwargs = _unpack_futures_dict(kwargs)

        ropts = self._maybe_inject_remote_opts(remote_opts)

        # this is the same test ray uses to accept functions
        if inspect.isfunction(fn):
            # can use the faster cached remote function
            obj = get_remote_fn(fn, **ropts).remote(*args, **kwargs)
        else:
            fn_obj = get_fn_as_remote_object(fn)
            obj = get_deploy(**ropts).remote(fn_obj, *args, **kwargs)

        return RayFuture(obj)

    def map(self, func, *iterables, remote_opts=None):
        """Remote map ``func`` over arguments ``iterables``.

        All tasks are launched eagerly; the returned iterator fetches the
        results lazily, in order, with ``ray.get``.
        """
        ropts = self._maybe_inject_remote_opts(remote_opts)

        if inspect.isfunction(func):
            launch = get_remote_fn(func, **ropts).remote
        else:
            # ``ray.remote`` rejects non-function callables such as
            # ``functools.partial`` objects, so route them through the generic
            # deploy task, exactly as ``submit`` does
            launch = functools.partial(
                get_deploy(**ropts).remote, get_fn_as_remote_object(func)
            )

        objs = tuple(map(launch, *iterables))
        return map(get_ray().get, objs)

    def scatter(self, data):
        """Push ``data`` into the distributed store, returning an ``ObjectRef``
        that can be supplied to ``submit`` calls for example.
        """
        ray = get_ray()
        return ray.put(data)

    def shutdown(self):
        """Shutdown the parent ray cluster, this ``RayExecutor`` instance
        itself does not need any cleanup.
        """
        get_ray().shutdown()
|
||||
|
||||
|
||||
# the single shared RayExecutor, created on demand by ``_get_pool_ray``
_RAY_EXECUTOR = None


def _get_pool_ray(n_workers=None, maybe_create=False):
    """Maybe get an existing or create a new RayExecutor, thus initializing
    ray.

    Parameters
    ----------
    n_workers : None or int, optional
        The number of workers to request if creating a new executor (passed
        on as ``num_cpus``).
    maybe_create : bool, optional
        Whether to initialize ray and return a RayExecutor if not
        initialized already.

    Returns
    -------
    None or RayExecutor
        ``None`` is returned when ``maybe_create`` is false and either ray
        cannot be imported or there is no live executor.

    Raises
    ------
    ImportError
        If ray cannot be imported and ``maybe_create`` is true.
    """
    global _RAY_EXECUTOR

    try:
        import ray
    except ImportError:
        if maybe_create:
            raise
        return None

    needs_executor = _RAY_EXECUTOR is None or not ray.is_initialized()
    if needs_executor:
        if not maybe_create:
            return None
        _RAY_EXECUTOR = RayExecutor(num_cpus=n_workers)

    if n_workers is not None:
        current_n_workers = get_n_workers(_RAY_EXECUTOR)
        if current_n_workers != n_workers:
            warnings.warn(
                f"Found initialized ray (with {current_n_workers} workers "
                "which) doesn't match "
                f"the requested {n_workers}... sticking with old number."
            )

    return _RAY_EXECUTOR
|
||||
1009
.venv/lib/python3.12/site-packages/qmatchatea/py_emulator.py
Normal file
1009
.venv/lib/python3.12/site-packages/qmatchatea/py_emulator.py
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,691 @@
|
||||
# This code is part of qtealeaves.
|
||||
#
|
||||
# This code is licensed under the Apache License, Version 2.0. You may
|
||||
# obtain a copy of this license in the LICENSE.txt file in the root directory
|
||||
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
#
|
||||
# Any modifications or derivative works of this code must retain this
|
||||
# copyright notice, and modified files need to carry a notice indicating
|
||||
# that they have been altered from the originals.
|
||||
|
||||
"""
|
||||
The module contains the MPI version of the MPS simulator.
|
||||
|
||||
Code for the MPI simulations should be run as:
|
||||
|
||||
.. code-block::
|
||||
mpiexec -n 4 python my_mpi_script.py
|
||||
|
||||
where we used 4 processes as an example.
|
||||
"""
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
from qtealeaves.convergence_parameters import TNConvergenceParameters
|
||||
from qtealeaves.tensors import TensorBackend
|
||||
from qtealeaves.tooling.mpisupport import MPI, TN_MPI_TYPES
|
||||
|
||||
from .mps_simulator import MPS
|
||||
|
||||
__all__ = ["MPIMPS"]
|
||||
|
||||
|
||||
def _mpi_array_dtype(array):
    """Return the MPI dtype for numpy arrays and CPU tensor buffers.

    Dtypes exposing a ``str`` attribute are looked up in ``TN_MPI_TYPES``.
    Any other dtype is looked up in a fixed table of torch dtypes.

    Raises
    ------
    KeyError
        If the dtype is in neither table.
    """
    dtype = array.dtype
    if not hasattr(dtype, "str"):
        # qredtea torch singular values are raw torch.Tensor objects, not
        # QteaTorchTensor instances, so they do not expose dtype_mpi().
        import torch

        torch_to_mpi = {
            torch.complex128: MPI.DOUBLE_COMPLEX,
            torch.complex64: MPI.COMPLEX,
            torch.float64: MPI.DOUBLE_PRECISION,
            torch.float32: MPI.REAL,
            torch.int64: MPI.INT,
        }
        return torch_to_mpi[dtype]

    return TN_MPI_TYPES[dtype.str]
|
||||
|
||||
|
||||
def _mpi_send_array(comm, array, to_):
    """Blocking ``comm.Send`` of ``array`` to rank ``to_``.

    Objects providing ``resolve_conj`` are first passed through
    ``resolve_conj().contiguous()``.
    """
    needs_materializing = hasattr(array, "resolve_conj")
    if needs_materializing:
        array = array.resolve_conj().contiguous()
    message = [array, _mpi_array_dtype(array)]
    comm.Send(message, to_)
|
||||
|
||||
|
||||
def _mpi_empty_like(array, shape):
    """Allocate an uninitialized buffer of ``shape`` with the dtype of ``array``.

    Objects providing ``resolve_conj`` get a ``torch.empty`` buffer on the
    CPU, everything else a ``np.empty`` array.
    """
    if not hasattr(array, "resolve_conj"):
        return np.empty(shape, array.dtype)

    import torch

    return torch.empty(shape, dtype=array.dtype, device="cpu")
|
||||
|
||||
|
||||
def _mpi_recv_array(comm, template, shape, from_):
    """Blocking ``comm.Recv`` of an array of ``shape`` from rank ``from_``.

    The receive buffer takes its dtype (and numpy/torch flavour) from
    ``template``. If ``template`` has a ``device`` and the buffer has a
    ``to`` method, the result is moved to ``template.device``.
    """
    received = _mpi_empty_like(template, shape)
    comm.Recv([received, _mpi_array_dtype(received)], from_)

    can_move = hasattr(template, "device") and hasattr(received, "to")
    if can_move:
        received = received.to(device=template.device)
    return received
|
||||
|
||||
|
||||
# pylint: disable-next=too-many-instance-attributes
|
||||
class MPIMPS(MPS):
|
||||
"""
|
||||
MPI version of the MPS emulator that divides the MPS between the different nodes
|
||||
|
||||
Parameters
|
||||
----------
|
||||
num_sites: int
|
||||
Number of sites
|
||||
convergence_parameters: :py:class:`TNConvergenceParameters`
|
||||
Class for handling convergence parameters. In particular, in the MPS simulator we are
|
||||
interested in:
|
||||
- the *maximum bond dimension* :math:`\\chi`;
|
||||
- the *cut ratio* :math:`\\epsilon` after which the singular
|
||||
values are neglected, i.e. if :math:`\\lambda_1` is the
|
||||
bigger singular values then after an SVD we neglect all the
|
||||
singular values such that :math:`\\frac{\\lambda_i}{\\lambda_1}\\leq\\epsilon`
|
||||
local_dim: int or list of ints, optional
|
||||
Local dimension of the degrees of freedom. Default to 2.
|
||||
If a list is given, then it must have length num_sites.
|
||||
initialize: str, optional
|
||||
The method for the initialization. Default to "vacuum"
|
||||
Available:
|
||||
- "vacuum", for the |000...0> state
|
||||
- "random", for a random state at given bond dimension
|
||||
tensor_backend : `None` or instance of :class:`TensorBackend`
|
||||
Default for `None` is :class:`QteaTensor` with np.complex128 on CPU.
|
||||
|
||||
"""
|
||||
|
||||
# pylint: disable-next=too-many-arguments
def __init__(
    self,
    num_sites,
    convergence_parameters,
    local_dim=2,
    initialize="vacuum",
    tensor_backend=None,
):
    """Split ``num_sites`` over the MPI ranks and build the local MPS.

    The sites are divided as evenly as possible, lower ranks taking the
    remainder. Every rank but the last holds one extra auxiliary site at its
    right end.

    Raises
    ------
    ImportError
        If ``MPI`` is ``None``.
    """
    if MPI is None:
        raise ImportError("No module mpi4py found in python environment")

    # MPI variables
    # pylint: disable-next=c-extension-no-member
    self.comm = MPI.COMM_WORLD
    self.size = self.comm.Get_size()
    self.rank = self.comm.Get_rank()
    self.tot_sites = num_sites

    # Number of sites owned by each rank, with a leading 0 for the cumsum
    remainder = num_sites % self.size
    base_count = int(np.floor(num_sites // self.size))
    site_counts = [0]
    for ii in range(self.size):
        site_counts.append(base_count + 1 if ii < remainder else base_count)
    local_num_size = site_counts[self.rank + 1]

    # indexes[r] : indexes[r + 1] is the range of global sites of rank r
    self.indexes = np.cumsum(site_counts)
    first_site = self.indexes[self.rank]
    stop_site = self.indexes[self.rank + 1]

    # The par_map is a dictionary where the key is the position of the
    # site in the full chain, while the value is the position on the
    # subchain in this process
    global_positions = np.arange(first_site, stop_site, dtype=int)
    local_positions = np.arange(local_num_size, dtype=int)
    self.par_map = dict(zip(global_positions, local_positions))

    # Auxiliary site for the boundaries
    is_last_rank = self.rank == self.size - 1
    if self.rank < self.size - 1:
        local_num_size += 1

    if not np.isscalar(local_dim):
        # keep the local dimensions of this rank (plus the auxiliary site)
        local_dim = local_dim[first_site : stop_site + int(not is_last_rank)]

    super().__init__(
        local_num_size,
        convergence_parameters,
        local_dim=local_dim,
        initialize=initialize,
        tensor_backend=tensor_backend,
    )

    # MPS initialization not aware of device
    self.convert(self.tensor_backend.dtype, self.tensor_backend.memory_device)
|
||||
|
||||
@property
def mpi_dtype(self):
    """Return the MPI version of the MPS dtype (going via first tensor)"""
    first_tensor = self[0]
    dtype_code = np.dtype(first_tensor.dtype).str
    return TN_MPI_TYPES[dtype_code]
|
||||
|
||||
def get_tensor_of_site(self, idx):
    """Retrieve the tensor of the site with global index ``idx``.

    Raises ``KeyError`` if ``idx`` is not in this rank's ``par_map``.
    """
    local_idx = self.par_map[idx]
    return self[local_idx]
|
||||
|
||||
def apply_one_site_operator(self, op, pos):
    """
    Applies a one-site operator `op` to the site `pos` of the MPIMPS.
    Instead of communicating the changes on the boundaries we
    perform an additional contraction.

    Parameters
    ----------
    op: numpy array shape (local_dim, local_dim)
        Matrix representation of the quantum gate
    pos: int
        Position of the qubit where to apply `op`.

    Returns
    -------
    None
    """
    if pos in self.par_map:
        # The site belongs to this rank
        local_pos = self.par_map[pos]
    elif pos - 1 in self.par_map:
        # For one-qubit gates it is more convenient to apply them both to
        # the real and auxiliary qubits if they are on the boundaries
        local_pos = self.num_sites - 1
    else:
        # Nothing to do on this rank
        return None

    super().apply_one_site_operator(op, local_pos)
    return None
|
||||
|
||||
# pylint: disable-next=too-many-arguments
def apply_two_site_operator(self, op, pos, swap=False, svd=None, parallel=None):
    """
    Applies a two-site operator `op` to the site `pos`, `pos+1` of the MPS.
    Then, perform the necessary communications between the interested
    process and its neighbours.

    Parameters
    ----------
    op: numpy array shape (local_dim, local_dim, local_dim, local_dim)
        Matrix representation of the quantum gate
    pos: int or list of ints
        Position of the qubit where to apply `op`. If a sequence of
        length two is passed, the smaller of the two entries is used.
    swap: bool
        Passed on to the parent implementation.
    svd : None
        Required for compatibility. Ignored: it is always set to True.
    parallel: None or bool
        Passed on to the parent implementation. If None, it is read from
        the environment variable ``QTEALEAVES_MPIMPS_PARALLEL`` (default
        "1"); the values "0", "false", "no" and "off" (case insensitive)
        disable it.

    Returns
    -------
    The result of the parent ``apply_two_site_operator`` on the rank that
    owns site `pos`, an empty list on all the other ranks.

    Raises
    ------
    ValueError
        If `pos` is a sequence whose length is not two.
    """
    if not np.isscalar(pos) and len(pos) == 2:
        pos = min(pos[0], pos[1])
    elif not np.isscalar(pos):
        raise ValueError(
            f"pos should be only scalar or len 2 array-like, not len {len(pos)}"
        )

    # Hardcoded but necessary for compatibility
    svd = True
    if parallel is None:
        parallel_env = os.environ.get("QTEALEAVES_MPIMPS_PARALLEL", "1").lower()
        parallel = parallel_env not in ("0", "false", "no", "off")

    if pos in self.par_map:
        local_pos = self.par_map[pos]
        res = super().apply_two_site_operator(
            op, local_pos, swap, svd=svd, parallel=parallel
        )

        # Send the information back to the auxiliary if it was the first site
        if local_pos == 0 and self.rank > 0:
            self.mpi_send_tensor(self[0], to_=self.rank - 1)
            _mpi_send_array(self.comm, self.singvals[1], self.rank - 1)

        # Send the information towards the next if it was the last site.
        # This must NOT be an ``elif``: when a rank holds a single real site
        # plus the auxiliary one, both conditions hold, both neighbours post
        # a receive, and skipping this send leaves rank + 1 waiting forever.
        if local_pos == self.num_sites - 2 and self.rank < self.size - 1:
            self.mpi_send_tensor(self[self.num_sites - 1], to_=self.rank + 1)
            _mpi_send_array(
                self.comm, self.singvals[self.num_sites - 1], self.rank + 1
            )

    else:
        res = []
        # Receive the information from the MPS on the right
        if pos == self.indexes[self.rank + 1] and self.rank < self.size - 1:
            tens = self.mpi_receive_tensor(from_=self.rank + 1)

            self[self.num_sites - 1] = tens

            singvals = _mpi_recv_array(
                self.comm,
                self.singvals[self.num_sites],
                tens.shape[2],
                self.rank + 1,
            )
            self._singvals[self.num_sites] = singvals

        # Receive the information from the MPS from the left
        if pos == self.indexes[self.rank] - 1 and self.rank > 0:
            tens = self.mpi_receive_tensor(from_=self.rank - 1)
            self[0] = tens

            singvals = _mpi_recv_array(
                self.comm,
                self.singvals[0],
                tens.shape[0],
                self.rank - 1,
            )
            self._singvals[0] = singvals

    return res
|
||||
|
||||
def apply_projective_operator(self, site, selected_output=None, remove=False):
    """
    Apply a projective operator to the site **site**, and give the measurement as output.
    You can also decide to select a given output for the measurement, if the probability is
    non-zero. Finally, you have the possibility of removing the site after the measurement.

    Parameters
    ----------
    site: int
        Index of the site you want to measure
    selected_output: int, optional
        If provided, the selected state is measured. Throw an error if the probability of the
        state is 0
    remove: bool, optional
        If True, the measured index is traced away after the measurement. Default to False.

    Returns
    -------
    meas_state: int | None
        Measured state or None if site not in this part of the MPI-MPS.
    state_prob : float | None
        Probability of measuring the output state or None if site not
        in this part of the MPI-MPS.
    """
    # Sweep the isometry with the default arguments before measuring
    self.reinstall_isometry_serial()

    res = (None, None)
    if site in self.par_map:
        # Only the rank owning the site performs the measurement
        res = super().apply_projective_operator(
            self.par_map[site], selected_output, remove
        )

    # Propagate the update starting from the measured site ...
    self.reinstall_isometry_serial(left=False, from_site=site)
    # ... and sweep once more with the default arguments
    self.reinstall_isometry_serial()

    return res
|
||||
|
||||
# pylint: disable-next=arguments-differ
def reinstall_isometry_serial(self, left=False, from_site=None):
    """
    Reinstall the isometry by sweeping serially over the MPI ranks.

    This step is serial because we have to serially pass the information
    along the MPS. It cannot be parallelized.

    Starting from the rank selected through `from_site`, each rank in turn
    canonizes its local MPS and sends its boundary tensor to the next rank
    of the sweep, which stores it as its own boundary tensor.

    Parameters
    ----------
    left: bool, optional
        If True, sweep towards rank 0 using `right_canonize`.
        If False, sweep towards the last rank using `left_canonize`.
        Default to False
    from_site: int, optional
        The site from which the isometrization should start.
        By default None, which means `self.num_sites - 1` if `left`
        else 0.
        NOTE(review): the default uses the local `num_sites`, while the
        value is compared against the global `indexes` - confirm.

    Returns
    -------
    None
    """
    if from_site is None:
        from_site = self.num_sites - 1 if left else 0
    # First entry of the cumulative site indexes that is >= from_site
    extrem = np.nonzero(from_site <= self.indexes)[0][0]

    if left:
        rank_sweep = range(extrem, -1, -1)
        tidx = 0
        to_ = self.rank - 1
        from_ = self.rank + 1
    else:
        rank_sweep = range(extrem, self.size, 1)
        tidx = self.num_sites - 1
        to_ = self.rank + 1
        from_ = self.rank - 1

    for ii in rank_sweep:
        if self.rank == ii:
            # It is this rank's turn: canonize the local chain
            self._first_non_orthogonal_left = self.num_sites - 1
            self._first_non_orthogonal_right = self.num_sites - 1
            # Force singular values during the sweep, then restore the flag
            requires_singvals = self._requires_singvals
            self._requires_singvals = True
            if left:
                self.right_canonize(0, False, True)
            else:
                self.left_canonize(self.num_sites - 1, False, True)
            self._requires_singvals = requires_singvals

            # Send tensor, unless this rank is at the end of the chain
            has_neighbour = (left and self.rank > 0) or (
                not left and self.rank + 1 < self.size
            )
            if has_neighbour:
                self.mpi_send_tensor(self[tidx], to_=to_)

        elif (left and self.rank == ii - 1) or (not left and self.rank == ii + 1):
            # Receive tensor on the boundary opposite to the sending one
            tens = self.mpi_receive_tensor(from_=from_)
            self[self.num_sites - 1 - tidx] = tens
|
||||
# pylint: disable-next=arguments-differ
|
||||
def reinstall_isometry_parallel(self, num_cycles):
    """
    Reinstall the isometry by applying identities to all even sites and
    to all odd sites, and repeating for `num_cycles` cycles.
    The reinstallation is exact for `num_cycles=num_sites/2`.
    Method from https://arxiv.org/abs/2312.02667

    In contrast to the serial reinstallation, every two-site update is
    issued through `apply_two_site_operator` with `svd=True` and
    `parallel=True`.

    Parameters
    ----------
    num_cycles: int
        Number of cycles for reinstalling the isometry

    Returns
    -------
    None
    """
    for _ in range(num_cycles):
        # Apply on all even sites
        for ii in range(0, self.tot_sites - 1, 2):
            self.apply_two_site_operator(
                self[0].eye_like(4), ii, svd=True, parallel=True
            )
        # Apply on all odd sites
        for ii in range(1, self.tot_sites - 1, 2):
            self.apply_two_site_operator(
                self[0].eye_like(4), ii, svd=True, parallel=True
            )
|
||||
|
||||
def mpi_gather_tn(self):
    """
    Gather the tensors on process 0.
    We do not use MPI.comm.Gather because we would gather lists of np.arrays
    without using the np.array advantages, making it slower than the single
    communications.

    Every rank contributes its leading `num_sites - 1` tensors; only the
    last rank contributes all of its `num_sites` tensors.

    Returns
    -------
    list of tensors or None
        List of tensors on the rank 0 process, None on the others
    """
    self.comm.Barrier()
    if self.rank != 0:
        # The last local tensor is skipped on every rank but the last one.
        num_tensors = (
            self.num_sites if self.rank == self.size - 1 else self.num_sites - 1
        )
        for jj in range(num_tensors):
            self.mpi_send_tensor(self[jj], to_=0)
        tensor_list = None
    else:
        tensor_list = [None for _ in range(self.tot_sites)]
        # Rank 0 keeps all of its own tensors but the last one.
        tensor_list[: self.num_sites - 1] = self.tensors[:-1]

        # `tidx` is the next free slot of the global list.
        tidx = self.num_sites - 1
        for ii in range(1, self.size):
            # Number of tensors expected from rank `ii`.
            num_tensors = self.indexes[ii + 1] - self.indexes[ii]
            for jj in range(num_tensors):
                tens = self.mpi_receive_tensor(from_=ii)
                tensor_list[tidx + jj] = tens
            tidx += num_tensors

    self.comm.Barrier()

    return tensor_list
|
||||
|
||||
def mpi_scatter_tn(self, tensor_list):
    """
    Scatter the tensors from process 0 to all the other processes.
    We do not use MPI.comm.Scatter because we would scatter lists of
    np.arrays without using the np.array advantages, making it slower than
    the single communications.

    Parameters
    ----------
    tensor_list : list of lists of np.ndarrays
        The index i of the list is sent to the rank i. On the ranks
        different from 0 only the length of the own sub-list is read,
        so placeholders are sufficient there.

    Returns
    -------
    list of np.ndarray
        The tensors assigned to the calling rank: `tensor_list[0]` on
        rank 0, the received tensors on every other rank.
    """
    self.comm.Barrier()
    if self.rank == 0:
        # Sub-list i goes to rank i; sub-list 0 stays on this rank.
        for target_rank, sub_tensorlist in enumerate(tensor_list[1:], start=1):
            for tens in sub_tensorlist:
                self.mpi_send_tensor(tens, to_=target_rank)

        tensor_list = tensor_list[0]
    else:
        # Receive as many tensors as the own sub-list has entries.
        num_tensors = len(tensor_list[self.rank])
        tensor_list = [self.mpi_receive_tensor(from_=0) for _ in range(num_tensors)]

    self.comm.Barrier()

    return tensor_list
|
||||
|
||||
def to_tensor_list(self):
    """
    Collect the tensors of the full MPS on process 0.

    The call delegates to `mpi_gather_tn`, so it involves communication
    between the processes.

    Returns
    -------
    list of np.ndarray or None
        List of tensors on the rank 0 process, None on the others
    """
    gathered = self.mpi_gather_tn()
    return gathered
|
||||
|
||||
def to_statevector(self, qiskit_order=False, max_qubit_equivalent=20):
    """
    Serially compute the statevector

    The tensors are first gathered via `to_tensor_list` (called on every
    rank); rank 0 then builds a serial `MPS` and converts it.

    Parameters
    ----------
    qiskit_order: bool, optional
        Whether to use qiskit ordering or the theoretical one. For
        example the state |011> has 0 in the first position for the
        theoretical ordering, while for qiskit ordering it is on the
        last position.
    max_qubit_equivalent: int, optional
        Maximum number of qubit sites the MPS can have and still be
        transformed into a statevector.
        If the number of sites is greater, it will throw an exception.
        Default to 20.

    Returns
    -------
    np.ndarray or None
        Statevector on process 0, None on the others
    """
    gathered = self.to_tensor_list()

    # Only rank 0 holds the gathered tensors.
    if self.rank != 0:
        return None

    serial_mps = MPS.from_tensor_list(gathered)
    return serial_mps.to_statevector(qiskit_order, max_qubit_equivalent)
|
||||
|
||||
@classmethod
def from_tensor_list(
    cls,
    tensor_list,
    conv_params=None,
    tensor_backend=None,
    target_device=None,
):
    """
    Initialize the MPS tensors using a list of correctly shaped tensors

    Parameters
    ----------
    tensor_list : list of ndarrays or cupy arrays
        List of tensor for initializing the MPS
    conv_params : :py:class:`TNConvergenceParameters`, optional
        Convergence parameters for the new MPS. If None, the maximum
        bond dimension found in `tensor_list` is used as maximum bond
        dimension.
        Default to None.
    tensor_backend : `None` or instance of :class:`TensorBackend`
        If `None`, a default-constructed :class:`TensorBackend` is used.
    target_device: None | str, optional
        If `None`, take memory device of tensor backend.
        If string is `any`, do not convert. Otherwise,
        use string as device string.

    Returns
    -------
    obj : :py:class:`MPIMPS`
        The MPIMPS class

    Raises
    ------
    ValueError
        If the third dimension of a tensor differs from the first
        dimension of its right neighbour.
    """
    # Neighbouring tensors must agree on the bond they share.
    mismatches = [
        tensor_list[ii].shape[2] != tensor_list[ii + 1].shape[0]
        for ii in range(len(tensor_list) - 1)
    ]
    if any(mismatches):
        msg = f"Mismatches for tensors equals to True: {mismatches}."
        raise ValueError(f"Dimension mismatch when constructing MPS:{msg}")

    if conv_params is None:
        max_bond_dim = max(elem.shape[2] for elem in tensor_list)
        conv_params = TNConvergenceParameters(max_bond_dimension=int(max_bond_dim))
    if tensor_backend is None:
        # Have to resolve it here in case target device is not given
        tensor_backend = TensorBackend()
    if target_device is None:
        target_device = tensor_backend.memory_device
    elif target_device == "any":
        target_device = None

    local_dim = [elem.shape[1] for elem in tensor_list]
    obj = cls(
        len(tensor_list), conv_params, local_dim, tensor_backend=tensor_backend
    )

    # Convert data type (lateron device if GPU enabled?)
    # NOTE(review): `convert` is called on the caller's tensors on every
    # rank; presumably it works in place - confirm.
    for elem in tensor_list:
        elem.convert(obj.tensor_backend.dtype, target_device)

    if obj.rank == 0:
        # Rank r gets the sites indexes[r] ... indexes[r + 1]; every rank
        # but the last one also holds the first site of its right neighbour.
        tensorlist = [
            tensor_list[
                obj.indexes[rank] : obj.indexes[rank + 1]
                + int(rank != obj.size - 1)
            ]
            for rank in range(obj.size)
        ]
    else:
        # The other ranks only need placeholders with the right lengths,
        # the actual tensors are received in `mpi_scatter_tn`.
        list_sizes = obj.indexes[1:] - obj.indexes[:-1] + 1
        list_sizes[-1] -= 1
        tensorlist = [
            [None for _ in range(list_sizes[rank])] for rank in range(obj.size)
        ]

    tensor_list = obj.mpi_scatter_tn(tensorlist)
    obj._tensors = tensor_list

    return obj
|
||||
|
||||
@classmethod
def from_statevector(
    cls,
    statevector,
    local_dim=2,
    conv_params=None,
    tensor_backend=None,
):
    """
    Serially decompose the statevector and then initialize the MPS.

    The decomposition is delegated to `MPS.from_statevector`; the resulting
    tensor list is then passed to `from_tensor_list` of this class.
    """
    serial_mps = MPS.from_statevector(
        statevector, local_dim, conv_params, tensor_backend=tensor_backend
    )
    tensors = serial_mps.to_tensor_list()

    return cls.from_tensor_list(tensors, conv_params, tensor_backend=tensor_backend)
|
||||
|
||||
# ---------------------------
|
||||
# ----- MEASURE METHODS -----
|
||||
# ---------------------------
|
||||
|
||||
def meas_local(self, op_list):
    """
    Measure a local observable along all sites of the MPS

    Every rank measures its local sites and the results are collected on
    rank 0 with the same bookkeeping as `mpi_gather_tn`: each rank
    contributes its leading `num_sites - 1` results (the last local site is
    shared with the right neighbour), only the last rank contributes all.

    Parameters
    ----------
    op_list : list of :class:`_AbstractQteaTensor`
        local operator to measure on each site

    Return
    ------
    measures : ndarray, shape (tot_sites) or None
        Measures of the local operator along each site on rank-0,
        None on the other ranks.
    """
    res = super().meas_local(op_list)

    # Drop the result of the shared boundary site on every rank but the
    # last one, so that message and receive buffer have the same length.
    owned = res if self.rank == self.size - 1 else res[:-1]

    # Call back on the site 0 the results
    if self.rank != 0:
        self.comm.Send([owned, self.mpi_dtype[res.dtype.str]], 0)
        return None

    tot_res = np.empty(self.tot_sites, dtype=res.dtype)
    tot_res[: len(owned)] = owned

    tidx = len(owned)
    for ii in range(1, self.size):
        # Number of results sent by rank `ii` (see `mpi_gather_tn`).
        num_tensors = self.indexes[ii + 1] - self.indexes[ii]
        self.comm.Recv(
            [tot_res[tidx : tidx + num_tensors], self.mpi_dtype[res.dtype.str]],
            ii,
        )
        tidx += num_tensors

    return tot_res
|
||||
|
||||
def _get_eff_op_on_pos(self, pos):
|
||||
"""
|
||||
Obtain the list of effective operators adjacent
|
||||
to the position pos and the index where they should
|
||||
be contracted
|
||||
|
||||
Parameters
|
||||
----------
|
||||
pos : int
|
||||
Index of the tensor w.r.t. which we have to retrieve
|
||||
the effective operators
|
||||
|
||||
Returns
|
||||
-------
|
||||
list of IndexedOperators
|
||||
List of effective operators
|
||||
list of ints
|
||||
Indexes where the operators should be contracted
|
||||
"""
|
||||
raise NotImplementedError("This function has to be overwritten")
|
||||
@@ -0,0 +1,416 @@
|
||||
# This code is part of qtealeaves.
|
||||
#
|
||||
# This code is licensed under the Apache License, Version 2.0. You may
|
||||
# obtain a copy of this license in the LICENSE.txt file in the root directory
|
||||
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
#
|
||||
# Any modifications or derivative works of this code must retain this
|
||||
# copyright notice, and modified files need to carry a notice indicating
|
||||
# that they have been altered from the originals.
|
||||
|
||||
"""
|
||||
Generic base class for operators.
|
||||
"""
|
||||
|
||||
# pylint: disable=too-many-locals
|
||||
|
||||
# pylint: disable-next=no-name-in-module
|
||||
from collections import OrderedDict
|
||||
|
||||
import numpy as np
|
||||
|
||||
from qtealeaves.tooling.operatorstrings import _op_string_mul
|
||||
from qtealeaves.tooling.parameterized import _ParameterizedClass
|
||||
|
||||
__all__ = ["TNOperators"]
|
||||
|
||||
|
||||
class _DefaultMapping:
|
||||
"""Callable default mapping to avoid recreating closures during lookups."""
|
||||
|
||||
def __init__(self, default_key):
|
||||
self.default_key = default_key
|
||||
|
||||
# pylint: disable-next=unused-argument
|
||||
def __call__(self, site_idx):
|
||||
return self.default_key
|
||||
|
||||
|
||||
class TNOperators(_ParameterizedClass):
    """
    Generic class to write operators. This class contains no pre-defined
    operators. It allows you to start from scratch if no other operator
    class fulfills your needs.

    **Arguments**

    set_names : list of str, optional
        Name of the operators sets.
        Default to `default`

    mapping_func : callable (or `None`), optional
        Mapping the site index to an operator. Arguments
        `site_idx` must be accepted.
        Default to `None` (default mapping to only operator set)
    """

    def __init__(self, set_names="default", mapping_func=None):
        if isinstance(set_names, str):
            set_names = [set_names]

        # One ordered dictionary of operators per set name.
        self._ops_dicts = {}
        for name in set_names:
            if not isinstance(name, str):
                raise TypeError(f"Set names must be str, but got `{type(name)}`.")
            self._ops_dicts[name] = OrderedDict()
        self._set_names = tuple(self._ops_dicts.keys())
        self._one_unique = len(self._set_names) == 1

        if mapping_func is None:
            self._mapping_func = _DefaultMapping(self._set_names[0])
        else:
            self._mapping_func = mapping_func

        # Mapping of operators (to avoid equal operators being defined twice)
        # Can be set, e.g., for 2nd order operators.
        self._has_2nd_order = False
        self._mapping_op = {}

    @property
    def one_unique(self):
        """Flag if only one operators set exists (True) or multiple (False)."""
        return self._one_unique

    @property
    def mapping_func(self):
        """Mapping function for site to operator set name."""
        return self._mapping_func

    @property
    def set_names(self):
        """Return operator set names as list of strings."""
        return list(self._set_names)

    def __len__(self):
        """Length of TNOperators defined as number of operator sets."""
        return len(self._ops_dicts)

    def __contains__(self, key):
        """Check if a key is inside the operators."""
        key_a, key_b = self._parse_key(key)
        if key_a not in self._ops_dicts:
            return False

        return key_b in self._ops_dicts[key_a]

    def __delitem__(self, key):
        """Delete entry in operators."""
        key_a, key_b = self._parse_key(key)
        del self._ops_dicts[key_a][key_b]

    def __getitem__(self, key):
        """Extract entry by key."""
        key_a, key_b = self._parse_key(key)
        return self._ops_dicts[key_a][key_b]

    def __setitem__(self, key, value):
        """Set entry by key."""
        key_a, key_b = self._parse_key(key, callee_set=True)
        self._ops_dicts[key_a][key_b] = value

    def __iter__(self):
        """Iterate through all keys (of all operators sets)."""
        for key, value in self._ops_dicts.items():
            for subkey in value:
                yield (key, subkey)

    def items(self):
        """Iterate through all (key, value) pairs of all operators sets."""
        for key, value in self._ops_dicts.items():
            for subkey, subvalue in value.items():
                yield (key, subkey), subvalue

    def _parse_key(self, key, callee_set=False):
        """
        Parse the key and split into operator set key and operator name key.

        **Arguments**

        key : tuple (or str)
            Key as tuple of length two (or operator name). The first entry
            of a tuple is either the set name (str) or a site index (python
            or numpy integer), which is translated via the mapping function.

        callee_set : bool, optional
            Indicate if callee is `__setitem__`.
            Default to `False`.

        **Returns**

        Tuple of set name and operator name.

        **Raises**

        ValueError if the key cannot be resolved, or if an entry is set
        via a site index.
        """
        if isinstance(key, str) and self.one_unique:
            return self._set_names[0], key

        if isinstance(key, str):
            raise ValueError("Operators are not unique, indicate index.")

        if len(key) != 2:
            raise ValueError("Operators are not unique, indicate index.")

        # Any integer type of numpy counts as site index, not only np.int64.
        is_site_idx = isinstance(key[0], (int, np.integer))

        if isinstance(key[0], str):
            # str for operator set name
            key_0 = key[0]
        elif is_site_idx and callee_set:
            raise ValueError("Cannot set entry via integer entry (per site).")
        elif is_site_idx:
            # int for site, use mapping
            # pylint: disable-next=not-callable
            key_0 = self.mapping_func(key[0])
        else:
            raise ValueError(f"First entry must be set name or int, but `{key[0]}`.")

        if not isinstance(key[1], str):
            raise ValueError(
                f"Second entry must specify operator name, but `{key[1]}`."
            )

        return key_0, key[1]

    def get_operator(self, site_idx_1d, operator_name, params):
        """
        Provide a method to return any operator, either defined via
        a callable or directly as a matrix.

        **Arguments**

        site_idx_1d : int, str
            If int (python or numpy integer), site where we need the
            operator. Mapping will evaluate what to return.
            If str, name of operator set.

        operator_name : str
            Tag/identifier of the operator.

        params : dict
            Simulation parameters as a dictionary; dict is passed
            to callable.
        """
        if isinstance(site_idx_1d, (int, np.integer)):
            # pylint: disable-next=not-callable
            key_0 = self._mapping_func(site_idx_1d)
        else:
            key_0 = site_idx_1d
        op_mat = self.eval_numeric_param(self._ops_dicts[key_0][operator_name], params)
        return op_mat

    def get_local_links(self, num_sites, params):
        """
        Extract the local links from the operators.

        **Arguments**

        num_sites : integer
            Number of sites.

        params : dict
            Dictionary with parameterization of the simulation.

        **Returns**

        List with one entry per site, taken from the operator `id`.
        """
        local_links = []
        for ii in range(num_sites):
            eye = self.get_operator(ii, "id", params)

            if hasattr(eye, "links"):
                local_links.append(eye.links[1])
            else:
                # When constructing H, we call this with numpy tensors
                local_links.append(eye.shape[0])

        return local_links

    def transform(self, transformation, **kwargs):
        """
        Generate a new :class:`TNOperators` by transforming the
        current instance.

        **Arguments**

        transformation : callable
            Accepting key and value as arguments plus potential
            keyword arguments.

        **kwargs : key-word arguments
            Will be passed to `transformation`
        """
        new_ops = TNOperators(set_names=self.set_names, mapping_func=self.mapping_func)
        for key, value in self.items():
            new_ops[key] = transformation(key, value, **kwargs)

        return new_ops

    def check_alternative_op(self, set_name, key):
        """
        Check entry for alternative operators, i.e., the sigma_x squared
        is the identity.

        Arguments
        ---------

        set_name : str
            Search in this set name of operators. (Set names allow
            different Hilbert spaces on different sites.)

        key : str
            Operator represented as key. Check if there is an alternative
            key for this key.

        Returns
        -------

        alternative_key : None | str
            If `None`, no alternative key is given or the corresponding
            dictionary for checking is not set. If str, then this operator
            has the same representation as `key`.
        """
        set_dict = self._mapping_op.get(set_name, None)
        if set_dict is None:
            return None

        return set_dict.get(key, None)

    # pylint: disable-next=too-many-branches
    def generate_products_2nd_order(
        self,
        left_conj=False,
        left_transpose=False,
        right_conj=False,
        right_transpose=False,
    ):
        """
        Generate all possible multiplications (matrix-matrix multiplications) of
        the operator set, i.e., [A, B, ...] generates [A*A, A*B, B*A, B*B, ...].
        Transformation can be taken into account on top.

        Arguments
        ---------

        left_conj : Boolean
            Tells if the left operator needs to be complex conjugated.
            Default is False.

        left_transpose : Boolean
            Tells if the left operator needs to be transposed.
            Default is False.

        right_conj : Boolean
            Tells if the right operator needs to be complex conjugated.
            Default is False.

        right_transpose : Boolean
            Tells if the right operator needs to be transposed.
            Default is False.

        Returns
        -------

        None (in-place update of the operator dictionary). The method does
        nothing if the products have been generated before.

        """
        if self._has_2nd_order:
            return

        self._has_2nd_order = True

        additional_ops = {}
        for set_name in self.set_names:
            additional_ops[set_name] = {}

            for op_str_a, op_a in self._ops_dicts[set_name].items():
                for op_str_b, op_b in self._ops_dicts[set_name].items():
                    # NOTE(review): the two `_op_string_mul` results are
                    # overwritten right away, i.e., the key never encodes the
                    # conj / transpose flags. Kept as is; confirm which key
                    # format is intended.
                    op_str = _op_string_mul("", op_str_a, left_conj, left_transpose)
                    op_str = _op_string_mul(
                        op_str, op_str_b, right_conj, right_transpose
                    )
                    op_str = op_str_a + "*" + op_str_b
                    if (op_str_a == "id") and (op_str_b == "id"):
                        additional_ops[set_name][op_str] = "id"
                    elif (
                        (op_str_a == "id")
                        and (not right_conj)
                        and (not right_transpose)
                    ):
                        # Identity times an untransformed B is B itself.
                        additional_ops[set_name][op_str] = op_str_b
                    elif (
                        (op_str_b == "id") and (not left_conj) and (not left_transpose)
                    ):
                        # Untransformed A times identity is A itself.
                        additional_ops[set_name][op_str] = op_str_a
                    elif op_a.has_symmetry:
                        tmp_a = _op_transformation(op_a, left_conj, left_transpose)
                        tmp_b = _op_transformation(op_b, right_conj, right_transpose)

                        op = tmp_a.tensordot(tmp_b, [(2,), (1,)])
                        _, op = op.split_qr([0, 3, 1, 4], [2, 5])
                        op, _ = op.split_rq([0, 1], [2, 3, 4])
                        additional_ops[set_name][op_str] = op
                    else:
                        tmp_a = _op_transformation(op_a, left_conj, left_transpose)
                        tmp_b = _op_transformation(op_b, right_conj, right_transpose)

                        op = tmp_a.einsum("ijkl,akbd->ijbl", tmp_b)
                        additional_ops[set_name][op_str] = op

            # Check they are really new and not identical to existing ones
            # to get the smallest set
            # NOTE(review): a product is registered in `to_be_added` as soon
            # as it differs from any existing operator, so products equal to
            # an existing operator usually end up in both dictionaries.
            # Behavior kept as is; confirm whether a first-match `break` with
            # a for/else was intended.
            to_be_added = {}
            to_be_reset = {}
            for key, op in additional_ops[set_name].items():
                if isinstance(op, str):
                    continue

                for key_ii, op_ii in self._ops_dicts[set_name].items():
                    if op.are_equal(op_ii, tol=10 * op.dtype_eps):
                        to_be_reset[key] = key_ii
                    else:
                        to_be_added[key] = op
                        continue

            for key, value in to_be_added.items():
                self._ops_dicts[set_name][key] = value

            for key, value in to_be_reset.items():
                additional_ops[set_name][key] = value

        # Remember for each product either its tensor or the name of the
        # existing operator with the same representation.
        for set_name, set_dict in additional_ops.items():
            if set_name not in self._mapping_op:
                self._mapping_op[set_name] = {}

            for key, value in set_dict.items():
                self._mapping_op[set_name][key] = value
|
||||
|
||||
|
||||
def _op_transformation(op, is_conj, is_transpose):
|
||||
"""
|
||||
Carry out the transformation on an operator.
|
||||
|
||||
Arguments
|
||||
---------
|
||||
|
||||
op : :class:`_AbstractQteaTensor`
|
||||
Tensor to be transformed. We assume rank-4 tensors.
|
||||
|
||||
is_conj : bool
|
||||
Flag if conjugate is applied.
|
||||
|
||||
is_transpose : bool
|
||||
Flag if transpose is applied.
|
||||
|
||||
Returns
|
||||
-------
|
||||
|
||||
new_op : :class:`_AbstractQteaTensor`
|
||||
Operator after the transformations.
|
||||
"""
|
||||
if is_conj and is_transpose:
|
||||
new_op = op.conj().transpose([0, 2, 1, 3])
|
||||
elif is_conj:
|
||||
new_op = op.conj()
|
||||
elif is_transpose:
|
||||
new_op = op.transpose([0, 2, 1, 3])
|
||||
else:
|
||||
new_op = op
|
||||
|
||||
return new_op
|
||||
1918
.venv/lib/python3.12/site-packages/qtealeaves/tooling/mapping.py
Normal file
1918
.venv/lib/python3.12/site-packages/qtealeaves/tooling/mapping.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,55 @@
|
||||
# This code is part of qtealeaves.
|
||||
#
|
||||
# This code is licensed under the Apache License, Version 2.0. You may
|
||||
# obtain a copy of this license in the LICENSE.txt file in the root directory
|
||||
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
|
||||
#
|
||||
# Any modifications or derivative works of this code must retain this
|
||||
# copyright notice, and modified files need to carry a notice indicating
|
||||
# that they have been altered from the originals.
|
||||
|
||||
"""
|
||||
Common permutations often used in tensor network methods.
|
||||
"""
|
||||
from functools import lru_cache
|
||||
|
||||
__all__ = []
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def _transpose_idx1(num_legs, contracted_idx):
    """
    Variant of `_transpose_idx` moving the second last index instead of
    the last one; the last index stays in place.
    """
    last_leg = num_legs - 1
    inner = _transpose_idx(last_leg, contracted_idx)
    return inner + (last_leg,)
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
def _transpose_idx2(num_legs, contracted_idx):
    """
    Variant of `_transpose_idx` moving the third last index instead of
    the last one; the last two indexes stay in place.
    """
    fixed_tail = (num_legs - 2, num_legs - 1)
    inner = _transpose_idx(num_legs - 2, contracted_idx)
    return inner + fixed_tail
|
||||
|
||||
|
||||
@lru_cache(maxsize=None)
|
||||
def _transpose_idx(num_legs, contracted_idx):
|
||||
"""
|
||||
Transpose in the original order the indexes
|
||||
of a n-legs tensor contracted over the
|
||||
index `contracted_idx`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
contracted_idx : int
|
||||
Index over which there has been a contraction
|
||||
|
||||
Returns
|
||||
-------
|
||||
tuple
|
||||
Indexes for the transposition
|
||||
"""
|
||||
if contracted_idx > num_legs - 1:
|
||||
raise ValueError(
|
||||
f"Cannot contract leg {contracted_idx} of tensor with {num_legs} legs"
|
||||
)
|
||||
return (*range(contracted_idx), num_legs - 1, *range(contracted_idx, num_legs - 1))
|
||||
5410
.venv/lib/python3.12/site-packages/quimb/tensor/circuit.py
Normal file
5410
.venv/lib/python3.12/site-packages/quimb/tensor/circuit.py
Normal file
File diff suppressed because it is too large
Load Diff
11987
.venv/lib/python3.12/site-packages/quimb/tensor/tensor_core.py
Normal file
11987
.venv/lib/python3.12/site-packages/quimb/tensor/tensor_core.py
Normal file
File diff suppressed because it is too large
Load Diff
5060
.venv/lib/python3.12/site-packages/quimb/tensor/tn1d/core.py
Normal file
5060
.venv/lib/python3.12/site-packages/quimb/tensor/tn1d/core.py
Normal file
File diff suppressed because it is too large
Load Diff
21
README.md
21
README.md
@@ -28,15 +28,24 @@ Currently, the supported tensor network libraries are:
|
||||
|
||||
## CPU expectation benchmarks
|
||||
|
||||
The current CPU expectation entrypoint is:
|
||||
Use the library APIs directly:
|
||||
|
||||
```sh
|
||||
python -u benchmark_cpu_expectation.py --ansatz mps --nqubits 40 --nlayers 10 --bond 2048 --circuits brickwall_cnot --observables ring_xz
|
||||
```py
|
||||
import qibotn
|
||||
|
||||
records = qibotn.run_cpu_benchmark_cases(
|
||||
ansatz="mps",
|
||||
nqubits=40,
|
||||
nlayers=10,
|
||||
bond=2048,
|
||||
circuits=("brickwall_cnot",),
|
||||
observables=("ring_xz",),
|
||||
)
|
||||
```
|
||||
|
||||
Use `--ansatz tn` for the generic TN path and `--mpi` under `mpiexec` for MPI runs.
|
||||
Reusable circuit and observable builders live in `src/qibotn/benchmark_cases.py`; execution logic lives in `src/qibotn/expectation_runner.py`.
|
||||
For Vidal/MPS 1D-chain scale tests, use `run_vidal_mps_cases.sh`.
|
||||
For generic TN use `ansatz="tn"`. Contest/custom runners are available as
|
||||
`qibotn.run_contest_tn_case`, `qibotn.run_custom_tn_expectation`,
|
||||
`qibotn.run_contest_mps_case`, and `qibotn.run_vidal_validation_cases`.
|
||||
|
||||
## Installation
|
||||
|
||||
|
||||
@@ -1,285 +0,0 @@
|
||||
"""CLI for CPU TN/MPS expectation benchmarks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from qibotn.benchmark_cases import (
|
||||
CIRCUITS,
|
||||
OBSERVABLES,
|
||||
build_circuit,
|
||||
observable_terms,
|
||||
parse_names,
|
||||
terms_to_dict,
|
||||
)
|
||||
from qibotn.expectation_runner import (
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
def optional_int(text):
    """
    Parse an integer that may be disabled.

    The strings `none`, `null`, `inf` and `unlimited` (in any letter case)
    yield `None`; everything else is passed to `int`.
    """
    disabled_tokens = {"none", "null", "inf", "unlimited"}
    if isinstance(text, str):
        if text.lower() in disabled_tokens:
            return None
    return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """
    Parse a float that may be disabled.

    The strings `none`, `null`, `inf` and `unlimited` (in any letter case)
    yield `None`; everything else is passed to `float`.
    """
    disabled_tokens = {"none", "null", "inf", "unlimited"}
    if isinstance(text, str):
        if text.lower() in disabled_tokens:
            return None
    return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
    """Format `value` with the format spec `fmt`; `None` becomes "None"."""
    if value is None:
        return "None"
    return format(value, fmt)
|
||||
|
||||
|
||||
def should_stop_dask(args):
    """
    Tell whether the dask cluster should be stopped after the search.

    True only if `keep_dask` is not set, the search backend is `dask`,
    a dask address is given and no tree is loaded from file.
    """
    if args.keep_dask:
        return False
    if args.tn_search_backend != "dask":
        return False
    if args.dask_address is None:
        return False
    return args.tn_load_tree is None
|
||||
|
||||
|
||||
def stop_dask_cluster(args, rank):
    """
    Stop the dask cluster via `tools/manage_tn_dask_cluster.sh stop`.

    Nothing happens on ranks other than 0, if `should_stop_dask(args)` is
    false, or if the script does not exist (the latter is reported on
    stdout). Host and port parsed from `args.dask_address` are exported as
    `SCHEDULER_HOST` / `SCHEDULER_PORT` unless already set in the
    environment. The exit status of the script is not checked.
    """
    if rank != 0:
        return
    if not should_stop_dask(args):
        return

    tools_dir = Path(__file__).resolve().parent / "tools"
    script = tools_dir / "manage_tn_dask_cluster.sh"
    if not script.exists():
        print(f"dask_stop_skipped reason=missing_script path={script}", flush=True)
        return

    # Values already present in the environment take precedence.
    env = os.environ.copy()
    parsed = urlparse(args.dask_address)
    if parsed.hostname:
        env.setdefault("SCHEDULER_HOST", parsed.hostname)
    if parsed.port:
        env.setdefault("SCHEDULER_PORT", str(parsed.port))

    command = [str(script), "stop"]
    workdir = str(tools_dir.parent)

    print("dask_stop_after_search start", flush=True)
    subprocess.run(command, cwd=workdir, env=env, check=False)
    print("dask_stop_after_search done", flush=True)
|
||||
|
||||
|
||||
def build_parallel_opts(args):
    """
    Translate the parsed command line arguments into the options
    dictionary of the TN search / contraction.

    The five base entries are always present; the remaining entries are
    only added if the corresponding argument is given (not `None`) or the
    corresponding flag is set.
    """
    slicing_opts = {}
    for attr, name in (
        ("tn_target_slices", "target_slices"),
        ("tn_target_size", "target_size"),
    ):
        value = getattr(args, attr)
        if value is not None:
            slicing_opts[name] = value

    opts = {}
    opts["slicing_opts"] = slicing_opts or None
    opts["search_workers"] = args.tn_search_workers or args.torch_threads
    opts["max_repeats"] = args.tn_search_repeats
    opts["max_time"] = args.tn_search_time
    opts["print_stats"] = not args.no_tn_stats

    # Optional values, forwarded if given.
    for attr, name in (
        ("tn_search_backend", "search_backend"),
        ("dask_address", "dask_address"),
        ("tn_save_tree", "save_tree_path"),
        ("tn_load_tree", "load_tree_path"),
    ):
        value = getattr(args, attr)
        if value is not None:
            opts[name] = value

    # Flags, forwarded as True if set.
    for attr, name in (
        ("tn_search_only", "search_only"),
        ("tn_debug_trials", "debug_trials"),
    ):
        if getattr(args, attr):
            opts[name] = True

    if args.tn_contract_implementation is not None:
        opts["contract_implementation"] = args.tn_contract_implementation
    if args.dask_close_workers:
        opts["dask_close_workers"] = True
    return opts
|
||||
|
||||
|
||||
def main():
    """Run the CPU expectation benchmark from the command line.

    Parses the arguments, builds an ``ExpectationConfig``, then evaluates every
    requested circuit/observable pair and prints one result row per pair
    (rank 0 only in MPI mode), followed by per-term TN statistics when the
    result carries them. ``stop_dask_cluster`` is always called on exit.

    Raises:
        ValueError: If ``--exact`` is requested for more qubits than
            ``--exact-max-qubits`` allows.
    """
    args = _build_arg_parser().parse_args()

    ansatz = "mps" if args.mps else (args.ansatz or "tn")
    circuits = parse_names(args.circuits, CIRCUITS, "circuits")
    observables = (
        []
        if args.pauli_pattern
        else parse_names(args.observables, OBSERVABLES, "observables")
    )

    rank = 0
    if args.mpi:
        # Imported lazily so serial runs do not require mpi4py.
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()

    config = ExpectationConfig(
        ansatz=ansatz,
        mpi=args.mpi,
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        quimb_backend=args.quimb_backend,
        dtype=args.dtype,
        torch_threads=args.torch_threads,
        parallel_opts=build_parallel_opts(args),
    )

    if rank == 0:
        mode = "MPI" if args.mpi else "serial"
        print(
            f"backend=cpu ansatz={ansatz.upper()} mode={mode} "
            f"nqubits={args.nqubits} nlayers={args.nlayers} "
            f"bond={format_optional(args.bond)} "
            f"cut_ratio={format_optional(args.cut_ratio)} seed={args.seed} "
            f"quimb_backend={args.quimb_backend} dtype={args.dtype} "
            f"torch_threads={args.torch_threads} "
            f"tn_search_backend={args.tn_search_backend}"
        )
        print("circuit observable exact value abs_error rel_error seconds")

    try:
        # Validate on every rank, before any work starts. Previously only
        # rank 0 raised, and only after the other ranks were free to enter
        # run_cpu_expectation. NOTE(review): if that call is collective under
        # MPI, a lone failing rank would leave the others waiting; confirm.
        if args.exact and args.nqubits > args.exact_max_qubits:
            raise ValueError(
                f"--exact is limited to {args.exact_max_qubits} qubits by default."
            )

        for circuit_kind in circuits:
            circuit = build_circuit(circuit_kind, args.nqubits, args.nlayers, args.seed)
            named_observables = (
                [(f"pattern:{args.pauli_pattern}", {"pauli_string_pattern": args.pauli_pattern})]
                if args.pauli_pattern
                else [
                    (obs_kind, terms_to_dict(observable_terms(obs_kind, args.nqubits)))
                    for obs_kind in observables
                ]
            )

            for obs_name, observable in named_observables:
                # The exact reference is only computed on rank 0.
                exact = None
                if args.exact and rank == 0:
                    exact = exact_for_observable(circuit, observable, args.nqubits)

                result = run_cpu_expectation(circuit, observable, config)
                if args.mpi and result.rank != 0:
                    continue

                abs_error = float("nan") if exact is None else abs(result.value - exact)
                rel_error = (
                    float("nan")
                    if exact is None
                    # Floor the denominator to avoid dividing by zero.
                    else abs_error / max(abs(exact), 1e-15)
                )
                exact_text = "nan" if exact is None else f"{exact:.16e}"
                print(
                    f"{circuit_kind} {obs_name} {exact_text} {result.value:.16e} "
                    f"{abs_error:.6e} {rel_error:.6e} {result.seconds:.3f}"
                )
                for stat in result.parallel_stats or ():
                    print(_format_term_summary(stat))
    finally:
        stop_dask_cluster(args, rank)


def _build_arg_parser():
    """Create the argument parser for the CPU expectation benchmark."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=40)
    parser.add_argument("--nlayers", type=int, default=30)
    parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
    parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--torch-threads", type=int, default=8)
    parser.add_argument("--quimb-backend", choices=("numpy", "torch"), default="torch")
    parser.add_argument(
        "--dtype",
        choices=("complex128", "complex64"),
        default="complex128",
    )
    parser.add_argument("--ansatz", choices=("tn", "mps"), default=None)
    parser.add_argument("--mps", action="store_true")
    parser.add_argument("--mpi", action="store_true")
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    parser.add_argument("--circuits", nargs="+", default=["brickwall_cnot"])
    parser.add_argument("--observables", nargs="+", default=["ring_xz"])
    parser.add_argument("--pauli-pattern")
    parser.add_argument("--tn-target-slices", type=int)
    parser.add_argument("--tn-target-size", type=int, default=2**32)
    parser.add_argument("--tn-search-workers", type=int)
    parser.add_argument("--tn-search-repeats", type=int, default=128)
    parser.add_argument("--tn-search-time", type=float, default=60.0)
    parser.add_argument(
        "--no-tn-stats",
        action="store_true",
        help="Do not print per-term TN search/contraction diagnostics.",
    )
    parser.add_argument(
        "--tn-search-backend",
        choices=("processpool", "dask"),
        default="dask",
        help="Path-search backend. In MPI mode, dask search runs only on rank 0 and broadcasts the tree.",
    )
    parser.add_argument(
        "--dask-address",
        help="Dask scheduler address, for example tcp://host:8786. If omitted with dask search, a local cluster is created.",
    )
    parser.add_argument(
        "--dask-close-workers",
        action="store_true",
        help="After dask path search, ask the scheduler to close all currently connected workers.",
    )
    parser.add_argument(
        "--keep-dask",
        action="store_true",
        help=(
            "Keep an external dask cluster running after search. By default, "
            "tools/manage_tn_dask_cluster.sh stop is called after search when "
            "--dask-address is used."
        ),
    )
    parser.add_argument(
        "--tn-save-tree",
        help="Save searched cotengra contraction tree(s) to this pickle file.",
    )
    parser.add_argument(
        "--tn-load-tree",
        help="Load cotengra contraction tree(s) from this pickle file and skip path search.",
    )
    parser.add_argument(
        "--tn-search-only",
        action="store_true",
        help="Only run path search and optional --tn-save-tree; skip contraction.",
    )
    parser.add_argument(
        "--tn-debug-trials",
        action="store_true",
        help="Print dask worker summary and per-trial worker start/done logs.",
    )
    parser.add_argument(
        "--tn-contract-implementation",
        choices=("auto", "cotengra", "autoray", "cpp"),
        help="cotengra contraction implementation for TN contraction.",
    )
    return parser


def _format_term_summary(stat):
    """Render one per-term statistics mapping as a ``tn_term_summary`` line."""
    cost = stat["path_cost"]
    # ``.get(key, {})`` only covers a missing key; a stored None would make
    # the lookups below fail, so normalise both cases to an empty mapping.
    search_stats = stat.get("search_stats") or {}
    return (
        "tn_term_summary "
        f"term={stat.get('term_index', 0)} "
        f"search_seconds={stat.get('search_seconds', float('nan')):.3f} "
        f"contract_seconds={stat.get('contract_seconds', float('nan')):.3f} "
        f"completed_trials={search_stats.get('completed_trials', 'na')} "
        f"finite_trials={search_stats.get('finite_trials', 'na')} "
        f"failed_trials={search_stats.get('failed_trials', 'na')} "
        f"requested_trials={search_stats.get('requested_trials', 'na')} "
        f"best_score={search_stats.get('best_score', float('nan')):.6g} "
        f"slices={cost['nslices']} "
        f"log10_flops={cost['log10_flops']:.3f} "
        f"log10_write={cost['log10_write']:.3f} "
        f"log2_size={cost['log2_size']:.3f} "
        f"log10_combo={cost['log10_combo']:.3f} "
        f"peak_memory_gib={cost['peak_memory_gib']:.6g} "
        f"slicing_overhead={cost['slicing_overhead']:.6g} "
        f"rank_slices={stat.get('rank_slices', 'na')}"
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,53 +1,12 @@
|
||||
# TN
|
||||
```bash
|
||||
# qibotn目录下
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
CASE=main1 \
|
||||
OBSERVABLES=long_z_string \
|
||||
NQUBITS=34 \
|
||||
NLAYERS=20 \
|
||||
TORCH_THREADS=48 \
|
||||
SEARCH_REPEATS=2048 \
|
||||
SEARCH_TIME=300 \
|
||||
SCHEDULER_HOST=10.20.1.103 \
|
||||
WORKER_HOSTS="10.20.1.103 10.20.6.101" \
|
||||
DASK_ADDRESS="tcp://10.20.1.103:8786" \
|
||||
NWORKERS=84 \
|
||||
NTHREADS=1 \
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2" \
|
||||
tools/run_tn_dask_mpi_all.sh
|
||||
# Contest Runners
|
||||
|
||||
# 单独缩并contract计算
|
||||
The reusable implementations live in `src/qibotn/backends/`.
|
||||
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2 \
|
||||
.venv/bin/python -u tools/tn_contest_runner.py contract \
|
||||
--mpi \
|
||||
--case main1 \
|
||||
--nqubits 34 \
|
||||
--nlayers 20 \
|
||||
--observables long_z_string \
|
||||
--tree-dir trees/contest_tn \
|
||||
--torch-threads 48 \
|
||||
--dtype complex64
|
||||
```
|
||||
- `qibotn.run_contest_tn_case`: quimb+torch TN search/contract cases.
|
||||
- `qibotn.run_contest_mps_case`: Vidal/MPS contest expectation cases.
|
||||
- `qibotn.run_vidal_mpi_contest_case`: direct Vidal MPI observable sweep.
|
||||
- `qibotn.run_custom_tn_expectation`: custom quimb+torch TN cases.
|
||||
|
||||
# MPS
|
||||
```
|
||||
cd /home/yx/qibotn
|
||||
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2" \
|
||||
TORCH_THREADS=48 \
|
||||
OBS_FILTER=ring_xz \
|
||||
MAIN1_NQ=128 \
|
||||
MAIN1_LAYERS=24 \
|
||||
MAIN1_BOND=1024 \
|
||||
tools/run_vidal_mpi_contest_cases.sh main1
|
||||
```
|
||||
`src/qibotn/backends/quimb.py` holds the TN helpers,
|
||||
`src/qibotn/backends/qmatchatea.py` holds the qmatchatea MPS helpers,
|
||||
and `src/qibotn/backends/vidal.py` holds the Vidal helpers.
|
||||
|
||||
26
docs/home.md
Normal file
26
docs/home.md
Normal file
@@ -0,0 +1,26 @@
|
||||
# qibotn
|
||||
|
||||
Core reusable code lives under `src/qibotn/`. Prefer importing from `qibotn`
|
||||
or `qibotn.backends.*`; benchmark and runner helpers have been folded into the
|
||||
package instead of being kept as standalone scripts.
|
||||
|
||||
- `backends/quimb.py`: TN + torch helpers for quimb.
|
||||
- `backends/qmatchatea.py`: qmatchatea + torch MPS helpers.
|
||||
- `backends/vidal.py`: Vidal + torch helpers.
|
||||
- `contest_cases.py`: shared contest circuits, observables, and case specs.
|
||||
- `torch_utils.py`: shared torch array/thread helpers.
|
||||
|
||||
Quimb TN reusable entrypoints include `build_quimb_backend_circuit`,
|
||||
`build_expectation_tn`, `run_quimb_torch_expectation`,
|
||||
`compare_quimb_gate_merge`, `compare_quimb_gate_merge_expectation`,
|
||||
`profile_quimb_torch_expectation`, and `time_quimb_contract_implementations`.
|
||||
|
||||
Common public imports include `qibotn.cpu_expectation`,
|
||||
`qibotn.mps_expectation`, `qibotn.run_qmatchatea_expectation`,
|
||||
`qibotn.run_vidal_expectation`, `qibotn.build_contest_circuit`, and
|
||||
`qibotn.build_contest_observable`.
|
||||
|
||||
Former script entrypoints are available as importable functions:
|
||||
`qibotn.run_cpu_benchmark_cases`, `qibotn.run_contest_tn_case`,
|
||||
`qibotn.run_custom_tn_expectation`, `qibotn.run_contest_mps_case`,
|
||||
`qibotn.run_vidal_mpi_contest_case`, and `qibotn.run_vidal_validation_cases`.
|
||||
6
hostfile
6
hostfile
@@ -1,2 +1,4 @@
|
||||
10.20.1.103:2
|
||||
10.20.6.101:2
|
||||
10.20.1.100
|
||||
10.20.1.101
|
||||
10.20.1.102
|
||||
10.20.1.103
|
||||
|
||||
139
requirements.txt
Normal file
139
requirements.txt
Normal file
@@ -0,0 +1,139 @@
|
||||
alembic==1.18.4
|
||||
annotated-types==0.7.0
|
||||
antlr4-python3-runtime==4.13.2
|
||||
anyio==4.13.0
|
||||
asttokens==3.0.1
|
||||
attrs==26.1.0
|
||||
autoray==0.8.10
|
||||
beautifulsoup4==4.14.3
|
||||
certifi==2026.4.22
|
||||
cffi==2.0.0
|
||||
charset-normalizer==3.4.7
|
||||
click==8.3.3
|
||||
cloudpickle==3.1.2
|
||||
cma==3.4.0
|
||||
colorlog==6.10.1
|
||||
contourpy==1.3.3
|
||||
cotengra==0.7.5
|
||||
coverage==7.13.5
|
||||
cryptography==47.0.0
|
||||
cycler==0.12.1
|
||||
cytoolz==1.1.0
|
||||
dask==2026.3.0
|
||||
decorator==5.2.1
|
||||
dill==0.4.1
|
||||
distributed==2026.3.0
|
||||
executing==2.2.1
|
||||
filelock==3.25.2
|
||||
fonttools==4.62.1
|
||||
fsspec==2026.2.0
|
||||
greenlet==3.3.2
|
||||
h11==0.16.0
|
||||
h5py==3.16.0
|
||||
html5lib==1.1
|
||||
httpcore==1.0.9
|
||||
httpx==0.27.2
|
||||
httpx-sse==0.4.3
|
||||
idna==3.13
|
||||
igraph==1.0.0
|
||||
iniconfig==2.3.0
|
||||
ipython==8.39.0
|
||||
jedi==0.19.2
|
||||
Jinja2==3.1.6
|
||||
joblib==1.5.3
|
||||
jsonschema==4.26.0
|
||||
jsonschema-specifications==2025.9.1
|
||||
kahypar==1.3.7
|
||||
kiwisolver==1.5.0
|
||||
llvmlite==0.44.0
|
||||
locket==1.0.0
|
||||
lxml==6.1.0
|
||||
Mako==1.3.10
|
||||
markdownify==1.2.2
|
||||
MarkupSafe==3.0.3
|
||||
matplotlib==3.10.8
|
||||
matplotlib-inline==0.2.1
|
||||
mcp==1.27.0
|
||||
mcp-server-fetch==2025.4.7
|
||||
mpi4py==4.1.1
|
||||
mpmath==1.3.0
|
||||
msgpack==1.1.2
|
||||
networkx==3.6.1
|
||||
numba==0.61.2
|
||||
numpy @ file:///home/yx/numpy
|
||||
openqasm3==1.0.1
|
||||
opt_einsum==3.4.0
|
||||
optuna==4.8.0
|
||||
packaging==26.0
|
||||
parso==0.8.6
|
||||
partd==1.4.2
|
||||
pexpect==4.9.0
|
||||
pillow==12.2.0
|
||||
pluggy==1.6.0
|
||||
prompt_toolkit==3.0.52
|
||||
Protego==0.6.0
|
||||
protobuf==7.34.1
|
||||
psutil==5.9.8
|
||||
ptyprocess==0.7.0
|
||||
pure_eval==0.2.3
|
||||
py-spy==0.4.2
|
||||
pycparser==3.0
|
||||
pydantic==2.13.3
|
||||
pydantic-settings==2.14.0
|
||||
pydantic_core==2.46.3
|
||||
Pygments==2.20.0
|
||||
PyJWT==2.12.1
|
||||
pyparsing==3.3.2
|
||||
pytest==9.0.3
|
||||
pytest-cov==7.1.0
|
||||
pytest-env==1.6.0
|
||||
python-dateutil==2.9.0.post0
|
||||
python-dotenv==1.2.2
|
||||
python-multipart==0.0.26
|
||||
PyYAML==6.0.3
|
||||
qibo==0.3.2
|
||||
qibojit==0.1.15
|
||||
-e git+https://git.nudt.space/jaunatisblue/qibotn.git@eed42dcfa9739c609a58f7367fe403abf2e992a9#egg=qibotn
|
||||
qiskit==1.4.5
|
||||
qmatchatea==1.5.8
|
||||
qredtea==0.3.15
|
||||
qtealeaves==1.7.32
|
||||
quimb==1.13.0
|
||||
ray==2.55.1
|
||||
readabilipy==0.3.0
|
||||
referencing==0.37.0
|
||||
regex==2026.4.4
|
||||
requests==2.33.1
|
||||
rpds-py==0.30.0
|
||||
rustworkx==0.17.1
|
||||
scipy @ file:///home/yx/scipy
|
||||
setuptools==70.2.0
|
||||
six==1.17.0
|
||||
sniffio==1.3.1
|
||||
sortedcontainers==2.4.0
|
||||
soupsieve==2.8.3
|
||||
SQLAlchemy==2.0.49
|
||||
sse-starlette==3.4.1
|
||||
stack-data==0.6.3
|
||||
starlette==1.0.0
|
||||
stevedore==5.7.0
|
||||
symengine==0.13.0
|
||||
sympy==1.14.0
|
||||
tabulate==0.9.0
|
||||
tblib==3.2.2
|
||||
texttable==1.7.0
|
||||
threadpoolctl==3.6.0
|
||||
toolz==1.1.0
|
||||
torch==2.11.0+cpu
|
||||
torchaudio==2.11.0+cpu
|
||||
torchvision==0.26.0+cpu
|
||||
tornado==6.5.5
|
||||
tqdm==4.67.3
|
||||
traitlets==5.14.3
|
||||
typing-inspection==0.4.2
|
||||
typing_extensions==4.15.0
|
||||
urllib3==2.6.3
|
||||
uvicorn==0.46.0
|
||||
wcwidth==0.6.0
|
||||
webencodings==0.5.1
|
||||
zict==3.0.0
|
||||
@@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Focused Vidal/MPS expectation test cases for 1D chain circuits.
#
# These cases intentionally avoid qmatchatea and generic TN paths. They target
# the current supported scope: one-qubit gates, adjacent two-qubit gates, and
# Pauli-sum expectation values on a 1D chain.

# Always run from the directory that contains this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$ROOT_DIR"

PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
MPIEXEC="${MPIEXEC:-mpiexec}"
HOSTFILE="${HOSTFILE:-hostfile}"

# Capture the stress-mode thread count BEFORE THREADS receives its general
# default: once THREADS is set to 32 below, a later "${THREADS:-48}" can never
# fall back to 48.
STRESS_THREADS="${THREADS:-48}"
THREADS="${THREADS:-32}"
MPI_RANKS="${MPI_RANKS:-16}"
MPI_THREADS="${MPI_THREADS:-12}"

export OMP_NUM_THREADS="${OMP_NUM_THREADS:-1}"
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-1}"

# Echo the command between separator lines, then execute it.
run() {
  echo
  echo "--------------------------------------------------------------------------------"
  echo "$*"
  echo "--------------------------------------------------------------------------------"
  "$@"
}

# Print the usage text to stdout.
usage() {
  cat <<'EOF'
Usage: ./run_vidal_mps_cases.sh [smoke|convergence|single-long|suite-long|mpi-long|stress]

Common overrides:
  PYTHON_BIN=.venv/bin/python
  THREADS=32                        # stress mode defaults to 48
  OMP_NUM_THREADS=1 MKL_NUM_THREADS=1

Single-node scale overrides:
  NQ=80 LAYERS=16 BOND=65536
  CIRCUIT=brickwall_cnot
  OBSERVABLE=ring_xz
  BONDS="4096 16384 65536"          # for convergence mode

Multi-node overrides:
  HOSTFILE=hostfile
  MPI_RANKS=16 MPI_THREADS=12

Recommended first runs:
  ./run_vidal_mps_cases.sh smoke
  ./run_vidal_mps_cases.sh convergence
  ./run_vidal_mps_cases.sh single-long
EOF
}

case "${1:-help}" in
  smoke)
    # Short correctness-oriented run. Useful before starting long jobs.
    run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mps \
      --nqubits 40 \
      --nlayers 10 \
      --bond 2048 \
      --torch-threads "$THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz range2_xx long_z_string
    ;;

  convergence)
    # Same circuit/observable, increasing bond. Check value convergence.
    # BONDS is deliberately unquoted so it splits into one bond per word.
    for bond in ${BONDS:-4096 16384 65536}; do
      run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
        --mps \
        --nqubits "${NQ:-80}" \
        --nlayers "${LAYERS:-16}" \
        --bond "$bond" \
        --torch-threads "$THREADS" \
        --circuits "${CIRCUIT:-brickwall_cnot}" \
        --observables "${OBSERVABLE:-ring_xz}"
    done
    ;;

  single-long)
    # Single long Vidal run. On node-3, a similar n=40,l=30,bond=2048 case
    # took about 9 minutes for one expectation. This one is meant to be longer.
    run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mps \
      --nqubits "${NQ:-80}" \
      --nlayers "${LAYERS:-16}" \
      --bond "${BOND:-65536}" \
      --torch-threads "$THREADS" \
      --circuits "${CIRCUIT:-brickwall_cnot}" \
      --observables "${OBSERVABLE:-ring_xz}"
    ;;

  suite-long)
    # Application-style multi-circuit, multi-observable MPS run.
    # This is intentionally multi-term and should run much longer than single-long.
    run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mps \
      --nqubits "${NQ:-80}" \
      --nlayers "${LAYERS:-16}" \
      --bond "${BOND:-65536}" \
      --torch-threads "$THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz mixed_local range2_xx long_z_string
    ;;

  mpi-long)
    # Multi-node Vidal segmented MPS run. Uses HOSTFILE.
    run "$MPIEXEC" -hostfile "$HOSTFILE" -n "$MPI_RANKS" "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mpi --mps \
      --nqubits "${NQ:-80}" \
      --nlayers "${LAYERS:-16}" \
      --bond "${BOND:-65536}" \
      --torch-threads "$MPI_THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz mixed_local range2_xx long_z_string
    ;;

  stress)
    # Heavier entanglement. Start only after single-long is stable.
    run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mps \
      --nqubits "${NQ:-80}" \
      --nlayers "${LAYERS:-18}" \
      --bond "${BOND:-262144}" \
      --torch-threads "$STRESS_THREADS" \
      --circuits "${CIRCUIT:-rxx_rzz}" \
      --observables ring_xz open_zz range2_xx
    ;;

  help)
    usage
    ;;

  *)
    # An unrecognised subcommand is an error, not a successful help request.
    echo "Unknown command: $1" >&2
    usage >&2
    exit 2
    ;;
esac
|
||||
@@ -1,5 +1,131 @@
|
||||
import importlib.metadata as im
|
||||
|
||||
from qibotn.backends import MetaBackend
|
||||
|
||||
__version__ = im.version(__package__)
|
||||
|
||||
_LAZY_EXPORTS = {
|
||||
"MetaBackend": ("qibotn.backends", "MetaBackend"),
|
||||
"cpu_backend": ("qibotn.expectation_runner", "cpu_backend"),
|
||||
"cpu_expectation": ("qibotn.expectation_runner", "cpu_expectation"),
|
||||
"mps_expectation": ("qibotn.expectation_runner", "mps_expectation"),
|
||||
"cpu_runcard": ("qibotn.expectation_runner", "cpu_runcard"),
|
||||
"ExpectationConfig": ("qibotn.expectation_runner", "ExpectationConfig"),
|
||||
"exact_for_observable": ("qibotn.expectation_runner", "exact_for_observable"),
|
||||
"run_cpu_expectation": ("qibotn.expectation_runner", "run_cpu_expectation"),
|
||||
"cpu_benchmark_parallel_opts": (
|
||||
"qibotn.expectation_runner",
|
||||
"cpu_benchmark_parallel_opts",
|
||||
),
|
||||
"run_cpu_benchmark_cases": (
|
||||
"qibotn.expectation_runner",
|
||||
"run_cpu_benchmark_cases",
|
||||
),
|
||||
"build_benchmark_circuit": ("qibotn.benchmark_cases", "build_circuit"),
|
||||
"benchmark_observable_terms": ("qibotn.benchmark_cases", "observable_terms"),
|
||||
"exact_pauli_sum": ("qibotn.benchmark_cases", "exact_pauli_sum"),
|
||||
"ring_xz_statevector_expectation": (
|
||||
"qibotn.benchmark_cases",
|
||||
"ring_xz_statevector_expectation",
|
||||
),
|
||||
"terms_to_dict": ("qibotn.benchmark_cases", "terms_to_dict"),
|
||||
"build_contest_circuit": ("qibotn.contest_cases", "build_contest_circuit"),
|
||||
"build_contest_observable": (
|
||||
"qibotn.contest_cases",
|
||||
"build_contest_observable",
|
||||
),
|
||||
"contest_cases": ("qibotn.contest_cases", "CASES"),
|
||||
"analyze_contraction_tree": ("qibotn.parallel", "analyze_contraction_tree"),
|
||||
"load_tree_payload": ("qibotn.parallel", "load_tree_payload"),
|
||||
"save_tree_payload": ("qibotn.parallel", "save_tree_payload"),
|
||||
"slice_tree_payload": ("qibotn.parallel", "slice_tree_payload"),
|
||||
"make_qmatchatea_backend": (
|
||||
"qibotn.backends.qmatchatea",
|
||||
"make_qmatchatea_backend",
|
||||
),
|
||||
"build_qmatchatea_backend": (
|
||||
"qibotn.backends.qmatchatea",
|
||||
"build_qmatchatea_backend",
|
||||
),
|
||||
"benchmark_qmatchatea_svd_control": (
|
||||
"qibotn.backends.qmatchatea",
|
||||
"benchmark_qmatchatea_svd_control",
|
||||
),
|
||||
"run_qmatchatea_expectation": (
|
||||
"qibotn.backends.qmatchatea",
|
||||
"run_qmatchatea_expectation",
|
||||
),
|
||||
"exact_mps_expectation": (
|
||||
"qibotn.backends.qmatchatea",
|
||||
"exact_mps_expectation",
|
||||
),
|
||||
"make_vidal_backend": ("qibotn.backends.vidal", "make_vidal_backend"),
|
||||
"compare_vidal_backend_qmatchatea": (
|
||||
"qibotn.backends.vidal",
|
||||
"compare_vidal_backend_qmatchatea",
|
||||
),
|
||||
"run_vidal_expectation": ("qibotn.backends.vidal", "run_vidal_expectation"),
|
||||
"run_segmented_vidal_ring_xz": (
|
||||
"qibotn.backends.vidal",
|
||||
"run_segmented_vidal_ring_xz",
|
||||
),
|
||||
"build_expectation_tn": ("qibotn.backends.quimb", "build_expectation_tn"),
|
||||
"build_quimb_circuit_stats": (
|
||||
"qibotn.backends.quimb",
|
||||
"build_quimb_circuit_stats",
|
||||
),
|
||||
"compare_quimb_gate_merge": (
|
||||
"qibotn.backends.quimb",
|
||||
"compare_quimb_gate_merge",
|
||||
),
|
||||
"compare_quimb_gate_merge_expectation": (
|
||||
"qibotn.backends.quimb",
|
||||
"compare_quimb_gate_merge_expectation",
|
||||
),
|
||||
"contract_tn": ("qibotn.backends.quimb", "contract_tn"),
|
||||
"load_custom_case_module": ("qibotn.backends.quimb", "load_custom_case_module"),
|
||||
"profile_quimb_torch_expectation": (
|
||||
"qibotn.backends.quimb",
|
||||
"profile_quimb_torch_expectation",
|
||||
),
|
||||
"qibo_circuit_to_quimb_torch": (
|
||||
"qibotn.backends.quimb",
|
||||
"qibo_circuit_to_quimb_torch",
|
||||
),
|
||||
"search_contraction_tree": ("qibotn.backends.quimb", "search_contraction_tree"),
|
||||
"sorted_tree": ("qibotn.backends.quimb", "sorted_tree"),
|
||||
"run_contest_tn_case": ("qibotn.backends.quimb", "run_contest_tn_case"),
|
||||
"run_custom_tn_expectation": (
|
||||
"qibotn.backends.quimb",
|
||||
"run_custom_tn_expectation",
|
||||
),
|
||||
"time_quimb_contract_implementations": (
|
||||
"qibotn.backends.quimb",
|
||||
"time_quimb_contract_implementations",
|
||||
),
|
||||
"run_contest_mps_case": ("qibotn.backends.vidal", "run_contest_mps_case"),
|
||||
"run_vidal_mpi_contest_case": (
|
||||
"qibotn.backends.vidal",
|
||||
"run_vidal_mpi_contest_case",
|
||||
),
|
||||
"run_vidal_validation_cases": (
|
||||
"qibotn.backends.vidal",
|
||||
"run_vidal_validation_cases",
|
||||
),
|
||||
"pauli_pattern": ("qibotn.observables", "pauli_pattern"),
|
||||
"pauli_sum": ("qibotn.observables", "pauli_sum"),
|
||||
}
|
||||
|
||||
|
||||
def __getattr__(name):
    """Resolve a lazily exported package attribute on first access.

    Looks ``name`` up in ``_LAZY_EXPORTS`` to find the module and object that
    provide it, imports that module, and caches the object in the package
    globals so later accesses no longer go through this hook.

    Args:
        name: Attribute name requested on the package.

    Returns:
        The object registered for ``name`` in ``_LAZY_EXPORTS``.

    Raises:
        AttributeError: If ``name`` is not a registered lazy export.
    """
    try:
        module_name, object_name = _LAZY_EXPORTS[name]
    except KeyError:
        # Surface the standard attribute error without the KeyError context.
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None

    from importlib import import_module

    value = getattr(import_module(module_name), object_name)
    # Cache the resolved object in the module namespace.
    globals()[name] = value
    return value
|
||||
|
||||
|
||||
__all__ = sorted([*_LAZY_EXPORTS, "__version__"])
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
from typing import Union
|
||||
|
||||
from qibo.config import raise_error
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.backends.cpu import CpuTensorNet
|
||||
from qibotn.backends.cutensornet import CuTensorNet # pylint: disable=E0401
|
||||
|
||||
PLATFORMS = ("cutensornet", "cpu", "quimb", "qmatchatea", "vidal")
|
||||
|
||||
@@ -24,8 +20,12 @@ class MetaBackend:
|
||||
"""
|
||||
|
||||
if platform == "cutensornet": # pragma: no cover
|
||||
from qibotn.backends.cutensornet import CuTensorNet
|
||||
|
||||
return CuTensorNet(runcard)
|
||||
elif platform == "cpu":
|
||||
from qibotn.backends.cpu import CpuTensorNet
|
||||
|
||||
return CpuTensorNet(runcard)
|
||||
elif platform == "quimb": # pragma: no cover
|
||||
import qibotn.backends.quimb as qmb
|
||||
@@ -55,8 +55,8 @@ class MetaBackend:
|
||||
for platform in PLATFORMS:
|
||||
try:
|
||||
MetaBackend.load(platform=platform)
|
||||
available = True
|
||||
except:
|
||||
available = False
|
||||
available_backends[platform] = available
|
||||
except (ImportError, NotImplementedError, TypeError, ValueError):
|
||||
available_backends[platform] = False
|
||||
else:
|
||||
available_backends[platform] = True
|
||||
return available_backends
|
||||
|
||||
@@ -15,14 +15,10 @@ from qibo.config import raise_error
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.backends.vidal import (
|
||||
_observable_mpo_tensors,
|
||||
_operator_terms_to_mpo,
|
||||
_symbolic_hamiltonian_to_operator_terms,
|
||||
_unsupported_reason,
|
||||
)
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor
|
||||
from qibotn.observables import check_observable
|
||||
from qibotn.result import TensorNetworkResult
|
||||
from qibotn.torch_utils import arrays_to_backend, torch_cpu_array, torch_dtype
|
||||
|
||||
|
||||
def _as_bool_or_dict(value, name):
|
||||
@@ -282,87 +278,45 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
):
|
||||
if compile_circuit is None:
|
||||
compile_circuit = self.compile_circuit
|
||||
if preprocess:
|
||||
if self.MPI_enabled:
|
||||
from mpi4py import MPI
|
||||
|
||||
self.rank = MPI.COMM_WORLD.Get_rank()
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
tensor_module=self.tensor_module,
|
||||
compile_circuit=compile_circuit,
|
||||
mpi_approach="CT" if self.MPI_enabled else "SR",
|
||||
mpi_term_batch_size=self.mpi_term_batch_size,
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(
|
||||
circuit,
|
||||
observable,
|
||||
preprocess=True,
|
||||
compile_circuit=compile_circuit,
|
||||
)
|
||||
self.rank = getattr(backend, "rank", self.rank)
|
||||
self.last_truncation_error = getattr(
|
||||
backend, "last_truncation_error", np.nan
|
||||
)
|
||||
self.last_max_truncation_error = getattr(
|
||||
backend, "last_max_truncation_error", np.nan
|
||||
)
|
||||
return value
|
||||
|
||||
mpo_tensors = _observable_mpo_tensors(observable, circuit.nqubits)
|
||||
if self.MPI_enabled:
|
||||
from mpi4py import MPI
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
self.rank = comm.Get_rank()
|
||||
executor = SegmentVidalMPIExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
tensor_module=self.tensor_module,
|
||||
comm=comm,
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
self.last_truncation_error = float(executor.global_truncation_error())
|
||||
self.last_max_truncation_error = float(
|
||||
executor.global_max_truncation_error()
|
||||
)
|
||||
if mpo_tensors is not None:
|
||||
value = executor.expectation_mpo_root(mpo_tensors)
|
||||
else:
|
||||
terms = _symbolic_hamiltonian_to_operator_terms(observable)
|
||||
value = executor.expectation_mpo_root(
|
||||
_operator_terms_to_mpo(terms, circuit.nqubits)
|
||||
)
|
||||
return np.nan if self.rank != 0 else value
|
||||
self.rank = MPI.COMM_WORLD.Get_rank()
|
||||
|
||||
executor = VidalTEBDExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=self.max_bond_dimension,
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
tensor_module=self.tensor_module,
|
||||
compile_circuit=compile_circuit,
|
||||
mpi_approach="CT" if self.MPI_enabled else "SR",
|
||||
mpi_term_batch_size=self.mpi_term_batch_size,
|
||||
fallback=False,
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
self.last_truncation_error = float(executor.truncation_error)
|
||||
self.last_max_truncation_error = float(executor.max_truncation_error)
|
||||
if mpo_tensors is not None:
|
||||
return executor.expectation_mpo(mpo_tensors)
|
||||
terms = _symbolic_hamiltonian_to_operator_terms(observable)
|
||||
return executor.expectation_mpo(_operator_terms_to_mpo(terms, circuit.nqubits))
|
||||
value = backend.expectation(
|
||||
circuit,
|
||||
observable,
|
||||
preprocess=preprocess,
|
||||
compile_circuit=compile_circuit,
|
||||
)
|
||||
self.rank = getattr(backend, "rank", self.rank)
|
||||
self.last_truncation_error = getattr(backend, "last_truncation_error", np.nan)
|
||||
self.last_max_truncation_error = getattr(
|
||||
backend, "last_max_truncation_error", np.nan
|
||||
)
|
||||
return value
|
||||
|
||||
def _quimb_backend(self):
|
||||
import qibotn.backends.quimb as qmb
|
||||
|
||||
return qmb.BACKENDS[self.quimb_backend](
|
||||
backend = qmb.BACKENDS[self.quimb_backend](
|
||||
quimb_backend=self.quimb_backend,
|
||||
contraction_optimizer=self.contraction_optimizer,
|
||||
)
|
||||
backend.dtype = self.dtype
|
||||
return backend
|
||||
|
||||
def _bind_rank_to_numa_domain(self, rank):
|
||||
self.numa_domain = _bind_numa_node(rank)
|
||||
@@ -420,9 +374,16 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
search_time = opts.get("max_time", 60)
|
||||
search_backend = opts.get("search_backend")
|
||||
dask_address = opts.get("dask_address")
|
||||
dask_expected_workers = opts.get("dask_expected_workers")
|
||||
dask_close_workers = bool(opts.get("dask_close_workers", False))
|
||||
print_stats = bool(opts.get("print_stats", False))
|
||||
debug_trials = bool(opts.get("debug_trials", False))
|
||||
search_seed = int(opts.get("search_seed", 0))
|
||||
merge_1q = opts.get("merge_1q", "auto")
|
||||
merge_2q = opts.get("merge_2q", "auto")
|
||||
sort_contract_indices = opts.get("sort_contract_indices", "auto")
|
||||
if sort_contract_indices == "auto":
|
||||
sort_contract_indices = self.quimb_backend == "torch"
|
||||
search_only = bool(opts.get("search_only", False))
|
||||
save_tree_path = opts.get("save_tree_path")
|
||||
load_tree_path = opts.get("load_tree_path")
|
||||
@@ -430,6 +391,38 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
saved_trees = []
|
||||
saved_costs = []
|
||||
|
||||
def term_stats(
    term_index,
    factors,
    path_cost,
    search_stats,
    tree_slices,
    slice_assignment,
    rank_slices,
    search_seconds,
    contract_seconds,
):
    """Assemble the statistics record for one observable term.

    The positional arguments carry the per-term measurements.  The search
    configuration (workers, repeats, time budget, backend, seed, gate
    merging flags, dask address) and the NUMA domain are read from the
    enclosing scope, so they are identical for every term of one run.

    Returns:
        dict: the record, with per-term keys first and the shared search
        settings after them.
    """
    # Per-term measurements supplied by the caller.
    record = dict(
        term_index=term_index,
        term_factors=tuple(factors),
        path_cost=path_cost,
        search_stats=search_stats,
        tree_slices=tree_slices,
        slice_assignment=slice_assignment,
        rank_slices=rank_slices,
        search_seconds=search_seconds,
        contract_seconds=contract_seconds,
    )
    # Run-wide settings captured from the enclosing function.
    record.update(
        search_workers=search_workers,
        search_repeats=search_repeats,
        search_time=search_time,
        search_backend=search_backend or method,
        search_seed=search_seed,
        merge_1q=merge_1q,
        merge_2q=merge_2q,
        dask_address=dask_address,
        numa_domain=getattr(self, "numa_domain", None),
    )
    return record
|
||||
|
||||
if load_tree_path:
|
||||
with Path(load_tree_path).open("rb") as f:
|
||||
payload = pickle.load(f)
|
||||
@@ -444,6 +437,8 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
"max_bond": self.max_bond_dimension,
|
||||
"cutoff": self.cut_ratio,
|
||||
},
|
||||
merge_1q=merge_1q,
|
||||
merge_2q=merge_2q,
|
||||
)
|
||||
|
||||
total_value = 0.0 + 0.0j
|
||||
@@ -463,6 +458,8 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
)
|
||||
else:
|
||||
op, where = _pauli_term_to_dense_operator(factors)
|
||||
if self.quimb_backend == "torch":
|
||||
op = torch_cpu_array(op, dtype=torch_dtype(self.dtype))
|
||||
tn = qc.local_expectation(
|
||||
op,
|
||||
where,
|
||||
@@ -502,10 +499,19 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
dask_address=dask_address,
|
||||
debug_trials=debug_trials,
|
||||
dask_close_workers=dask_close_workers,
|
||||
expected_workers=dask_expected_workers,
|
||||
search_seed=search_seed,
|
||||
)
|
||||
search_seconds = time.perf_counter() - search_start
|
||||
if tree is None:
|
||||
raise RuntimeError("Failed to find a contraction tree for CPU TN MPI.")
|
||||
if sort_contract_indices and hasattr(tree, "sort_contraction_indices"):
|
||||
tree.sort_contraction_indices(
|
||||
priority=opts.get("sort_contract_indices_priority", "flops"),
|
||||
make_output_contig=True,
|
||||
make_contracted_contig=True,
|
||||
reset=True,
|
||||
)
|
||||
if self.parallel_opts.get("contract_implementation") == "cpp":
|
||||
from qibotn.torch_contractor import prepare_torch_cpp_contractor
|
||||
|
||||
@@ -537,23 +543,17 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
|
||||
if search_only:
|
||||
self.parallel_stats.append(
|
||||
{
|
||||
"term_index": term_index,
|
||||
"term_factors": tuple(factors),
|
||||
"path_cost": path_cost,
|
||||
"search_stats": search_stats,
|
||||
"tree_slices": int(getattr(tree, "multiplicity", 1)),
|
||||
"slice_assignment": "search_only",
|
||||
"rank_slices": [],
|
||||
"search_seconds": search_seconds,
|
||||
"contract_seconds": 0.0,
|
||||
"search_workers": search_workers,
|
||||
"search_repeats": search_repeats,
|
||||
"search_time": search_time,
|
||||
"search_backend": search_backend or method,
|
||||
"dask_address": dask_address,
|
||||
"numa_domain": getattr(self, "numa_domain", None),
|
||||
}
|
||||
term_stats(
|
||||
term_index,
|
||||
factors,
|
||||
path_cost,
|
||||
search_stats,
|
||||
int(getattr(tree, "multiplicity", 1)),
|
||||
"search_only",
|
||||
[],
|
||||
search_seconds,
|
||||
0.0,
|
||||
)
|
||||
)
|
||||
continue
|
||||
|
||||
@@ -570,23 +570,17 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
flush=True,
|
||||
)
|
||||
self.parallel_stats.append(
|
||||
{
|
||||
"term_index": term_index,
|
||||
"term_factors": tuple(factors),
|
||||
"path_cost": path_cost,
|
||||
"search_stats": search_stats,
|
||||
"tree_slices": 1,
|
||||
"slice_assignment": "root",
|
||||
"rank_slices": [1] + [0] * (size - 1),
|
||||
"search_seconds": search_seconds,
|
||||
"contract_seconds": contract_seconds,
|
||||
"search_workers": search_workers,
|
||||
"search_repeats": search_repeats,
|
||||
"search_time": search_time,
|
||||
"search_backend": search_backend or method,
|
||||
"dask_address": dask_address,
|
||||
"numa_domain": getattr(self, "numa_domain", None),
|
||||
}
|
||||
term_stats(
|
||||
term_index,
|
||||
factors,
|
||||
path_cost,
|
||||
search_stats,
|
||||
1,
|
||||
"root",
|
||||
[1] + [0] * (size - 1),
|
||||
search_seconds,
|
||||
contract_seconds,
|
||||
)
|
||||
)
|
||||
total_value += coeff * complex(value)
|
||||
continue
|
||||
@@ -603,36 +597,31 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
flush=True,
|
||||
)
|
||||
self.parallel_stats.append(
|
||||
{
|
||||
"term_index": term_index,
|
||||
"term_factors": tuple(factors),
|
||||
"path_cost": path_cost,
|
||||
"search_stats": search_stats,
|
||||
"tree_slices": int(getattr(tree, "multiplicity", 1)),
|
||||
"slice_assignment": "local",
|
||||
"rank_slices": [int(getattr(tree, "multiplicity", 1))],
|
||||
"search_seconds": search_seconds,
|
||||
"contract_seconds": contract_seconds,
|
||||
"search_workers": search_workers,
|
||||
"search_repeats": search_repeats,
|
||||
"search_time": search_time,
|
||||
"search_backend": search_backend or method,
|
||||
"dask_address": dask_address,
|
||||
"numa_domain": getattr(self, "numa_domain", None),
|
||||
}
|
||||
term_stats(
|
||||
term_index,
|
||||
factors,
|
||||
path_cost,
|
||||
search_stats,
|
||||
int(getattr(tree, "multiplicity", 1)),
|
||||
"local",
|
||||
[int(getattr(tree, "multiplicity", 1))],
|
||||
search_seconds,
|
||||
contract_seconds,
|
||||
)
|
||||
)
|
||||
total_value += coeff * complex(np.asarray(value).reshape(-1)[0])
|
||||
continue
|
||||
|
||||
contract_start = time.perf_counter()
|
||||
arrays = self._term_arrays(tn, backend)
|
||||
contract_implementation = self._contract_implementation(backend)
|
||||
value, stats = parallel_contract(
|
||||
tree,
|
||||
arrays,
|
||||
method="mpi",
|
||||
comm=comm,
|
||||
return_stats=True,
|
||||
implementation=self.parallel_opts.get("contract_implementation"),
|
||||
implementation=contract_implementation,
|
||||
)
|
||||
contract_seconds = time.perf_counter() - contract_start
|
||||
gathered_stats = comm.gather(stats, root=0)
|
||||
@@ -645,25 +634,17 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
flush=True,
|
||||
)
|
||||
self.parallel_stats.append(
|
||||
{
|
||||
"term_index": term_index,
|
||||
"term_factors": tuple(factors),
|
||||
"path_cost": path_cost,
|
||||
"search_stats": search_stats,
|
||||
"tree_slices": stats.nslices,
|
||||
"slice_assignment": stats.assignment,
|
||||
"rank_slices": [
|
||||
item.local_slices for item in gathered_stats
|
||||
],
|
||||
"search_seconds": search_seconds,
|
||||
"contract_seconds": contract_seconds,
|
||||
"search_workers": search_workers,
|
||||
"search_repeats": search_repeats,
|
||||
"search_time": search_time,
|
||||
"search_backend": search_backend or method,
|
||||
"dask_address": dask_address,
|
||||
"numa_domain": getattr(self, "numa_domain", None),
|
||||
}
|
||||
term_stats(
|
||||
term_index,
|
||||
factors,
|
||||
path_cost,
|
||||
search_stats,
|
||||
stats.nslices,
|
||||
stats.assignment,
|
||||
[item.local_slices for item in gathered_stats],
|
||||
search_seconds,
|
||||
contract_seconds,
|
||||
)
|
||||
)
|
||||
total_value += coeff * complex(np.asarray(value).reshape(-1)[0])
|
||||
|
||||
@@ -691,18 +672,20 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
|
||||
return np.nan if rank != 0 else float(np.real(total_value))
|
||||
|
||||
def _contract_implementation(self, backend):
|
||||
implementation = self.parallel_opts.get("contract_implementation")
|
||||
if implementation is None and backend.backend == "torch":
|
||||
return "autoray"
|
||||
return implementation
|
||||
|
||||
def _contract_term_unsliced(self, tn, tree, backend):
|
||||
contract_implementation = self.parallel_opts.get("contract_implementation")
|
||||
contract_implementation = self._contract_implementation(backend)
|
||||
if contract_implementation == "cpp":
|
||||
if backend.backend != "torch":
|
||||
raise ValueError("contract_implementation='cpp' requires torch backend.")
|
||||
from qibotn.backends.quimb import _torch_cpu_array, _torch_dtype
|
||||
from qibotn.torch_contractor import contract_tree_cpp
|
||||
|
||||
arrays = [
|
||||
_torch_cpu_array(array, dtype=_torch_dtype(self.dtype))
|
||||
for array in tn.arrays
|
||||
]
|
||||
arrays = arrays_to_backend(tn.arrays, "torch", dtype=self.dtype)
|
||||
nslices = int(getattr(tree, "multiplicity", 1))
|
||||
if nslices > 1:
|
||||
total = None
|
||||
@@ -713,12 +696,10 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
return contract_tree_cpp(tree, arrays)
|
||||
|
||||
if backend.backend == "torch":
|
||||
from qibotn.backends.quimb import _torch_cpu_array, _torch_dtype
|
||||
|
||||
for tensor in tn.tensors:
|
||||
tensor._data = _torch_cpu_array(
|
||||
tensor._data = torch_cpu_array(
|
||||
tensor._data,
|
||||
dtype=_torch_dtype(self.dtype),
|
||||
dtype=torch_dtype(self.dtype),
|
||||
)
|
||||
return tn.contract(
|
||||
all,
|
||||
@@ -740,13 +721,9 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
return None if user_slicing_opts is None else dict(user_slicing_opts)
|
||||
|
||||
def _term_arrays(self, tn, backend):
    """Return the arrays of ``tn`` converted for ``backend``.

    The conversion is delegated to ``arrays_to_backend``, which receives the
    network's arrays, the backend name, the backend's engine and this
    object's dtype.
    """
    network_arrays = tn.arrays
    target_name = backend.backend
    return arrays_to_backend(
        network_arrays,
        target_name,
        engine=backend.engine,
        dtype=self.dtype,
    )
|
||||
|
||||
321
src/qibotn/backends/cutensornet_helpers.py
Normal file
321
src/qibotn/backends/cutensornet_helpers.py
Normal file
@@ -0,0 +1,321 @@
|
||||
"""cuTensorNet circuit and MPS conversion helpers."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
from cuquantum.tensornet import contract, contract_path
|
||||
from cuquantum.tensornet.experimental import contract_decompose
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
cutn = None
|
||||
contract = None
|
||||
contract_path = None
|
||||
contract_decompose = None
|
||||
|
||||
|
||||
def _require_cupy():
    """Return the imported ``cupy`` module.

    Raises:
        ImportError: if the module-level ``cp`` is ``None``, i.e. the guarded
            import at the top of the file failed.
    """
    if cp is not None:
        return cp
    raise ImportError(
        "The cuQuantum circuit converter requires cupy. "
        "Install the GPU dependencies or use the CPU backend."
    )
|
||||
|
||||
|
||||
def _require_cutensornet():
    """Ensure both ``cupy`` and the cuTensorNet bindings were imported.

    Raises:
        ImportError: if either module-level ``cp`` or ``cutn`` is ``None``.
    """
    gpu_stack_ready = cp is not None and cutn is not None
    if not gpu_stack_ready:
        raise ImportError(
            "The cuQuantum MPS converter requires cupy and cuquantum. "
            "Install the GPU dependencies or use the CPU backend."
        )
|
||||
|
||||
|
||||
def _require_tensornet_mps():
    """Ensure the names needed by the MPS helpers were imported.

    Raises:
        ImportError: if any of the module-level ``cp``, ``contract`` or
            ``contract_decompose`` is ``None``.
    """
    mps_stack_ready = (
        cp is not None
        and contract is not None
        and contract_decompose is not None
    )
    if not mps_stack_ready:
        raise ImportError(
            "The cuQuantum MPS helpers require cupy and cuquantum. "
            "Install the GPU dependencies or use the CPU backend."
        )
|
||||
|
||||
|
||||
def _require_contract():
    """Ensure the cuQuantum contraction entry points were imported.

    Raises:
        ImportError: if the module-level ``contract`` or ``contract_path``
            is ``None``.
    """
    contraction_ready = contract is not None and contract_path is not None
    if not contraction_ready:
        raise ImportError(
            "The cuQuantum MPS contraction helper requires cuquantum. "
            "Install the GPU dependencies or use the CPU backend."
        )
|
||||
|
||||
|
||||
class QiboCircuitToEinsum:
    """Convert a Qibo circuit to cuQuantum interleaved TN operands.

    The converter stores every gate of the circuit as a ``(tensor, qubits)``
    pair and can emit two interleaved operand lists (alternating tensors and
    integer mode-label lists, terminated by the output mode list): one for
    the final state vector and one for an expectation value.

    Attributes set by ``__init__``:
        backend: array module returned by ``_require_cupy()``.
        dtype: dtype object looked up on ``backend`` by name.
        basis_map: ``{"0": |0>, "1": |1>}`` single-qubit basis vectors.
        gate_tensors: list of ``(tensor, qubits)`` for the circuit's gates.
        active_qubits: sorted unique qubits touched by at least one gate.
        circuit: the original circuit object.
    """

    def __init__(self, circuit, dtype="complex128"):
        """Parse ``circuit`` into gate tensors of the named ``dtype``.

        Args:
            circuit: object exposing ``queue`` (iterable of gates), ``nqubits``
                and ``invert()``; each gate exposes ``control_qubits``,
                ``target_qubits`` and ``matrix()``.
            dtype: name of a dtype attribute on the array backend.

        Raises:
            ImportError: if cupy is unavailable (via ``_require_cupy``).
        """
        self.backend = _require_cupy()
        self.dtype = getattr(self.backend, dtype)
        self.init_basis_map(self.backend, dtype)
        self.init_intermediate_circuit(circuit)
        self.circuit = circuit

    def state_vector_operands(self):
        """Return interleaved operands whose contraction is the state vector.

        Only the active qubits receive an input ``|0>`` tensor; the output
        mode list holds the final frontier label of each active qubit.
        """
        input_bitstring = "0" * len(self.active_qubits)
        input_operands = self._get_bitstring_tensors(input_bitstring)
        mode_labels, qubits_frontier, next_frontier = (
            self._init_mode_labels_from_qubits(self.active_qubits)
        )
        gate_mode_labels, gate_operands = (
            self._parse_gates_to_mode_labels_operands(
                self.gate_tensors, qubits_frontier, next_frontier
            )
        )
        operands = input_operands + gate_operands
        mode_labels += gate_mode_labels
        out_list = list(qubits_frontier.values())
        operand_exp_interleave = [
            x for y in zip(operands, mode_labels) for x in y
        ]
        operand_exp_interleave.append(out_list)
        return operand_exp_interleave

    def _init_mode_labels_from_qubits(self, qubits):
        """Assign the initial mode label ``i`` to the i-th entry of ``qubits``.

        Returns:
            tuple: ``(mode_labels, frontier, next_label)`` where ``mode_labels``
            is ``[[0], [1], ...]``, ``frontier`` maps each qubit to its current
            label and ``next_label`` is the first unused label.
        """
        nqubits = len(qubits)
        frontier_dict = {q: i for i, q in enumerate(qubits)}
        mode_labels = [[i] for i in range(nqubits)]
        return mode_labels, frontier_dict, nqubits

    def _get_bitstring_tensors(self, bitstring):
        """Return the basis vector for every character of ``bitstring``."""
        return [self.basis_map[ibit] for ibit in bitstring]

    def _parse_gates_to_mode_labels_operands(
        self, gates, qubits_frontier, next_frontier
    ):
        """Assign mode labels to each gate tensor.

        For every qubit a gate acts on, the qubit's current frontier label is
        the gate's input mode and a fresh label becomes its output mode.
        ``qubits_frontier`` is updated in place; ``next_frontier`` is a local
        counter, so callers must recompute the next free label themselves.

        Returns:
            tuple: ``(mode_labels, operands)`` with one entry per gate; each
            label list is the output modes followed by the input modes.
        """
        mode_labels = []
        operands = []
        for tensor, gate_qubits in gates:
            operands.append(tensor)
            input_mode_labels = []
            output_mode_labels = []
            for qubit in gate_qubits:
                input_mode_labels.append(qubits_frontier[qubit])
                output_mode_labels.append(next_frontier)
                qubits_frontier[qubit] = next_frontier
                next_frontier += 1
            mode_labels.append(output_mode_labels + input_mode_labels)
        return mode_labels, operands

    def op_shape_from_qubits(self, nqubits):
        """Return the tensor shape ``(2, 2) * nqubits`` of an n-qubit gate."""
        return (2, 2) * nqubits

    def _gate_tensors_from_circuit(self, circuit):
        """Extract ``(tensor, qubits)`` pairs and the touched qubits.

        Every gate matrix is converted with ``self.dtype`` so that the forward
        and inverse circuits share one precision.

        Returns:
            tuple: ``(gate_tensors, active_qubits)`` where ``active_qubits`` is
            ``np.unique`` of all control and target qubits.
        """
        gate_tensors = []
        touched_qubits = []
        for gate in circuit.queue:
            gate_qubits = gate.control_qubits + gate.target_qubits
            touched_qubits.extend(gate_qubits)
            required_shape = self.op_shape_from_qubits(len(gate_qubits))
            tensor = self.backend.asarray(
                gate.matrix(), dtype=self.dtype
            ).reshape(required_shape)
            gate_tensors.append((tensor, gate_qubits))
        return gate_tensors, np.unique(touched_qubits)

    def init_intermediate_circuit(self, circuit):
        """Populate ``gate_tensors`` and ``active_qubits`` from ``circuit``."""
        self.gate_tensors, self.active_qubits = (
            self._gate_tensors_from_circuit(circuit)
        )

    def init_basis_map(self, backend, dtype):
        """Create the ``"0"``/``"1"`` basis vectors on ``backend``."""
        asarray = backend.asarray
        self.basis_map = {
            "0": asarray([1, 0], dtype=dtype),
            "1": asarray([0, 1], dtype=dtype),
        }

    def init_inverse_circuit(self, circuit):
        """Populate ``gate_tensors_inverse`` and ``active_qubits_inverse``.

        ``circuit`` is expected to already be the inverted circuit.  Tensors
        are created with ``self.dtype``, matching ``init_intermediate_circuit``
        (previously the backend's default dtype was used here).
        """
        self.gate_tensors_inverse, self.active_qubits_inverse = (
            self._gate_tensors_from_circuit(circuit)
        )

    def get_pauli_gates(self, pauli_map, dtype="complex128", backend=None):
        """Build single-qubit Pauli gate tensors from a qubit-to-letter map.

        Args:
            pauli_map: mapping ``qubit -> "I" | "X" | "Y" | "Z"``.
            dtype: dtype passed to ``backend.asarray``.
            backend: array module; defaults to cupy via ``_require_cupy()``.

        Returns:
            list: ``(matrix, (qubit,))`` pairs in ``pauli_map`` order.

        Raises:
            ValueError: if a letter is not one of I/X/Y/Z.
        """
        if backend is None:
            backend = _require_cupy()
        asarray = backend.asarray
        operand_map = {
            "I": asarray([[1, 0], [0, 1]], dtype=dtype),
            "X": asarray([[0, 1], [1, 0]], dtype=dtype),
            "Y": asarray([[0, -1j], [1j, 0]], dtype=dtype),
            "Z": asarray([[1, 0], [0, -1]], dtype=dtype),
        }
        gates = []
        for qubit, pauli_char in pauli_map.items():
            operand = operand_map.get(pauli_char)
            if operand is None:
                raise ValueError(
                    "pauli string character must be one of I/X/Y/Z"
                )
            gates.append((operand, (qubit,)))
        return gates

    def expectation_operands(self, ham_gates):
        """Return interleaved operands for ``<0|U^dagger H U|0>``.

        The network is: input ``|0>`` tensors on every circuit qubit, the
        circuit gates, ``ham_gates``, the gates of ``circuit.invert()``, and
        the same ``|0>`` tensors closing the final frontier.  The output mode
        list is empty, so the contraction yields a scalar.

        Args:
            ham_gates: list of ``(tensor, qubits)`` pairs, e.g. from
                ``get_pauli_gates``.
        """
        nqubits = self.circuit.nqubits
        input_bitstring = "0" * nqubits
        input_operands = self._get_bitstring_tensors(input_bitstring)
        mode_labels, qubits_frontier, next_frontier = (
            self._init_mode_labels_from_qubits(range(nqubits))
        )
        gate_mode_labels, gate_operands = (
            self._parse_gates_to_mode_labels_operands(
                self.gate_tensors, qubits_frontier, next_frontier
            )
        )
        operands = input_operands + gate_operands
        mode_labels += gate_mode_labels

        self.init_inverse_circuit(self.circuit.invert())
        # The parser's label counter is local, so recover the next free label
        # from the frontier it updated in place.
        next_frontier = max(qubits_frontier.values()) + 1
        gates_inverse = ham_gates + self.gate_tensors_inverse
        gate_mode_labels_inverse, gate_operands_inverse = (
            self._parse_gates_to_mode_labels_operands(
                gates_inverse, qubits_frontier, next_frontier
            )
        )
        mode_labels = (
            mode_labels
            + gate_mode_labels_inverse
            + [[qubits_frontier[ix]] for ix in range(nqubits)]
        )
        # The leading |0> tensors are reused to close the network.
        operands = operands + gate_operands_inverse + operands[:nqubits]
        operand_exp_interleave = [
            x for y in zip(operands, mode_labels) for x in y
        ]
        operand_exp_interleave.append([])
        return operand_exp_interleave
|
||||
|
||||
|
||||
def initial_mps(num_qubits, dtype):
    """Return the MPS tensors of the all-zero product state.

    Each site is the vector ``[1, 0]`` reshaped to ``(1, 2, 1)``.  Every list
    entry refers to the same array object.

    Raises:
        ImportError: if the cuQuantum MPS dependencies are missing.
    """
    _require_tensornet_mps()
    zero_site = cp.asarray([1, 0], dtype=dtype).reshape(1, 2, 1)
    # Same object in every slot, exactly as list repetition would give.
    return [zero_site for _ in range(num_qubits)]
|
||||
|
||||
|
||||
def mps_site_right_swap(mps_tensors, i, **kwargs):
    """Swap the physical legs of sites ``i`` and ``i + 1`` in place.

    The two neighbouring tensors are contracted and re-decomposed with their
    physical modes exchanged (``"ipj,jqk->iqj,jpk"``).

    Args:
        mps_tensors: mutable list of MPS site tensors.
        i: index of the left site of the pair.
        **kwargs: optional ``algorithm`` and ``options`` forwarded to
            ``contract_decompose``; both default to ``None``.

    Returns:
        The same ``mps_tensors`` list, updated.
    """
    _require_tensornet_mps()
    pair = slice(i, i + 2)
    swapped_left, _, swapped_right = contract_decompose(
        "ipj,jqk->iqj,jpk",
        *mps_tensors[pair],
        algorithm=kwargs.get("algorithm"),
        options=kwargs.get("options"),
    )
    mps_tensors[pair] = (swapped_left, swapped_right)
    return mps_tensors
|
||||
|
||||
|
||||
def apply_mps_gate(mps_tensors, gate, qubits, **kwargs):
    """Apply a one- or two-qubit gate to an MPS in place.

    Single-qubit gates are contracted directly into the site tensor.
    Two-qubit gates on neighbouring sites are contracted and re-decomposed.
    For distant sites the left site is swapped one step to the right, the
    gate is applied recursively, and the swap is undone.  If the qubits are
    given in descending order, the gate's legs are transposed so it can be
    applied in ascending order.

    Args:
        mps_tensors: mutable list of MPS site tensors.
        gate: gate tensor; rank 2 for one qubit, rank 4 for two qubits with
            index order (out_0, out_1, in_0, in_1), as used by the einsum
            expression ``"ipj,jqk,rspq->irj,jsk"``.
        qubits: sequence of one or two site indices.
        **kwargs: optional ``algorithm`` and ``options`` forwarded to the
            cuQuantum calls; both default to ``None``.

    Returns:
        The same ``mps_tensors`` list, updated.  (The function previously
        returned ``None`` on every path; returning the list matches
        ``mps_site_right_swap``.)

    Raises:
        NotImplementedError: for gates on more than two qubits.
        ImportError: if the cuQuantum MPS dependencies are missing.
    """
    _require_tensornet_mps()
    algorithm = kwargs.get("algorithm")
    options = kwargs.get("options")

    n_qubits = len(qubits)
    if n_qubits == 1:
        site = qubits[0]
        mps_tensors[site] = contract(
            "ipj,qp->iqj",
            mps_tensors[site],
            gate,
            options=options,
        )
        return mps_tensors

    if n_qubits != 2:
        raise NotImplementedError("Only one- and two-qubit gates supported")

    left, right = qubits
    if left > right:
        # Exchange both the output and input legs to act on (right, left).
        return apply_mps_gate(
            mps_tensors, gate.transpose(1, 0, 3, 2), (right, left), **kwargs
        )

    if left + 1 == right:
        a_tensor, _, b_tensor = contract_decompose(
            "ipj,jqk,rspq->irj,jsk",
            *mps_tensors[left : left + 2],
            gate,
            algorithm=algorithm,
            options=options,
        )
        mps_tensors[left : left + 2] = (a_tensor, b_tensor)
    else:
        # Move the left qubit one site closer, recurse, then move it back.
        mps_site_right_swap(mps_tensors, left, **kwargs)
        apply_mps_gate(mps_tensors, gate, (left + 1, right), **kwargs)
        mps_site_right_swap(mps_tensors, left, **kwargs)
    return mps_tensors
|
||||
|
||||
|
||||
class QiboCircuitToMPS:
    """Convert a Qibo circuit to a cuTensorNet MPS representation.

    Construction creates a cuTensorNet handle, starts from the all-zero MPS
    and applies every gate of the circuit to it.  The resulting site tensors
    are kept in ``mps_tensors``.
    """

    def __init__(self, circ_qibo, gate_algo, dtype="complex128", rand_seed=0):
        """Build the MPS for ``circ_qibo``.

        Args:
            circ_qibo: circuit exposing ``nqubits`` and the attributes needed
                by ``QiboCircuitToEinsum``.
            gate_algo: value passed as ``algorithm`` to every gate application.
            dtype: dtype name used for the MPS and gate tensors.
            rand_seed: seed applied to the global numpy and cupy generators.

        Raises:
            ImportError: if cupy or the cuTensorNet bindings are missing.
        """
        _require_cutensornet()
        # Seeds the process-wide generators of both libraries.
        np.random.seed(rand_seed)
        cp.random.seed(rand_seed)

        self.num_qubits = circ_qibo.nqubits
        self.handle = cutn.create()
        self.dtype = dtype
        self.mps_tensors = initial_mps(self.num_qubits, dtype=dtype)

        converter = QiboCircuitToEinsum(circ_qibo, dtype=dtype)
        for gate_tensor, gate_qubits in converter.gate_tensors:
            apply_mps_gate(
                self.mps_tensors,
                gate_tensor,
                gate_qubits,
                algorithm=gate_algo,
                options={"handle": self.handle},
            )

    def __del__(self):
        """Destroy the cuTensorNet handle, if one was created."""
        # __init__ may have failed before the handle was assigned.
        handle = getattr(self, "handle", None)
        if cutn is None or handle is None:
            return
        cutn.destroy(handle)
|
||||
|
||||
|
||||
class MPSContractionHelper:
    """Contract cuTensorNet MPS tensors to norms, states, or expectations.

    Site ``i`` gets two integer mode triples ``(left_bond, physical,
    right_bond)``: ``bra_modes[i]`` is paired with the tensor itself and
    ``ket_modes[i]`` with its complex conjugate.  Both share the physical
    label ``2 * i + 1``; the conjugate's bond labels start at
    ``2 * num_qubits + 1`` so they never collide with the first set.
    """

    def __init__(self, num_qubits):
        """Precompute the mode labels for an MPS with ``num_qubits`` sites."""
        self.num_qubits = num_qubits
        sites = range(num_qubits)
        self.bra_modes = [
            (2 * site, 2 * site + 1, 2 * site + 2) for site in sites
        ]
        ket_start = 2 * num_qubits + 1
        self.ket_modes = [
            (ket_start + site, 2 * site + 1, ket_start + site + 1)
            for site in sites
        ]

    def contract_norm(self, mps_tensors, options=None):
        """Return the real part of the MPS contracted with its conjugate."""
        network = []
        for site, tensor in enumerate(mps_tensors):
            network += [
                tensor,
                self.bra_modes[site],
                tensor.conj(),
                self.ket_modes[site],
            ]
        network.append([])  # empty output modes: scalar result
        return self._contract(network, options=options).real

    def contract_state_vector(self, mps_tensors, options=None):
        """Contract the MPS into a tensor with one physical mode per site."""
        network = []
        for site, tensor in enumerate(mps_tensors):
            network += [tensor, self.bra_modes[site]]
        physical_modes = tuple(modes[1] for modes in self.bra_modes)
        network.append(physical_modes)
        return self._contract(network, options=options)

    def contract_expectation(
        self, mps_tensors, operator, qubits, options=None, normalize=False
    ):
        """Contract ``operator`` between the MPS and its conjugate.

        Args:
            mps_tensors: list of MPS site tensors.
            operator: tensor whose first ``len(qubits)`` modes connect to the
                conjugate tensors and whose last ``len(qubits)`` modes connect
                to the plain tensors, in the order given by ``qubits``.
            qubits: sites the operator acts on.
            options: forwarded to the cuQuantum contraction calls.
            normalize: divide by ``contract_norm`` when true.

        Returns:
            The contracted value, divided by the norm if requested.
        """
        network = []
        fresh_mode = 3 * self.num_qubits + 2  # first label above all others
        # First half is filled in below; second half uses the shared
        # physical labels of the acted-on sites.
        operator_modes = [None] * len(qubits) + [
            self.bra_modes[q][1] for q in qubits
        ]
        qubits = list(qubits)
        for site, tensor in enumerate(mps_tensors):
            network += [tensor, self.bra_modes[site]]
            conj_modes = self.ket_modes[site]
            if site in qubits:
                # Detach the conjugate's physical leg and wire it to the
                # operator through a fresh label.
                left_bond, _, right_bond = conj_modes
                conj_modes = (left_bond, fresh_mode, right_bond)
                operator_modes[qubits.index(site)] = fresh_mode
                fresh_mode += 1
            network += [tensor.conj(), conj_modes]
        network += [operator, tuple(operator_modes)]
        network.append([])
        if normalize:
            norm = self.contract_norm(mps_tensors, options=options)
        else:
            norm = 1
        return self._contract(network, options=options) / norm

    def _contract(self, interleaved_inputs, options=None):
        """Find a contraction path and contract the interleaved operands.

        Raises:
            ImportError: if the cuQuantum contraction functions are missing.
        """
        _require_contract()
        path_result = contract_path(*interleaved_inputs, options=options)
        return contract(
            *interleaved_inputs,
            options=options,
            optimize={"path": path_result[0]},
        )
|
||||
@@ -1,6 +1,9 @@
|
||||
"""Implementation of Quantum Matcha Tea backend."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
@@ -12,6 +15,7 @@ from qibo.config import raise_error
|
||||
from qmatchatea.utils import MPISettings
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.benchmark_cases import exact_pauli_sum
|
||||
from qibotn.observables import check_observable
|
||||
from qibotn.result import TensorNetworkResult
|
||||
|
||||
@@ -364,3 +368,207 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
use_itpo=False,
|
||||
)
|
||||
return obs_sum
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QMatchaTeaExpectationResult:
    """Immutable record of one timed expectation-value evaluation."""

    # Expectation value.
    value: float
    # Elapsed time in seconds (measured with ``time.perf_counter``).
    seconds: float
    # Backend object that produced the value.
    backend: object
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QMatchaTeaBuildResult:
    """Immutable record of a constructed backend and its build time."""

    # The configured backend instance.
    backend: object
    # Time spent building and configuring it, in seconds.
    build_seconds: float
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class QMatchaTeaSvdControlResult:
    """Immutable record of one SVD-control benchmark run."""

    # SVD control setting that was benchmarked.
    ctrl: str
    # Singular-value contraction mode passed to the split.
    contract_singvals: str
    # "ok" or "error".
    status: str
    # Median and minimum wall time over the repeats, in milliseconds.
    median_ms: float
    min_ms: float
    # Relative reconstruction error; None when it was not computed.
    rel_error: float | None
    # Number of singular values returned; None when it was not computed.
    kept: int | None
    # repr() of the caught exception, or an empty string.
    error: str
|
||||
|
||||
|
||||
def make_qmatchatea_backend(
    *,
    bond=10,
    cut_ratio=1e-9,
    tensor_module="torch",
    svd_control="E!",
    compile_circuit=True,
    track_memory=False,
    mpi_approach="SR",
    mpi_num_procs=1,
    mpi_where_barriers=-1,
    mpi_isometrization=-1,
):
    """Create a ``QMatchaTeaBackend`` configured for an MPS simulation.

    All keyword arguments are forwarded to ``configure_tn_simulation``;
    ``bond`` is passed as ``max_bond_dimension`` and the ansatz is fixed to
    ``"MPS"``.

    Returns:
        The configured backend instance.
    """
    settings = {
        "ansatz": "MPS",
        "max_bond_dimension": bond,
        "cut_ratio": cut_ratio,
        "svd_control": svd_control,
        "tensor_module": tensor_module,
        "compile_circuit": compile_circuit,
        "track_memory": track_memory,
        "mpi_approach": mpi_approach,
        "mpi_num_procs": mpi_num_procs,
        "mpi_where_barriers": mpi_where_barriers,
        "mpi_isometrization": mpi_isometrization,
    }
    backend = QMatchaTeaBackend()
    backend.configure_tn_simulation(**settings)
    return backend
|
||||
|
||||
|
||||
def build_qmatchatea_backend(
    *,
    bond=10,
    cut_ratio=1e-9,
    tensor_module="torch",
    svd_control="E!",
    compile_circuit=True,
    track_memory=False,
    mpi_approach="SR",
    mpi_num_procs=1,
    mpi_where_barriers=-1,
    mpi_isometrization=-1,
):
    """Build a QMatchaTea backend and measure how long the build takes.

    Accepts the same keyword arguments as ``make_qmatchatea_backend`` and
    forwards them unchanged.

    Returns:
        QMatchaTeaBuildResult: the backend and the elapsed build time in
        seconds.
    """
    options = dict(
        bond=bond,
        cut_ratio=cut_ratio,
        tensor_module=tensor_module,
        svd_control=svd_control,
        compile_circuit=compile_circuit,
        track_memory=track_memory,
        mpi_approach=mpi_approach,
        mpi_num_procs=mpi_num_procs,
        mpi_where_barriers=mpi_where_barriers,
        mpi_isometrization=mpi_isometrization,
    )
    tic = time.perf_counter()
    backend = make_qmatchatea_backend(**options)
    elapsed = time.perf_counter() - tic
    return QMatchaTeaBuildResult(backend=backend, build_seconds=elapsed)
|
||||
|
||||
|
||||
def exact_mps_expectation(circuit, observable, nqubits):
    """Compute a reference expectation value for ``observable``.

    A dict observable containing a ``"terms"`` key is converted to
    ``(coefficient, ((name, site), ...))`` tuples and handed to
    ``exact_pauli_sum``.  Any other observable is normalised with
    ``check_observable`` and evaluated on the state obtained by executing the
    circuit.

    Returns:
        The result of ``exact_pauli_sum`` for term dicts, otherwise the real
        part of the expectation as a ``float``.
    """
    is_term_dict = isinstance(observable, dict) and "terms" in observable
    if not is_term_dict:
        hamiltonian = check_observable(observable, nqubits)
        evaluate = hamiltonian.expectation_from_state
        state = circuit().state(numpy=True)
        return float(evaluate(state).real)

    terms = []
    for term in observable["terms"]:
        coefficient = term["coefficient"]
        operators = tuple((name, site) for name, site in term["operators"])
        terms.append((coefficient, operators))
    return exact_pauli_sum(circuit, terms, nqubits)
|
||||
|
||||
|
||||
def run_qmatchatea_expectation(
    circuit,
    observable,
    *,
    bond=10,
    cut_ratio=1e-9,
    tensor_module="torch",
    svd_control="E!",
    compile_circuit=True,
    preprocess=True,
    track_memory=False,
    mpi_approach="SR",
    mpi_num_procs=1,
    mpi_where_barriers=-1,
    mpi_isometrization=-1,
):
    """Build a QMatchaTea backend and time one expectation evaluation.

    The backend is created with ``build_qmatchatea_backend``; the timer covers
    only the ``expectation`` call and the conversion of its result, not the
    backend construction.

    Returns:
        QMatchaTeaExpectationResult: real part of the value, elapsed seconds
        and the backend that was used.
    """
    build_options = dict(
        bond=bond,
        cut_ratio=cut_ratio,
        tensor_module=tensor_module,
        svd_control=svd_control,
        compile_circuit=compile_circuit,
        track_memory=track_memory,
        mpi_approach=mpi_approach,
        mpi_num_procs=mpi_num_procs,
        mpi_where_barriers=mpi_where_barriers,
        mpi_isometrization=mpi_isometrization,
    )
    built = build_qmatchatea_backend(**build_options)
    backend = built.backend

    tic = time.perf_counter()
    raw_value = backend.expectation(
        circuit,
        observable,
        preprocess=preprocess,
        compile_circuit=compile_circuit,
    )
    real_value = float(np.real(raw_value))
    elapsed = time.perf_counter() - tic
    return QMatchaTeaExpectationResult(
        value=real_value,
        seconds=elapsed,
        backend=built.backend,
    )
|
||||
|
||||
|
||||
def benchmark_qmatchatea_svd_control(matrix, *, ctrl, max_bond, contract_singvals, repeats):
    """Time ``split_svd`` on ``matrix`` for one SVD control setting.

    The matrix is wrapped in a ``QteaTorchTensor`` on the CPU and split
    between its two legs ``repeats`` times, with ``cut_ratio`` fixed to 0.0.
    The first exception stops the benchmark and is recorded.  When every
    repeat succeeds, the last split is used to compute the relative
    reconstruction error ``|matrix - left @ right| / |matrix|`` and the
    number of singular values returned.

    Args:
        matrix: torch tensor to split (``shape``, ``dtype`` are read).
        ctrl: value for ``svd_ctrl`` in the convergence parameters.
        max_bond: maximum bond dimension.
        contract_singvals: forwarded to ``split_svd``.
        repeats: number of timed runs.

    Returns:
        QMatchaTeaSvdControlResult: timings in milliseconds (NaN if no run
        completed), error metrics and status.
    """
    import gc
    import statistics

    import torch

    from qredtea.torchapi import QteaTorchTensor

    def sync_cuda():
        # Make sure queued GPU work does not leak into the timing window.
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    conv = qmatchatea.QCConvergenceParameters(
        max_bond_dimension=max_bond,
        cut_ratio=0.0,
        svd_ctrl=ctrl,
    )
    qtensor = QteaTorchTensor.from_elem_array(matrix, dtype=matrix.dtype, device="cpu")

    durations = []
    rel_error = None
    kept = None
    status, error = "ok", ""

    for _attempt in range(repeats):
        gc.collect()
        sync_cuda()
        tic = time.perf_counter()
        try:
            left, right, singvals, _ = qtensor.split_svd(
                [0],
                [1],
                contract_singvals=contract_singvals,
                conv_params=conv,
            )
        except Exception as exc:  # noqa: BLE001
            status, error = "error", repr(exc)
            break
        sync_cuda()
        durations.append(time.perf_counter() - tic)
    else:
        # Reached only when no repeat failed; evaluate the final split.
        if durations:
            rows, cols = matrix.shape[0], matrix.shape[1]
            recon = left.elem.reshape(rows, -1) @ right.elem.reshape(-1, cols)
            rel_error = (
                torch.linalg.vector_norm(matrix - recon)
                / torch.linalg.vector_norm(matrix)
            ).item()
            kept = int(singvals.numel())

    if durations:
        median_ms = statistics.median(durations) * 1000
        min_ms = min(durations) * 1000
    else:
        median_ms = float("nan")
        min_ms = float("nan")

    return QMatchaTeaSvdControlResult(
        ctrl=ctrl,
        contract_singvals=contract_singvals,
        status=status,
        median_ms=median_ms,
        min_ms=min_ms,
        rel_error=rel_error,
        kept=kept,
        error=error,
    )
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -9,6 +9,7 @@ usable while the fast path is expanded.
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
@@ -475,3 +476,511 @@ class VidalBackend(QibotnBackend, NumpyBackend):
|
||||
return_array=return_array,
|
||||
**prob_kwargs,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VidalExpectationResult:
    """Immutable record of one timed expectation-value evaluation."""

    # Expectation value.
    value: float
    # Elapsed time in seconds (measured with ``time.perf_counter``).
    seconds: float
    # Object associated with the run; callers in this file pass either the
    # backend instance or a timings object.
    backend: object
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VidalBackendComparisonResult:
    """Immutable record comparing the Vidal and QMatchaTea backends."""

    # Inputs of the comparison.
    circuit: object
    observable: object
    # Reference value, when one is available.
    exact: float | None
    # Per-backend results; the QMatchaTea run is optional.
    qmatchatea: VidalExpectationResult | None
    vidal: VidalExpectationResult
    # Per-backend errors; None when not available.
    qmatchatea_error: float | None
    vidal_error: float | None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class VidalProfileResult:
    """Immutable record of a profiled Vidal run."""

    # Value produced by the profiled run.
    value: float
    # Locations of the trace and table outputs.
    trace_path: object
    table_path: object
    # Profiler table as text.
    table: str
|
||||
|
||||
|
||||
def make_vidal_backend(
    *,
    bond=10,
    cut_ratio=1e-9,
    tensor_module="torch",
    compile_circuit=False,
    mpi_approach="SR",
    mpi_num_procs=1,
    mpi_where_barriers=-1,
    mpi_isometrization=-1,
    mpi_term_batch_size=None,
    fallback=True,
):
    """Create a ``VidalBackend`` and configure its tensor-network simulation.

    All keyword arguments are forwarded to ``configure_tn_simulation``;
    ``bond`` is passed as ``max_bond_dimension``.

    Returns:
        The configured backend instance.
    """
    settings = {
        "max_bond_dimension": bond,
        "cut_ratio": cut_ratio,
        "tensor_module": tensor_module,
        "compile_circuit": compile_circuit,
        "mpi_approach": mpi_approach,
        "mpi_num_procs": mpi_num_procs,
        "mpi_where_barriers": mpi_where_barriers,
        "mpi_isometrization": mpi_isometrization,
        "mpi_term_batch_size": mpi_term_batch_size,
        "fallback": fallback,
    }
    backend = VidalBackend()
    backend.configure_tn_simulation(**settings)
    return backend
|
||||
|
||||
|
||||
def run_vidal_expectation(
    circuit,
    observable,
    *,
    bond=10,
    cut_ratio=1e-9,
    tensor_module="torch",
    compile_circuit=False,
    preprocess=True,
    mpi_approach="SR",
    mpi_num_procs=1,
    mpi_where_barriers=-1,
    mpi_isometrization=-1,
    mpi_term_batch_size=None,
    fallback=True,
):
    """Build a Vidal backend and time one expectation evaluation.

    The backend is created with ``make_vidal_backend`` before the timer
    starts, so the reported seconds cover only the ``expectation`` call and
    the conversion of its result.

    Returns:
        VidalExpectationResult: real part of the value, elapsed seconds and
        the backend that was used.
    """
    backend_options = dict(
        bond=bond,
        cut_ratio=cut_ratio,
        tensor_module=tensor_module,
        compile_circuit=compile_circuit,
        mpi_approach=mpi_approach,
        mpi_num_procs=mpi_num_procs,
        mpi_where_barriers=mpi_where_barriers,
        mpi_isometrization=mpi_isometrization,
        mpi_term_batch_size=mpi_term_batch_size,
        fallback=fallback,
    )
    backend = make_vidal_backend(**backend_options)

    tic = time.perf_counter()
    raw_value = backend.expectation(
        circuit,
        observable,
        preprocess=preprocess,
        compile_circuit=compile_circuit,
    )
    real_value = float(np.real(raw_value))
    elapsed = time.perf_counter() - tic
    return VidalExpectationResult(
        value=real_value,
        seconds=elapsed,
        backend=backend,
    )
|
||||
|
||||
|
||||
def run_segmented_vidal_ring_xz(
    circuit,
    *,
    max_bond=10,
    cut_ratio=1e-9,
    tensor_module="torch",
    comm,
):
    """Run the segmented MPI Vidal ring-XZ evaluation and time it.

    Delegates to ``run_segment_vidal_mpi_ring_xz`` (imported lazily), which
    returns a value and a timings object.

    Args:
        circuit: circuit to evaluate.
        max_bond: maximum bond dimension.
        cut_ratio: truncation cut ratio.
        tensor_module: tensor module name.
        comm: communicator passed through to the segmented runner (required).

    Returns:
        VidalExpectationResult: real part of the value and elapsed seconds;
        the ``backend`` field carries the timings object returned by the
        runner.
    """
    from qibotn.backends.vidal_mpi_segment import run_segment_vidal_mpi_ring_xz

    runner_options = dict(
        max_bond=max_bond,
        cut_ratio=cut_ratio,
        tensor_module=tensor_module,
        comm=comm,
    )
    tic = time.perf_counter()
    raw_value, timings = run_segment_vidal_mpi_ring_xz(circuit, **runner_options)
    real_value = float(np.real(raw_value))
    elapsed = time.perf_counter() - tic
    return VidalExpectationResult(
        value=real_value,
        seconds=elapsed,
        backend=timings,
    )
|
||||
|
||||
|
||||
def compare_vidal_backend_qmatchatea(
    circuit,
    observable,
    *,
    bond=512,
    cut_ratio=1e-12,
    tensor_module="torch",
    exact=None,
    skip_qmatchatea=False,
    qmatchatea_compile_circuit=True,
    qmatchatea_svd_control="E!",
    vidal_compile_circuit=True,
    vidal_fallback=True,
):
    """Run the same expectation on the QMatchaTea and Vidal backends.

    The QMatchaTea run comes first and is skipped entirely when
    ``skip_qmatchatea`` is true. Each backend is timed around its own
    ``expectation`` call only. When ``exact`` is given, the absolute error of
    every result that was produced is reported as well.

    Returns:
        A ``VidalBackendComparisonResult`` bundling the inputs, both results
        (``qmatchatea`` is ``None`` when skipped) and both absolute errors
        (``None`` when not available).
    """
    qmatchatea_result = None
    if not skip_qmatchatea:
        reference = QMatchaTeaBackend()
        reference.configure_tn_simulation(
            ansatz="MPS",
            max_bond_dimension=bond,
            cut_ratio=cut_ratio,
            svd_control=qmatchatea_svd_control,
            tensor_module=tensor_module,
            compile_circuit=qmatchatea_compile_circuit,
            track_memory=False,
        )
        tic = time.perf_counter()
        reference_value = reference.expectation(
            circuit,
            observable,
            preprocess=False,
            compile_circuit=qmatchatea_compile_circuit,
        )
        qmatchatea_result = VidalExpectationResult(
            value=float(np.real(reference_value)),
            seconds=time.perf_counter() - tic,
            backend=reference,
        )

    candidate = VidalBackend()
    candidate.configure_tn_simulation(
        ansatz="MPS",
        max_bond_dimension=bond,
        cut_ratio=cut_ratio,
        tensor_module=tensor_module,
        compile_circuit=vidal_compile_circuit,
        fallback=vidal_fallback,
    )
    tic = time.perf_counter()
    candidate_value = candidate.expectation(
        circuit,
        observable,
        preprocess=False,
        compile_circuit=vidal_compile_circuit,
    )
    vidal_result = VidalExpectationResult(
        value=float(np.real(candidate_value)),
        seconds=time.perf_counter() - tic,
        backend=candidate,
    )

    # Errors are only defined against an explicit reference value.
    have_exact = exact is not None
    qmatchatea_error = (
        abs(qmatchatea_result.value - exact)
        if have_exact and qmatchatea_result is not None
        else None
    )
    vidal_error = abs(vidal_result.value - exact) if have_exact else None

    return VidalBackendComparisonResult(
        circuit=circuit,
        observable=observable,
        exact=exact,
        qmatchatea=qmatchatea_result,
        vidal=vidal_result,
        qmatchatea_error=qmatchatea_error,
        vidal_error=vidal_error,
    )
|
||||
|
||||
|
||||
def profile_vidal_expectation(
    circuit,
    observable,
    *,
    bond=512,
    cut_ratio=1e-12,
    torch_threads=32,
    trace_path,
    table_path,
    profile_memory=False,
    rows=60,
):
    """Profile one CPU MPS expectation run with the torch profiler.

    The expectation is computed through ``run_cpu_expectation`` under a
    CPU-only ``torch.profiler.profile`` context. Two summary tables (sorted by
    self CPU time and by total CPU time) are written to ``table_path`` and a
    Chrome trace is exported to ``trace_path``.

    Parameters:
        circuit: Circuit forwarded to ``run_cpu_expectation``.
        observable: Observable forwarded to ``run_cpu_expectation``.
        bond: Bond setting stored in the ``ExpectationConfig``.
        cut_ratio: Cut ratio stored in the ``ExpectationConfig``.
        torch_threads: Thread count given to ``torch.set_num_threads`` and to
            the config.
        trace_path: Destination of the Chrome trace (``str`` or ``Path``).
        table_path: Destination of the text summary (``str`` or ``Path``).
        profile_memory: Enables memory profiling; the same flag also switches
            on shape recording and stack capture.
        rows: Row limit of each summary table.

    Returns:
        A ``VidalProfileResult`` with the expectation value, both output paths
        (as ``Path`` objects) and the summary text.
    """
    from pathlib import Path

    import torch
    from torch.profiler import ProfilerActivity, profile

    from qibotn.expectation_runner import ExpectationConfig, run_cpu_expectation

    # Accept plain strings as well as Path objects for both destinations.
    trace_path = Path(trace_path)
    table_path = Path(table_path)

    torch.set_num_threads(torch_threads)
    config = ExpectationConfig(
        ansatz="mps",
        bond=bond,
        cut_ratio=cut_ratio,
        tensor_module="torch",
        torch_threads=torch_threads,
    )

    with profile(
        activities=[ProfilerActivity.CPU],
        record_shapes=profile_memory,
        profile_memory=profile_memory,
        with_stack=profile_memory,
    ) as prof:
        result = run_cpu_expectation(circuit, observable, config)

    # Aggregate the recorded events once and render both orderings from it.
    averages = prof.key_averages()
    table = (
        f"expval={result.value:.16e}\n\n"
        f"# sorted by self_cpu_time_total\n"
        f"{averages.table(sort_by='self_cpu_time_total', row_limit=rows)}\n\n"
        f"# sorted by cpu_time_total\n"
        f"{averages.table(sort_by='cpu_time_total', row_limit=rows)}\n"
    )

    # Both outputs may live in directories that do not exist yet.
    table_path.parent.mkdir(parents=True, exist_ok=True)
    trace_path.parent.mkdir(parents=True, exist_ok=True)
    table_path.write_text(table, encoding="utf-8")
    prof.export_chrome_trace(str(trace_path))
    return VidalProfileResult(
        value=result.value,
        trace_path=trace_path,
        table_path=table_path,
        table=table,
    )
|
||||
|
||||
|
||||
# Default maximum MPS bond dimension per named contest case.
# run_contest_mps_case looks the case name up here when called with
# bond="case-default" and falls back to 1024 for names missing from the table.
CONTEST_MPS_BONDS = {"main1": 512, "main2": 1024, "strong": 2048}
# Observable names swept, in this order, by run_vidal_mpi_contest_case
# (optionally narrowed by its obs_filter argument).
CONTEST_VIDAL_OBSERVABLES = (
    "boundary_ZZ_q1",
    "boundary_ZZ_q2",
    "boundary_ZZ_q3",
    "long_Z_5_sites",
    "mixed_XZYZX",
    "ring_xz",
    "open_zz",
    "range2_xx",
    "complex_iZ0",
    "dense2_mid",
    "dense3_spread",
)
|
||||
|
||||
|
||||
def run_contest_mps_case(
    case_name="main1",
    *,
    observables=None,
    obs_filter="",
    nqubits=None,
    nlayers=None,
    bond="case-default",
    cut_ratio=1e-12,
    seed=None,
    torch_threads=8,
    exact=False,
    exact_max_qubits=24,
):
    """Run a shared contest-style Vidal/MPS expectation case.

    Parameters:
        case_name: Key into ``CASES`` selecting the default circuit kind,
            size, seed and observables.
        observables: Explicit iterable of observable names. When ``None`` the
            names come from ``obs_filter`` or, if that yields nothing, from
            the case itself.
        obs_filter: Comma-separated observable names; entries are stripped and
            empty entries are ignored.
        nqubits, nlayers, seed: Overrides for the case defaults.
        bond: Maximum bond dimension; ``"case-default"`` selects the value
            from ``CONTEST_MPS_BONDS`` (1024 for unknown case names).
        cut_ratio: Cut ratio forwarded to the backend configuration.
        torch_threads: Thread count passed to ``set_torch_threads``.
        exact: When true, rank 0 also computes a reference value through
            ``exact_for_observable``.
        exact_max_qubits: Largest ``nqubits`` for which ``exact`` is allowed.

    Returns:
        A list with one record dict per observable on MPI rank 0, and an empty
        list on every other rank.

    Raises:
        ValueError: If ``exact`` is requested for more than
            ``exact_max_qubits`` qubits. The check runs on every rank before
            any collective call, so no rank is left waiting at the barrier.
    """
    from qibotn.contest_cases import CASES, build_contest_circuit, build_contest_observable
    from qibotn.expectation_runner import exact_for_observable
    from qibotn.torch_utils import set_torch_threads

    from mpi4py import MPI

    set_torch_threads(torch_threads)
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    case = CASES[case_name]
    nqubits = case.nqubits if nqubits is None else nqubits
    nlayers = case.nlayers if nlayers is None else nlayers
    seed = case.seed if seed is None else seed
    if bond == "case-default":
        bond = CONTEST_MPS_BONDS.get(case_name, 1024)
    if observables is None:
        observables = tuple(x.strip() for x in obs_filter.split(",") if x.strip()) or case.observables
    observables = tuple(observables)

    # The size limit does not depend on the rank. Raising it on rank 0 only,
    # from inside the loop, would leave the other ranks blocked in
    # comm.Barrier(), so it is checked here on every rank.
    if exact and observables and nqubits > exact_max_qubits:
        raise ValueError(f"exact reference is limited to {exact_max_qubits} qubits.")

    circuit = build_contest_circuit(case.circuit_kind, nqubits, nlayers, seed)
    records = []
    for obs_name in observables:
        observable = build_contest_observable(obs_name, nqubits, seed)
        exact_value = None
        if exact and rank == 0:
            exact_value = exact_for_observable(circuit, observable, nqubits)

        backend = VidalBackend()
        backend.configure_tn_simulation(
            max_bond_dimension=bond,
            cut_ratio=cut_ratio,
            tensor_module="torch",
            mpi_approach="CT",
            mpi_num_procs=size,
            fallback=False,
        )

        # Synchronise the ranks so the timer covers the computation only.
        comm.Barrier()
        start = time.perf_counter()
        value = backend.expectation(
            circuit,
            observable,
            preprocess=True,
            compile_circuit=False,
        )
        seconds = time.perf_counter() - start
        if rank != 0:
            continue

        if exact_value is None:
            abs_error = None
            rel_error = None
        else:
            abs_error = abs(value - exact_value)
            # Guard the division against a vanishing reference value.
            rel_error = abs_error / max(abs(exact_value), 1e-15)
        records.append(
            {
                "case": case,
                "observable": obs_name,
                "value": value,
                "seconds": seconds,
                "exact": exact_value,
                "abs_error": abs_error,
                "rel_error": rel_error,
                "truncation_error": backend.last_truncation_error,
                "max_truncation_error": backend.last_max_truncation_error,
            }
        )
    return records
|
||||
|
||||
|
||||
def run_vidal_mpi_contest_case(
    *,
    label,
    kind,
    nqubits,
    nlayers,
    bond,
    cut_ratio,
    seed,
    torch_threads,
    obs_filter="",
):
    """Run the direct Vidal MPI contest observable sweep.

    Parameters:
        label: Accepted for interface compatibility; not used.
        kind: Circuit kind passed to ``build_contest_circuit``.
        nqubits, nlayers, seed: Circuit size and seed.
        bond: Maximum bond dimension for the backend.
        cut_ratio: Cut ratio for the backend.
        torch_threads: Thread count passed to ``set_torch_threads``.
        obs_filter: Optional comma-separated subset of
            ``CONTEST_VIDAL_OBSERVABLES``. Entries are stripped of surrounding
            whitespace and empty entries are ignored, matching how
            ``run_contest_mps_case`` parses its filter.

    Returns:
        A list with one record dict per selected observable on MPI rank 0,
        and an empty list on every other rank.

    Raises:
        ValueError: If ``obs_filter`` is non-empty but selects no observable.
    """
    from qibotn.contest_cases import build_contest_circuit, build_contest_observable
    from qibotn.torch_utils import set_torch_threads

    from mpi4py import MPI

    del label  # deliberately unused
    set_torch_threads(torch_threads)
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    circuit = build_contest_circuit(kind, nqubits, nlayers, seed)
    names = CONTEST_VIDAL_OBSERVABLES
    if obs_filter:
        # Strip each entry so "a, b" selects both names instead of dropping
        # the one that carries a leading space.
        wanted = {entry.strip() for entry in obs_filter.split(",") if entry.strip()}
        names = tuple(name for name in names if name in wanted)
        if not names:
            raise ValueError(f"obs_filter matched no observables: {obs_filter!r}")

    records = []
    for obs_name in names:
        observable = build_contest_observable(obs_name, nqubits, seed)
        backend = VidalBackend()
        backend.configure_tn_simulation(
            max_bond_dimension=bond,
            cut_ratio=cut_ratio,
            tensor_module="torch",
            mpi_approach="CT",
            mpi_num_procs=size,
            fallback=False,
        )
        # Synchronise the ranks so the timer covers the computation only.
        comm.Barrier()
        start = time.perf_counter()
        value = backend.expectation(
            circuit,
            observable,
            preprocess=True,
            compile_circuit=False,
        )
        seconds = time.perf_counter() - start
        if rank == 0:
            records.append(
                {
                    "observable": obs_name,
                    "value": value,
                    "seconds": seconds,
                    "truncation_error": backend.last_truncation_error,
                    "max_truncation_error": backend.last_max_truncation_error,
                }
            )
    return records
|
||||
|
||||
|
||||
def build_vidal_validation_circuit(kind, nqubits, nlayers, seed):
    """Build the circuit family used by Vidal correctness checks.

    The short name ``"brickwall"`` is translated to ``"brickwall_cnot"``;
    every other kind is handed to ``build_circuit`` unchanged.
    """
    from qibotn.benchmark_cases import build_circuit

    short_names = {"brickwall": "brickwall_cnot"}
    resolved_kind = short_names[kind] if kind in short_names else kind
    return build_circuit(resolved_kind, nqubits, nlayers, seed)
|
||||
|
||||
|
||||
def run_vidal_validation_cases(
    *,
    nqubits=16,
    nlayers=6,
    bond=512,
    seed=42,
    tensor_module="torch",
    torch_threads=32,
    mpi=False,
    circuits=("brickwall", "reversed_cnot", "rx_ry_cz"),
    observables=("ring_xz", "open_zz", "mixed_local"),
    cut_ratio=1e-12,
):
    """Run Vidal/TEBD correctness checks against dense statevector references.

    For every circuit kind, rank 0 computes the reference values with
    ``exact_pauli_sum`` (broadcast to all ranks in MPI mode). Every observable
    is then evaluated with ``SegmentVidalMPIExecutor`` when ``mpi`` is true
    and with ``VidalTEBDExecutor`` otherwise.

    Parameters:
        nqubits, nlayers, seed: Size and seed of the validation circuits.
        bond: Maximum bond dimension given to the executor.
        tensor_module: Tensor module name given to the executor.
        torch_threads: Thread count passed to ``set_torch_threads``.
        mpi: Select the MPI segment executor over ``MPI.COMM_WORLD``.
        circuits: Circuit kinds, resolved by
            ``build_vidal_validation_circuit``.
        observables: Observable names understood by ``observable_terms``.
        cut_ratio: Cut ratio given to the executor; the default keeps the
            previously hard-coded value.

    Returns:
        A list of record dicts (circuit, observable, exact, value, abs_error,
        seconds) on rank 0, and an empty list on every other rank.
    """
    from qibotn.benchmark_cases import exact_pauli_sum, observable_terms
    from qibotn.backends.vidal_tebd import VidalTEBDExecutor
    from qibotn.torch_utils import set_torch_threads

    set_torch_threads(torch_threads)
    comm = None
    rank = 0
    if mpi:
        from mpi4py import MPI

        from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor

        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
    else:
        SegmentVidalMPIExecutor = None

    records = []
    for circuit_kind in circuits:
        circuit = build_vidal_validation_circuit(circuit_kind, nqubits, nlayers, seed)
        # The dense reference is computed once, on the root rank only.
        if rank == 0:
            exact_values = {
                obs: exact_pauli_sum(circuit, observable_terms(obs, nqubits), nqubits)
                for obs in observables
            }
        else:
            exact_values = None
        if comm is not None:
            exact_values = comm.bcast(exact_values, root=0)

        for obs_kind in observables:
            terms = observable_terms(obs_kind, nqubits)
            start = time.perf_counter()
            if mpi:
                executor = SegmentVidalMPIExecutor(
                    nqubits=nqubits,
                    max_bond=bond,
                    cut_ratio=cut_ratio,
                    tensor_module=tensor_module,
                    comm=comm,
                )
                executor.run_circuit(circuit)
                value = executor.expectation_pauli_sum_root(terms)
            else:
                executor = VidalTEBDExecutor(
                    nqubits=nqubits,
                    max_bond=bond,
                    cut_ratio=cut_ratio,
                    tensor_module=tensor_module,
                )
                executor.run_circuit(circuit)
                value = float(executor.expectation_pauli_sum(terms))
            # Only the root rank records results.
            if rank != 0:
                continue
            seconds = time.perf_counter() - start
            exact = exact_values[obs_kind]
            records.append(
                {
                    "circuit": circuit_kind,
                    "observable": obs_kind,
                    "exact": exact,
                    "value": value,
                    "abs_error": abs(value - exact),
                    "seconds": seconds,
                }
            )
    return records
|
||||
|
||||
@@ -12,6 +12,7 @@ CIRCUITS = (
|
||||
"brickwall_cnot",
|
||||
"reversed_cnot",
|
||||
"shifted_cz",
|
||||
"rx_ry_cz",
|
||||
"rxx_rzz",
|
||||
"swap_scramble",
|
||||
"ghz_ladder",
|
||||
@@ -49,14 +50,14 @@ def build_circuit(kind, nqubits, nlayers, seed):
|
||||
for qubit in range(nqubits):
|
||||
circuit.add(gates.RY(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
if kind in ("rxx_rzz", "swap_scramble"):
|
||||
if kind in ("rx_ry_cz", "rxx_rzz", "swap_scramble"):
|
||||
circuit.add(gates.RX(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
if kind == "brickwall_cnot":
|
||||
add_brickwall(circuit, nqubits, gates.CNOT, layer, reverse=False)
|
||||
elif kind == "reversed_cnot":
|
||||
add_brickwall(circuit, nqubits, gates.CNOT, layer, reverse=True)
|
||||
elif kind == "shifted_cz":
|
||||
elif kind in ("shifted_cz", "rx_ry_cz"):
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.CZ(qubit, qubit + 1))
|
||||
elif kind == "rxx_rzz":
|
||||
@@ -149,3 +150,22 @@ def exact_pauli_sum(circuit, terms, nqubits):
|
||||
raise ValueError(f"Unsupported Pauli {name!r}.")
|
||||
value += coeff * np.vdot(state[flipped], phase * state)
|
||||
return float(value.real)
|
||||
|
||||
|
||||
def ring_xz_statevector_expectation(state, nqubits, chunk_size=1 << 20):
    """Compute ``0.5 * sum_i X_i Z_(i+1)`` from a dense state vector.

    The sum runs over all ``nqubits`` sites with periodic wrap-around. Qubit 0
    is the most significant bit of the basis-state index. The amplitudes are
    processed in chunks of ``chunk_size`` so the temporary index arrays stay
    bounded.

    Parameters:
        state: Array-like with ``2 ** nqubits`` amplitudes (flattened here).
        nqubits: Number of qubits on the ring.
        chunk_size: Number of amplitudes handled per step; must be positive.

    Returns:
        The real part of the expectation value as a float.

    Raises:
        ValueError: If ``chunk_size`` is not positive or the state length does
            not equal ``2 ** nqubits``.
    """
    state = np.asarray(state).reshape(-1)
    # A negative chunk size would skip the chunk loop and silently yield 0.0.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}.")
    # A wrong length would index out of range or give a meaningless value.
    if state.size != 1 << nqubits:
        raise ValueError(
            f"state has {state.size} amplitudes, expected {1 << nqubits} "
            f"for {nqubits} qubits."
        )

    value = 0.0
    for qubit in range(nqubits):
        next_qubit = (qubit + 1) % nqubits
        x_flip = 1 << (nqubits - 1 - qubit)  # index bit toggled by X on `qubit`
        z_shift = nqubits - 1 - next_qubit  # index bit read by Z on `next_qubit`
        term = 0.0
        for start in range(0, state.size, chunk_size):
            stop = min(start + chunk_size, state.size)
            indices = np.arange(start, stop, dtype=np.int64)
            z_bit = (indices >> z_shift) & 1
            z_phase = 1 - 2 * z_bit  # +1 for bit 0, -1 for bit 1
            term += np.vdot(state[indices ^ x_flip], z_phase * state[start:stop]).real
        value += 0.5 * term
    return float(value)
|
||||
|
||||
@@ -1,263 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
|
||||
|
||||
def _require_cupy():
    """Return the ``cupy`` module, or raise if the optional import failed.

    Raises:
        ImportError: When the module-level ``cp`` is ``None``.
    """
    if cp is not None:
        return cp
    raise ImportError(
        "The cuQuantum circuit converter requires cupy. "
        "Install the GPU dependencies or use the CPU backend."
    )
|
||||
|
||||
# Reference: https://github.com/NVIDIA/cuQuantum/tree/main/python/samples/cutensornet/circuit_converter
|
||||
|
||||
|
||||
class QiboCircuitToEinsum:
    """Convert a circuit to a Tensor Network (TN) representation.

    The circuit is first processed to an intermediate form by pairing each
    gate matrix with the qubits it acts on. It is then converted to an
    equivalent TN expression through ``state_vector_operands()`` or
    ``expectation_operands()``, following the Einstein summation convention
    in the interleaved format.

    See the format description at
    https://docs.nvidia.com/cuda/cuquantum/python/api/generated/cuquantum.contract.html

    The output is meant to be passed to cuQuantum's ``contract()``.
    """

    def __init__(self, circuit, dtype="complex128"):
        """Store the backend, dtype and intermediate gate list for ``circuit``.

        Parameters:
            circuit: The quantum circuit object.
            dtype (str): Name of the dtype attribute to look up on the backend.
        """
        self.backend = _require_cupy()
        self.dtype = getattr(self.backend, dtype)
        self.init_basis_map(self.backend, dtype)
        self.init_intermediate_circuit(circuit)
        self.circuit = circuit

    def state_vector_operands(self):
        """Create the operands for dense vector computation in the interleave
        format.

        Only the active qubits (those touched by at least one gate) take part.

        Returns:
            Operands for the contraction in the interleave format.
        """
        input_bitstring = "0" * len(self.active_qubits)
        input_operands = self._get_bitstring_tensors(input_bitstring)

        (
            mode_labels,
            qubits_frontier,
            next_frontier,
        ) = self._init_mode_labels_from_qubits(self.active_qubits)

        # The parser advances qubits_frontier in place to the output labels.
        gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
            self.gate_tensors, qubits_frontier, next_frontier
        )

        operands = input_operands + gate_operands
        mode_labels += gate_mode_labels

        out_list = list(qubits_frontier.values())

        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]
        operand_exp_interleave.append(out_list)
        return operand_exp_interleave

    def _init_mode_labels_from_qubits(self, qubits):
        """Assign the initial mode label ``i`` to the ``i``-th entry of ``qubits``.

        Returns:
            ``(mode_labels, frontier, next_label)``: one single-label list per
            qubit, the qubit-to-current-label mapping, and the first unused
            label.
        """
        n = len(qubits)
        frontier_dict = {q: i for i, q in enumerate(qubits)}
        mode_labels = [[i] for i in range(n)]
        return mode_labels, frontier_dict, n

    def _get_bitstring_tensors(self, bitstring):
        """Return the basis-state tensor for every character of ``bitstring``."""
        return [self.basis_map[ibit] for ibit in bitstring]

    def _parse_gates_to_mode_labels_operands(
        self, gates, qubits_frontier, next_frontier
    ):
        """Turn ``(tensor, qubits)`` pairs into mode labels and operands.

        Every gate gets a fresh output label per qubit. ``qubits_frontier`` is
        updated in place so that it maps each qubit to its latest label.

        Returns:
            ``(mode_labels, operands)`` where each entry of ``mode_labels``
            lists the output labels followed by the input labels.
        """
        mode_labels = []
        operands = []

        for tensor, gate_qubits in gates:
            operands.append(tensor)
            input_mode_labels = []
            output_mode_labels = []
            for q in gate_qubits:
                input_mode_labels.append(qubits_frontier[q])
                output_mode_labels.append(next_frontier)
                qubits_frontier[q] = next_frontier
                next_frontier += 1
            mode_labels.append(output_mode_labels + input_mode_labels)
        return mode_labels, operands

    def op_shape_from_qubits(self, nqubits):
        """Return the tensor shape of a gate acting on ``nqubits`` qubits.

        Parameters:
            nqubits (int): The number of qubits the gate acts on.

        Returns:
            The tuple ``(2, 2) * nqubits``.
        """
        return (2, 2) * nqubits

    def _gate_tensors_from_circuit(self, circuit):
        """Extract ``(tensor, qubits)`` pairs and the active qubits of ``circuit``.

        Every gate matrix is converted with ``self.dtype`` and reshaped to the
        shape given by ``op_shape_from_qubits``. Qubits are ordered as control
        qubits followed by target qubits, following
        https://github.com/NVIDIA/cuQuantum/blob/6b6339358f859ea930907b79854b90b2db71ab92/python/cuquantum/cutensornet/_internal/circuit_parser_utils_cirq.py#L32
        """
        gate_tensors = []
        touched_qubits = []
        for gate in circuit.queue:
            gate_qubits = gate.control_qubits + gate.target_qubits
            touched_qubits.extend(gate_qubits)
            required_shape = self.op_shape_from_qubits(len(gate_qubits))
            tensor = self.backend.asarray(gate.matrix(), dtype=self.dtype).reshape(
                required_shape
            )
            gate_tensors.append((tensor, gate_qubits))
        # Active qubits are those with at least one gate acting on them.
        return gate_tensors, np.unique(touched_qubits)

    def init_intermediate_circuit(self, circuit):
        """Initialize the intermediate circuit representation.

        Sets ``self.gate_tensors`` (gate tensor plus the qubits it acts on)
        and ``self.active_qubits`` from the given circuit.

        Parameters:
            circuit (object): The quantum circuit object.
        """
        self.gate_tensors, self.active_qubits = self._gate_tensors_from_circuit(
            circuit
        )

    def init_basis_map(self, backend, dtype):
        """Initialize the basis map for the quantum circuit.

        Maps the characters ``"0"`` and ``"1"`` to the corresponding
        single-qubit basis vectors.

        Parameters:
            backend (object): The backend object providing ``asarray``.
            dtype (object): The data type for the basis vectors.
        """
        asarray = backend.asarray
        state_0 = asarray([1, 0], dtype=dtype)
        state_1 = asarray([0, 1], dtype=dtype)

        self.basis_map = {"0": state_0, "1": state_1}

    def init_inverse_circuit(self, circuit):
        """Initialize the inverse circuit representation.

        Sets ``self.gate_tensors_inverse`` and ``self.active_qubits_inverse``
        from the given (already inverted) circuit. The tensors use
        ``self.dtype``, the same precision as the forward gate tensors.

        Parameters:
            circuit (object): The quantum circuit object.
        """
        (
            self.gate_tensors_inverse,
            self.active_qubits_inverse,
        ) = self._gate_tensors_from_circuit(circuit)

    def get_pauli_gates(self, pauli_map, dtype="complex128", backend=None):
        """Populate the gates for all pauli operators.

        Parameters:
            pauli_map: A dictionary mapping qubits to pauli operators.
            dtype: Data type for the tensor operands.
            backend: The package the tensor operands belong to; cupy is used
                when omitted.

        Returns:
            A sequence of pauli gates as ``(operand, (qubit,))`` pairs.

        Raises:
            ValueError: If a character other than I/X/Y/Z is found.
        """
        if backend is None:
            backend = _require_cupy()
        asarray = backend.asarray
        pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
        pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
        pauli_y = asarray([[0, -1j], [1j, 0]], dtype=dtype)
        pauli_z = asarray([[1, 0], [0, -1]], dtype=dtype)

        operand_map = {"I": pauli_i, "X": pauli_x, "Y": pauli_y, "Z": pauli_z}
        gates = []
        for qubit, pauli_char in pauli_map.items():
            operand = operand_map.get(pauli_char)
            if operand is None:
                raise ValueError("pauli string character must be one of I/X/Y/Z")
            gates.append((operand, (qubit,)))
        return gates

    def expectation_operands(self, ham_gates):
        """Create the operands for pauli string expectation computation in the
        interleave format.

        The network is: initial basis tensors, circuit gates, ``ham_gates``,
        gates of the inverted circuit, and the initial basis tensors again to
        close the open legs.

        Parameters:
            ham_gates: A list of gates derived from Qibo hamiltonian object.

        Returns:
            Operands for the contraction in the interleave format.
        """
        input_bitstring = "0" * self.circuit.nqubits
        input_operands = self._get_bitstring_tensors(input_bitstring)

        (
            mode_labels,
            qubits_frontier,
            next_frontier,
        ) = self._init_mode_labels_from_qubits(range(self.circuit.nqubits))

        gate_mode_labels, gate_operands = self._parse_gates_to_mode_labels_operands(
            self.gate_tensors, qubits_frontier, next_frontier
        )

        operands = input_operands + gate_operands
        mode_labels += gate_mode_labels

        self.init_inverse_circuit(self.circuit.invert())

        # The parser does not return its advanced label counter; the largest
        # frontier label is the last one handed out, so continue after it.
        next_frontier = max(qubits_frontier.values()) + 1

        gates_inverse = ham_gates + self.gate_tensors_inverse

        (
            gate_mode_labels_inverse,
            gate_operands_inverse,
        ) = self._parse_gates_to_mode_labels_operands(
            gates_inverse, qubits_frontier, next_frontier
        )
        mode_labels = (
            mode_labels
            + gate_mode_labels_inverse
            + [[qubits_frontier[ix]] for ix in range(self.circuit.nqubits)]
        )
        operands = operands + gate_operands_inverse + operands[: self.circuit.nqubits]

        operand_exp_interleave = [x for y in zip(operands, mode_labels) for x in y]

        return operand_exp_interleave
|
||||
@@ -1,63 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
from qibotn.circuit_convertor import QiboCircuitToEinsum
|
||||
from qibotn.mps_utils import apply_gate, initial
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
cutn = None
|
||||
|
||||
|
||||
def _require_cuquantum():
    """Raise unless both optional GPU imports (``cp`` and ``cutn``) succeeded.

    Raises:
        ImportError: When either module-level name is ``None``.
    """
    gpu_stack_available = cp is not None and cutn is not None
    if gpu_stack_available:
        return
    raise ImportError(
        "The cuQuantum MPS converter requires cupy and cuquantum. "
        "Install the GPU dependencies or use the CPU backend."
    )
|
||||
|
||||
|
||||
class QiboCircuitToMPS:
    """Build an MPS by applying the gates of a Qibo circuit one by one.

    Parameters:
        circ_qibo: The quantum circuit object.
        gate_algo (dict): Dictionary for SVD and QR settings.
        dtype (str): Either single ("complex64") or double ("complex128") precision.
        rand_seed (int): Seed for the numpy and cupy random number generators.
    """

    def __init__(
        self,
        circ_qibo,
        gate_algo,
        dtype="complex128",
        rand_seed=0,
    ):
        _require_cuquantum()
        # Seed both global generators, numpy first.
        for rng_module in (np, cp):
            rng_module.random.seed(rand_seed)

        self.num_qubits = circ_qibo.nqubits
        self.handle = cutn.create()
        self.dtype = dtype
        self.mps_tensors = initial(self.num_qubits, dtype=dtype)
        converter = QiboCircuitToEinsum(circ_qibo, dtype=dtype)

        # apply_gate updates self.mps_tensors in place, gate after gate.
        for gate_tensor, gate_qubits in converter.gate_tensors:
            apply_gate(
                self.mps_tensors,
                gate_tensor,
                gate_qubits,
                algorithm=gate_algo,
                options={"handle": self.handle},
            )

    def __del__(self):
        # The handle attribute is missing if __init__ failed before creating it.
        handle = getattr(self, "handle", None)
        if handle is None or cutn is None:
            return
        cutn.destroy(handle)
|
||||
241
src/qibotn/contest_cases.py
Normal file
241
src/qibotn/contest_cases.py
Normal file
@@ -0,0 +1,241 @@
|
||||
"""Shared contest-style circuits and observables for qibotn tools."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
from qibotn.backends.quimb import quimb_torch_parallel_opts
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CaseSpec:
    """Immutable description of one named contest case.

    The entries of ``CASES`` are instances of this class.
    """

    # Circuit family name understood by build_contest_circuit.
    circuit_kind: str
    # Names of the observables evaluated for this case by default.
    observables: tuple[str, ...]
    # Default number of qubits of the circuit.
    nqubits: int
    # Default number of circuit layers.
    nlayers: int
    # Default seed of the random circuit parameters.
    seed: int
    # NOTE(review): not read anywhere in the visible code; presumably a
    # slicing hint for contraction-based runners; confirm.
    target_slices: int | None = None
|
||||
|
||||
|
||||
# Named contest cases, keyed by case name. Each entry fixes the circuit
# family, the default observables, the circuit size and the seed.
CASES = {
    "main1": CaseSpec(
        circuit_kind="rxx_rzz_chain",
        observables=("ring_xz",),
        nqubits=37,
        nlayers=20,
        seed=31001,
        target_slices=None,
    ),
    "main2": CaseSpec(
        circuit_kind="scramble_chain",
        observables=("open_zz", "range2_xx"),
        nqubits=36,
        nlayers=18,
        seed=31002,
        target_slices=None,
    ),
    "strong": CaseSpec(
        circuit_kind="reversed_cnot",
        observables=("ring_xz", "long_z_string"),
        nqubits=40,
        nlayers=24,
        seed=41001,
        target_slices=None,
    ),
}
|
||||
|
||||
|
||||
def _add_single_qubit_layer(circuit, nqubits, rng, include_rx=False):
    """Append one layer of random single-qubit rotations to ``circuit``.

    Every qubit receives an RY and then an RZ gate, followed by an RX gate
    when ``include_rx`` is true. Each angle is a fresh uniform draw from
    ``[-pi, pi)`` taken from ``rng`` in gate order.
    """

    def random_angle():
        return rng.uniform(-math.pi, math.pi)

    for target in range(nqubits):
        circuit.add(gates.RY(target, theta=random_angle()))
        circuit.add(gates.RZ(target, theta=random_angle()))
        if not include_rx:
            continue
        circuit.add(gates.RX(target, theta=random_angle()))
|
||||
|
||||
|
||||
def _add_brickwall(circuit, nqubits, gate, layer, reverse=False):
    """Append one brickwall layer of two-qubit gates to ``circuit``.

    Pairs starting on even qubits are added first, then pairs starting on odd
    qubits. With ``reverse`` set, the gate arguments of the even pairs are
    swapped on odd layers and those of the odd pairs on even layers.
    """
    odd_layer = bool(layer % 2)
    for first_site in (0, 1):
        # Even sublattice flips on odd layers, odd sublattice on even layers.
        flip_here = odd_layer if first_site == 0 else not odd_layer
        swap_arguments = bool(reverse) and flip_here
        for left in range(first_site, nqubits - 1, 2):
            right = left + 1
            if swap_arguments:
                circuit.add(gate(right, left))
            else:
                circuit.add(gate(left, right))
|
||||
|
||||
|
||||
def build_contest_circuit(kind, nqubits, nlayers, seed):
    """Build one of the contest-style benchmark circuits.

    ``"ghz_ladder"`` yields an H gate followed by a CNOT chain and ignores
    ``nlayers``. Every other kind stacks ``nlayers`` layers made of random
    single-qubit rotations followed by a kind-specific entangling pattern.
    All random angles come from ``np.random.default_rng(seed)``.

    Raises:
        ValueError: For an unknown ``kind``, raised while the first layer is
            being built.
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    if kind == "ghz_ladder":
        circuit.add(gates.H(0))
        for control in range(nqubits - 1):
            circuit.add(gates.CNOT(control, control + 1))
        return circuit

    plain_rotation_kinds = {"brickwall_cnot", "reversed_cnot", "shifted_cz"}
    rx_rotation_kinds = {
        "rxx_rzz",
        "swap_scramble",
        "rxx_rzz_chain",
        "scramble_chain",
        "scramble",
    }
    # Half-width of the uniform angle range of the RXX/RZZ pairs, per kind.
    angle_span = {
        "rxx_rzz": 0.7,
        "rxx_rzz_chain": 0.9,
        "scramble_chain": 0.8,
        "scramble": 0.8,
    }

    for layer in range(nlayers):
        if kind in plain_rotation_kinds:
            _add_single_qubit_layer(circuit, nqubits, rng)
        elif kind in rx_rotation_kinds:
            _add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")

        if kind in ("brickwall_cnot", "reversed_cnot"):
            _add_brickwall(
                circuit, nqubits, gates.CNOT, layer, reverse=(kind == "reversed_cnot")
            )
            continue

        # All remaining kinds act on nearest-neighbour pairs whose starting
        # site alternates with the layer parity.
        for left in range(layer % 2, nqubits - 1, 2):
            right = left + 1
            if kind in ("shifted_cz", "swap_scramble"):
                circuit.add(gates.CZ(left, right))
                if kind == "swap_scramble" and layer % 4 == 3:
                    circuit.add(gates.SWAP(left, right))
            else:
                span = angle_span[kind]
                circuit.add(gates.RXX(left, right, theta=rng.uniform(-span, span)))
                circuit.add(gates.RZZ(left, right, theta=rng.uniform(-span, span)))
                if kind in ("scramble_chain", "scramble") and layer % 5 == 4:
                    circuit.add(gates.SWAP(left, right))

    return circuit
|
||||
|
||||
|
||||
def _dense_observable(nqubits, qubits, seed, dim):
    """Build a random Hermitian ``dim x dim`` matrix with unit Frobenius norm.

    ``nqubits`` is accepted but not used. The matrix is the Hermitian part of
    a complex Gaussian matrix drawn from ``np.random.default_rng(seed)``.

    Returns:
        A dict with the normalised ``"matrix"`` and the target ``"qubits"``
        as a list.
    """
    del nqubits
    generator = np.random.default_rng(seed)
    shape = (dim, dim)
    # Real part is drawn first, imaginary part second.
    real_part = generator.normal(size=shape)
    imag_part = generator.normal(size=shape)
    gaussian = real_part + 1j * imag_part
    hermitian = (gaussian + gaussian.conj().T) / 2.0
    normalised = hermitian / np.linalg.norm(hermitian)
    return {"matrix": normalised, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def build_contest_observable(kind, nqubits, seed=0):
    """Construct the contest observable identified by ``kind``.

    Args:
        kind: Name of the observable to build.
        nqubits: Number of qubits; also fixes the reference sites at
            ``nqubits // 4``, ``nqubits // 2``, ``(3 * nqubits) // 4`` and
            ``nqubits - 1``.
        seed: Base seed, only used by the dense kinds.

    Returns:
        A ``SymbolicHamiltonian`` for the Pauli kinds, or the dict produced
        by ``_dense_observable`` for ``"dense2_mid"`` / ``"dense3_spread"``.

    Raises:
        ValueError: If ``kind`` is not a known observable name.
    """
    quarter = nqubits // 4
    half = nqubits // 2
    three_quarters = (3 * nqubits) // 4
    final = nqubits - 1

    def symbolic(form):
        return hamiltonians.SymbolicHamiltonian(form=form)

    if kind == "ring_xz":
        total = 0
        for site in range(nqubits):
            total += 0.5 * X(site) * Z((site + 1) % nqubits)
        return symbolic(total)

    if kind == "open_zz":
        weight = 1.0 / max(1, nqubits - 1)
        total = 0
        for site in range(nqubits - 1):
            total += weight * Z(site) * Z(site + 1)
        return symbolic(total)

    if kind == "range2_xx":
        weight = 1.0 / max(1, nqubits - 2)
        total = 0
        for site in range(nqubits - 2):
            total += weight * X(site) * X(site + 2)
        return symbolic(total)

    if kind == "mixed_local":
        return symbolic(
            0.25 * X(0)
            - 0.5 * Z(final)
            + 0.125 * X(quarter) * Z(half) * Y(three_quarters)
        )

    if kind == "long_z_string":
        step = max(1, nqubits // 16)
        product = None
        for site in range(0, nqubits, step):
            if product is None:
                product = Z(site)
            else:
                product = product * Z(site)
        return symbolic(product)

    # Nearest-neighbour ZZ pairs that end on each of the reference sites.
    boundaries = (
        ("boundary_ZZ_q1", quarter),
        ("boundary_ZZ_q2", half),
        ("boundary_ZZ_q3", three_quarters),
    )
    for label, right in boundaries:
        if kind == label:
            return symbolic(Z(right - 1) * Z(right))

    if kind == "long_Z_5_sites":
        return symbolic(
            Z(0) * Z(quarter) * Z(half) * Z(three_quarters) * Z(final)
        )

    if kind == "mixed_XZYZX":
        return symbolic(
            X(0) * Z(quarter) * Y(half) * Z(three_quarters) * X(final)
        )

    if kind == "complex_iZ0":
        return symbolic(1.0j * Z(0))

    if kind == "dense2_mid":
        return _dense_observable(nqubits, (half - 1, half), seed + 101, 4)

    if kind == "dense3_spread":
        return _dense_observable(
            nqubits, (quarter, half, three_quarters), seed + 202, 8
        )

    raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def tree_path(tree_dir, case_name, obs_name, nqubits, nlayers, target_slices, merge_gates=True):
    """Return the pickle path of a contraction tree inside ``tree_dir``.

    The file name encodes the case, observable, circuit size, the slicing
    target (``auto`` when ``target_slices`` is ``None``) and whether gates
    are merged.
    """
    if target_slices is None:
        slice_tag = "auto"
    else:
        slice_tag = f"s{target_slices}"

    if merge_gates:
        merge_tag = "merge"
    else:
        merge_tag = "nomerge"

    filename = f"{case_name}_{obs_name}_{nqubits}q{nlayers}l_{slice_tag}_{merge_tag}.pkl"
    return Path(tree_dir) / filename
|
||||
|
||||
|
||||
def selected_observables(args, case):
    """Pick the observable names to run for ``case``.

    Precedence: a non-empty ``args.observables``, then the comma-separated
    ``args.obs_filter`` (blank entries dropped), then ``case.observables``.
    """
    explicit = args.observables
    if explicit:
        return tuple(explicit)

    raw_filter = args.obs_filter
    if not raw_filter:
        return case.observables

    names = []
    for piece in raw_filter.split(","):
        cleaned = piece.strip()
        if cleaned:
            names.append(cleaned)
    return tuple(names)
|
||||
|
||||
|
||||
def apply_case_defaults(args):
    """Fill unset options on ``args`` from the case named by ``args.case``.

    Every option that is still ``None`` takes the matching value from the
    ``CASES`` entry; ``args.observables`` is then always replaced with the
    result of ``selected_observables``. ``args`` is modified in place.
    """
    case = CASES[args.case]

    # (attribute on args, attribute on the case that supplies its default)
    fallbacks = (
        ("nqubits", "nqubits"),
        ("nlayers", "nlayers"),
        ("seed", "seed"),
        ("tn_target_slices", "target_slices"),
    )
    for arg_name, case_name in fallbacks:
        if getattr(args, arg_name) is None:
            setattr(args, arg_name, getattr(case, case_name))

    args.observables = selected_observables(args, case)
|
||||
|
||||
|
||||
def build_parallel_opts(args, tree_file=None, search_only=False):
    """Translate parsed CLI ``args`` into ``quimb_torch_parallel_opts`` options.

    Args:
        args: Namespace carrying the ``tn_*``, ``dask_*``, ``torch_threads``
            and ``merge_gates`` settings read below.
        tree_file: Optional tree location; when given, its string form is
            used as both the save path and the load path.
        search_only: Forwarded unchanged.

    Returns:
        Whatever ``quimb_torch_parallel_opts`` returns; statistics printing
        is always disabled.
    """
    options = dict(
        target_slices=args.tn_target_slices,
        target_size=args.tn_target_size,
        search_workers=args.tn_search_workers,
        torch_threads=args.torch_threads,
        search_repeats=args.tn_search_repeats,
        search_time=args.tn_search_time,
        search_seed=args.tn_search_seed,
        merge_gates=args.merge_gates,
        search_backend=args.tn_search_backend,
        dask_address=args.dask_address,
        dask_expected_workers=args.dask_expected_workers,
        dask_close_workers=args.dask_close_workers,
        debug_trials=args.tn_debug_trials,
        search_only=search_only,
    )

    # The same file serves as cache input and output.
    if tree_file is None:
        tree_location = None
    else:
        tree_location = str(tree_file)
    options.update(
        save_tree_path=tree_location,
        load_tree_path=tree_location,
        print_stats=False,
    )
    return quimb_torch_parallel_opts(**options)
|
||||
@@ -1,8 +1,10 @@
|
||||
from mpi4py import MPI
|
||||
|
||||
from qibotn.circuit_convertor import QiboCircuitToEinsum
|
||||
from qibotn.circuit_to_mps import QiboCircuitToMPS
|
||||
from qibotn.mps_contraction_helper import MPSContractionHelper
|
||||
from qibotn.backends.cutensornet_helpers import (
|
||||
MPSContractionHelper,
|
||||
QiboCircuitToEinsum,
|
||||
QiboCircuitToMPS,
|
||||
)
|
||||
from qibotn.observables import (
|
||||
build_observable,
|
||||
check_observable,
|
||||
|
||||
@@ -8,10 +8,62 @@ from dataclasses import dataclass
|
||||
import numpy as np
|
||||
from qibo.backends import construct_backend
|
||||
|
||||
from qibotn.benchmark_cases import exact_pauli_sum
|
||||
from qibotn.benchmark_cases import (
|
||||
CIRCUITS,
|
||||
OBSERVABLES,
|
||||
build_circuit,
|
||||
exact_pauli_sum,
|
||||
observable_terms,
|
||||
parse_names,
|
||||
terms_to_dict,
|
||||
)
|
||||
from qibotn.observables import check_observable
|
||||
|
||||
|
||||
def cpu_runcard(
|
||||
observable=None,
|
||||
*,
|
||||
ansatz: str = "tn",
|
||||
mpi: bool = False,
|
||||
bond: int | None = 1024,
|
||||
cut_ratio: float | None = 1e-12,
|
||||
tensor_module: str = "torch",
|
||||
quimb_backend: str = "torch",
|
||||
dtype: str = "complex128",
|
||||
torch_threads: int | None = 8,
|
||||
parallel_opts: dict | None = None,
|
||||
compile_circuit: bool = False,
|
||||
preprocess: bool = False,
|
||||
):
|
||||
"""Build the small CPU backend runcard used throughout qibotn."""
|
||||
return {
|
||||
"MPI_enabled": mpi,
|
||||
"MPS_enabled": ansatz.lower() == "mps",
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": observable if observable is not None else False,
|
||||
"max_bond_dimension": bond,
|
||||
"cut_ratio": cut_ratio,
|
||||
"tensor_module": tensor_module,
|
||||
"quimb_backend": quimb_backend,
|
||||
"dtype": dtype,
|
||||
"torch_threads": torch_threads,
|
||||
"parallel_opts": parallel_opts or {},
|
||||
"compile_circuit": compile_circuit,
|
||||
"preprocess": preprocess,
|
||||
}
|
||||
|
||||
|
||||
def cpu_backend(**kwargs):
    """Create a ``CpuTensorNet`` backend from ``cpu_runcard`` keyword options.

    All keyword arguments are passed to ``cpu_runcard``; the resulting
    runcard configures the backend.

    Example:
        ``backend = cpu_backend(ansatz="mps", bond=512, torch_threads=8)``
    """
    # Imported on first use rather than at module import time.
    from qibotn.backends.cpu import CpuTensorNet

    runcard = cpu_runcard(**kwargs)
    return CpuTensorNet(runcard)
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExpectationConfig:
|
||||
ansatz: str = "tn"
|
||||
@@ -33,6 +85,27 @@ class ExpectationResult:
|
||||
parallel_stats: list | None = None
|
||||
|
||||
|
||||
@dataclass
class BenchmarkExpectationRecord:
    """One row of benchmark output: a single circuit/observable expectation."""

    # Name of the circuit the value was computed for.
    circuit: str
    # Name of the observable the value was computed for.
    observable: str
    # Expectation value computed by the backend.
    value: float
    # Time reported for the computation (presumably wall-clock seconds; confirm
    # against the code that fills it).
    seconds: float
    # Reference value, or None when no exact reference was computed.
    exact: float | None = None
    # Absolute difference between value and exact; None without a reference.
    abs_error: float | None = None
    # Relative difference between value and exact; None without a reference.
    rel_error: float | None = None
    # Optional parallel-search statistics attached to the result.
    parallel_stats: list | None = None
|
||||
|
||||
|
||||
def _config_from_kwargs(**kwargs):
    """Build an ``ExpectationConfig`` from keyword options.

    Raises:
        TypeError: If any keyword is not a field of ``ExpectationConfig``;
            the message lists the offending names in sorted order.
    """
    known = ExpectationConfig.__dataclass_fields__

    accepted = {}
    rejected = []
    for name, value in kwargs.items():
        if name in known:
            accepted[name] = value
        else:
            rejected.append(name)

    if rejected:
        unknown = ", ".join(sorted(rejected))
        raise TypeError(f"Unknown expectation option(s): {unknown}")
    return ExpectationConfig(**accepted)
|
||||
|
||||
|
||||
def exact_for_observable(circuit, observable, nqubits):
|
||||
if isinstance(observable, dict) and "terms" in observable:
|
||||
terms = [
|
||||
@@ -49,19 +122,18 @@ def exact_for_observable(circuit, observable, nqubits):
|
||||
|
||||
|
||||
def run_cpu_expectation(circuit, observable, config):
|
||||
runcard = {
|
||||
"MPI_enabled": config.mpi,
|
||||
"MPS_enabled": config.ansatz.lower() == "mps",
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": observable,
|
||||
"max_bond_dimension": config.bond,
|
||||
"cut_ratio": config.cut_ratio,
|
||||
"tensor_module": config.tensor_module,
|
||||
"quimb_backend": config.quimb_backend,
|
||||
"dtype": config.dtype,
|
||||
"torch_threads": config.torch_threads,
|
||||
"parallel_opts": config.parallel_opts or {},
|
||||
}
|
||||
runcard = cpu_runcard(
|
||||
observable,
|
||||
ansatz=config.ansatz,
|
||||
mpi=config.mpi,
|
||||
bond=config.bond,
|
||||
cut_ratio=config.cut_ratio,
|
||||
tensor_module=config.tensor_module,
|
||||
quimb_backend=config.quimb_backend,
|
||||
dtype=config.dtype,
|
||||
torch_threads=config.torch_threads,
|
||||
parallel_opts=config.parallel_opts,
|
||||
)
|
||||
backend = construct_backend(
|
||||
backend="qibotn",
|
||||
platform="cpu",
|
||||
@@ -80,3 +152,171 @@ def run_cpu_expectation(circuit, observable, config):
|
||||
rank=rank,
|
||||
parallel_stats=list(stats) if stats is not None else None,
|
||||
)
|
||||
|
||||
|
||||
def cpu_expectation(circuit, observable=None, *, return_result=False, **kwargs):
    """Evaluate an expectation value on the CPU TN/MPS backend.

    This is the preferred API for small scripts. Common options are
    ``ansatz="tn" | "mps"``, ``bond``, ``cut_ratio``, ``mpi``,
    ``torch_threads``, ``quimb_backend`` and ``parallel_opts``; they are
    validated by ``_config_from_kwargs``.

    Returns:
        The full result object when ``return_result`` is true, otherwise
        only its ``value``.
    """
    outcome = run_cpu_expectation(
        circuit, observable, _config_from_kwargs(**kwargs)
    )
    if return_result:
        return outcome
    return outcome.value
|
||||
|
||||
|
||||
def mps_expectation(circuit, observable=None, *, return_result=False, **kwargs):
    """Call ``cpu_expectation`` with ``ansatz`` defaulting to ``"mps"``.

    An ``ansatz`` passed explicitly by the caller is left untouched.
    """
    if "ansatz" not in kwargs:
        kwargs["ansatz"] = "mps"
    return cpu_expectation(circuit, observable, return_result=return_result, **kwargs)
|
||||
|
||||
|
||||
def cpu_benchmark_parallel_opts(
    *,
    target_slices=None,
    target_size=2**32,
    search_workers=None,
    torch_threads=8,
    search_repeats=128,
    search_time=60.0,
    search_backend="dask",
    dask_address=None,
    dask_close_workers=False,
    save_tree_path=None,
    load_tree_path=None,
    search_only=False,
    debug_trials=False,
    contract_implementation=None,
    print_stats=True,
):
    """Assemble the parallel TN options dict for the CPU expectation backend.

    ``slicing_opts`` holds whichever of ``target_slices`` / ``target_size``
    is not ``None`` (or is ``None`` when both are unset), and
    ``search_workers`` falls back to ``torch_threads`` when falsy. Optional
    values are added only when they are not ``None``, and boolean flags only
    when truthy (stored as ``True``).
    """
    slicing = {
        key: value
        for key, value in (
            ("target_slices", target_slices),
            ("target_size", target_size),
        )
        if value is not None
    }

    opts = {
        "slicing_opts": slicing if slicing else None,
        "search_workers": search_workers if search_workers else torch_threads,
        "max_repeats": search_repeats,
        "max_time": search_time,
        "print_stats": print_stats,
    }

    for key, value in (
        ("search_backend", search_backend),
        ("dask_address", dask_address),
        ("save_tree_path", save_tree_path),
        ("load_tree_path", load_tree_path),
    ):
        if value is not None:
            opts[key] = value

    for key, enabled in (
        ("search_only", search_only),
        ("debug_trials", debug_trials),
    ):
        if enabled:
            opts[key] = True

    if contract_implementation is not None:
        opts["contract_implementation"] = contract_implementation
    if dask_close_workers:
        opts["dask_close_workers"] = True
    return opts
|
||||
|
||||
|
||||
def run_cpu_benchmark_cases(
    *,
    nqubits=40,
    nlayers=30,
    bond=1024,
    cut_ratio=1e-12,
    seed=42,
    torch_threads=8,
    quimb_backend="torch",
    dtype="complex128",
    ansatz="tn",
    mpi=False,
    exact=False,
    exact_max_qubits=24,
    circuits=("brickwall_cnot",),
    observables=("ring_xz",),
    pauli_pattern=None,
    parallel_opts=None,
):
    """Run every selected circuit/observable pair on the CPU TN/MPS backend.

    This is the importable library entrypoint for the reusable CPU benchmark
    cases.

    Args:
        circuits: Circuit names, validated with ``parse_names``.
        observables: Observable names, validated with ``parse_names``;
            ignored when ``pauli_pattern`` is truthy.
        pauli_pattern: When truthy, the only observable is this repeated
            Pauli pattern.
        exact: When true, rank 0 also computes a reference value with
            ``exact_for_observable``.
        exact_max_qubits: Largest ``nqubits`` allowed with ``exact``.
        mpi: When true, the rank is read from ``MPI.COMM_WORLD`` and results
            whose ``rank`` is not 0 are dropped.

    The remaining options are stored in the ``ExpectationConfig`` used for
    every run, with ``tensor_module`` fixed to ``"torch"``.

    Returns:
        A list of ``BenchmarkExpectationRecord``, one per recorded run.

    Raises:
        ValueError: If ``exact`` is requested on rank 0 and ``nqubits``
            exceeds ``exact_max_qubits``.
    """
    circuit_names = parse_names(list(circuits), CIRCUITS, "circuits")
    if pauli_pattern:
        observable_names = []
    else:
        observable_names = parse_names(list(observables), OBSERVABLES, "observables")

    rank = 0
    if mpi:
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()

    config = ExpectationConfig(
        ansatz=ansatz,
        mpi=mpi,
        bond=bond,
        cut_ratio=cut_ratio,
        tensor_module="torch",
        quimb_backend=quimb_backend,
        dtype=dtype,
        torch_threads=torch_threads,
        parallel_opts=parallel_opts if parallel_opts else {},
    )

    # Only rank 0 evaluates the exact reference.
    wants_exact = exact and rank == 0

    records = []
    for circuit_kind in circuit_names:
        circuit = build_circuit(circuit_kind, nqubits, nlayers, seed)

        if pauli_pattern:
            pairs = [
                (f"pattern:{pauli_pattern}", {"pauli_string_pattern": pauli_pattern})
            ]
        else:
            pairs = []
            for obs_kind in observable_names:
                spec = terms_to_dict(observable_terms(obs_kind, nqubits))
                pairs.append((obs_kind, spec))

        for obs_name, observable in pairs:
            reference = None
            if wants_exact:
                if nqubits > exact_max_qubits:
                    raise ValueError(
                        f"exact reference is limited to {exact_max_qubits} qubits."
                    )
                reference = exact_for_observable(circuit, observable, nqubits)

            result = run_cpu_expectation(circuit, observable, config)
            if mpi and result.rank != 0:
                continue

            if reference is None:
                abs_error = None
                rel_error = None
            else:
                abs_error = abs(result.value - reference)
                # The floor on the denominator avoids dividing by zero.
                rel_error = abs_error / max(abs(reference), 1e-15)

            record = BenchmarkExpectationRecord(
                circuit=circuit_kind,
                observable=obs_name,
                value=result.value,
                seconds=result.seconds,
                exact=reference,
                abs_error=abs_error,
                rel_error=rel_error,
                parallel_stats=result.parallel_stats,
            )
            records.append(record)
    return records
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
try:
|
||||
from cuquantum.tensornet import contract, contract_path
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
contract = None
|
||||
contract_path = None
|
||||
|
||||
|
||||
def _require_cuquantum():
    """Raise ``ImportError`` unless the optional cuquantum imports succeeded.

    The module-level names ``contract`` and ``contract_path`` are set to
    ``None`` when importing cuquantum fails; this guard reports that case.
    """
    if contract is not None and contract_path is not None:
        return
    raise ImportError(
        "The cuQuantum MPS contraction helper requires cuquantum. "
        "Install the GPU dependencies or use the CPU backend."
    )
|
||||
|
||||
# Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
|
||||
|
||||
|
||||
class MPSContractionHelper:
    """Build and contract the tensor networks for common MPS quantities.

    Inputs to ``cuquantum`` contraction are assembled in interleaved format
    (operand, modes, operand, modes, ..., output modes).

    Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb

    Supported quantities:

    - the norm of the MPS.
    - the equivalent state vector from the MPS.
    - the expectation value for a given operator.

    Parameters:
        num_qubits: The number of qubits for the MPS.
    """

    def __init__(self, num_qubits):
        self.num_qubits = num_qubits

        # Site i carries (left bond, physical, right bond) mode labels.
        # Neighbouring sites share a bond label, and bra and ket share the
        # physical label so a plain contraction traces it out.
        ket_offset = 2 * num_qubits + 1
        bra_modes = []
        ket_modes = []
        for site in range(num_qubits):
            physical = 2 * site + 1
            bra_modes.append((2 * site, physical, 2 * site + 2))
            ket_modes.append((site + ket_offset, physical, site + 1 + ket_offset))
        self.bra_modes = bra_modes
        self.ket_modes = ket_modes

    def contract_norm(self, mps_tensors, options=None):
        """Contract the MPS with its conjugate to obtain the norm.

        Parameters:
            mps_tensors: A list of rank-3 ndarray-like tensor objects.
                The indices of the ith tensor are expected to be the bond to
                tensor i-1, the physical mode, then the bond to tensor i+1.
            options: Specify the contract and decompose options.

        Returns:
            The real part of the fully contracted network.
        """
        network = []
        for site, tensor in enumerate(mps_tensors):
            network += [tensor, self.bra_modes[site]]
            network += [tensor.conj(), self.ket_modes[site]]
        network.append([])  # scalar output
        contracted = self._contract(network, options=options)
        return contracted.real

    def contract_state_vector(self, mps_tensors, options=None):
        """Contract the MPS into its state vector representation.

        Parameters:
            mps_tensors: A list of rank-3 ndarray-like tensor objects.
                The indices of the ith tensor are expected to be the bond to
                tensor i-1, the physical mode, then the bond to tensor i+1.
            options: Specify the contract and decompose options.

        Returns:
            An ndarray-like object with one mode per physical index.
        """
        network = []
        for site, tensor in enumerate(mps_tensors):
            network.append(tensor)
            network.append(self.bra_modes[site])
        # Keep every physical mode open; the bonds are summed over.
        physical_modes = tuple(modes[1] for modes in self.bra_modes)
        network.append(physical_modes)
        return self._contract(network, options=options)

    def contract_expectation(
        self, mps_tensors, operator, qubits, options=None, normalize=False
    ):
        """Contract the network for the expectation of ``operator``.

        Parameters:
            mps_tensors: A list of rank-3 ndarray-like tensor objects.
                The indices of the ith tensor are expected to be the bond to
                tensor i-1, the physical mode, then the bond to tensor i+1.
            operator: A ndarray-like tensor object.
                Its modes are expected to be output qubits followed by input
                qubits, e.g. ``A, B, a, b`` where ``a, b`` denotes the inputs
                and ``A, B`` denotes the outputs.
            qubits: A sequence of integers specifying the qubits that the
                operator is acting on.
            options: Specify the contract and decompose options.
            normalize: Whether to divide the result by ``contract_norm``.

        Returns:
            The fully contracted value, divided by the norm when
            ``normalize`` is true.
        """
        n_targets = len(qubits)
        # Output modes come first and are filled in below; input modes are
        # tied to the physical modes of the unconjugated tensors.
        operator_modes = [None] * n_targets + [self.bra_modes[q][1] for q in qubits]
        targets = list(qubits)

        next_free_mode = 3 * self.num_qubits + 2
        network = []
        for site, tensor in enumerate(mps_tensors):
            network += [tensor, self.bra_modes[site]]
            conj_modes = self.ket_modes[site]
            if site in targets:
                # Detach the conjugate's physical mode from the shared label
                # and route it through the operator's output mode instead.
                conj_modes = (conj_modes[0], next_free_mode, conj_modes[2])
                operator_modes[targets.index(site)] = next_free_mode
                next_free_mode += 1
            network += [tensor.conj(), conj_modes]
        network += [operator, tuple(operator_modes)]
        network.append([])  # scalar output

        if normalize:
            norm = self.contract_norm(mps_tensors, options=options)
        else:
            norm = 1
        return self._contract(network, options=options) / norm

    def _contract(self, interleaved_inputs, options=None):
        """Find a contraction path for the inputs and contract along it."""
        _require_cuquantum()
        path_result = contract_path(*interleaved_inputs, options=options)
        path = path_result[0]
        return contract(*interleaved_inputs, options=options, optimize={"path": path})
|
||||
@@ -1,111 +0,0 @@
|
||||
try:
|
||||
import cupy as cp
|
||||
from cuquantum.tensornet import contract
|
||||
from cuquantum.tensornet.experimental import contract_decompose
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
contract = None
|
||||
contract_decompose = None
|
||||
|
||||
|
||||
def _require_cuquantum():
    """Raise ``ImportError`` unless cupy and cuquantum were imported.

    The module-level names ``cp``, ``contract`` and ``contract_decompose``
    are set to ``None`` when the optional imports fail; this guard reports
    that case.
    """
    if cp is not None and contract is not None and contract_decompose is not None:
        return
    raise ImportError(
        "The cuQuantum MPS helpers require cupy and cuquantum. "
        "Install the GPU dependencies or use the CPU backend."
    )
|
||||
|
||||
|
||||
def initial(num_qubits, dtype):
    r"""Create the MPS for the initial state :math:`\ket{00...00}`.

    Parameters:
        num_qubits: Number of qubits in the Quantum Circuit.
        dtype: Either single ("complex64") or double ("complex128") precision.

    Returns:
        A list of ``num_qubits`` site tensors of shape ``(1, 2, 1)``. Every
        entry refers to the same array object.
    """
    _require_cuquantum()
    zero_site = cp.asarray([1, 0], dtype=dtype).reshape(1, 2, 1)
    return [zero_site for _ in range(num_qubits)]
|
||||
|
||||
|
||||
def mps_site_right_swap(mps_tensors, i, **kwargs):
    """Exchange the physical indices of MPS sites ``i`` and ``i + 1``.

    Parameters:
        mps_tensors: Tensors representing MPS; updated in place.
        i (int): Index of the left tensor of the pair to swap.
        **kwargs: Optional ``algorithm`` and ``options`` entries forwarded to
            ``contract_decompose``.

    Returns:
        The same ``mps_tensors`` list, with the two sites replaced.
    """
    _require_cuquantum()
    pair = mps_tensors[i : i + 2]
    # Contract the pair and split it again with the physical modes (p, q)
    # exchanged between the two output tensors.
    left, _, right = contract_decompose(
        "ipj,jqk->iqj,jpk",
        *pair,
        algorithm=kwargs.get("algorithm"),
        options=kwargs.get("options"),
    )
    mps_tensors[i : i + 2] = (left, right)
    return mps_tensors
|
||||
|
||||
|
||||
def apply_gate(mps_tensors, gate, qubits, **kwargs):
    """Apply the gate operand to the MPS tensors in-place.

    Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb

    Parameters:
        mps_tensors: A list of rank-3 ndarray-like tensor objects.
            The indices of the ith tensor are expected to be the bonding index to the i-1 tensor,
            the physical mode, and then the bonding index to the i+1th tensor.
        gate: A ndarray-like tensor object representing the gate operand.
            The modes of the gate is expected to be output qubits followed by input qubits, e.g,
            ``A, B, a, b`` where ``a, b`` denotes the inputs and ``A, B`` denotes the outputs.
        qubits: A sequence of integers denoting the qubits that the gate is applied onto.
        algorithm: The contract and decompose algorithm to use for gate application.
            Can be either a `dict` or a `ContractDecomposeAlgorithm`.
        options: Specify the contract and decompose options.

    Returns:
        The updated MPS tensors (the same list object that was passed in).

    Raises:
        ImportError: If cupy/cuquantum are not installed.
        NotImplementedError: If the gate acts on more than two qubits.
    """
    _require_cuquantum()
    n_qubits = len(qubits)
    if n_qubits == 1:
        # single-qubit gate: contract it onto the physical mode of site i
        i = qubits[0]
        mps_tensors[i] = contract(
            "ipj,qp->iqj", mps_tensors[i], gate, options=kwargs.get("options", None)
        )  # in-place update
    elif n_qubits == 2:
        # two-qubit gate
        i, j = qubits
        if i > j:
            # swap qubits order so the lower site always comes first
            return apply_gate(mps_tensors, gate.transpose(1, 0, 3, 2), (j, i), **kwargs)
        elif i + 1 == j:
            # two adjacent qubits: contract the gate in and split back into two sites
            a, _, b = contract_decompose(
                "ipj,jqk,rspq->irj,jsk",
                *mps_tensors[i : i + 2],
                gate,
                algorithm=kwargs.get("algorithm", None),
                options=kwargs.get("options", None),
            )
            mps_tensors[i : i + 2] = (a, b)  # in-place update
        else:
            # non-adjacent two-qubit gate
            # step 1: swap i with i+1
            mps_site_right_swap(mps_tensors, i, **kwargs)
            # step 2: apply gate to (i+1, j) pair. This amounts to a recursive swap until the two qubits are adjacent
            apply_gate(mps_tensors, gate, (i + 1, j), **kwargs)
            # step 3: swap back i and i+1
            mps_site_right_swap(mps_tensors, i, **kwargs)
    else:
        raise NotImplementedError("Only one- and two-qubit gates supported")
    # The documented contract is to return the updated tensors; previously the
    # function fell off the end and returned None on every path.
    return mps_tensors
|
||||
@@ -4,6 +4,30 @@ from qibo import hamiltonians
|
||||
from qibo.symbols import I, X, Y, Z
|
||||
|
||||
|
||||
def pauli_pattern(pattern):
    """Wrap ``pattern`` in the compact dict form of a repeated Pauli string.

    Returns:
        ``{"pauli_string_pattern": pattern}``.
    """
    spec = {}
    spec["pauli_string_pattern"] = pattern
    return spec
|
||||
|
||||
|
||||
def pauli_sum(*terms):
    """Build the compact dict form of a Pauli sum.

    Each term is ``(coefficient, operators)`` where operators are pairs like
    ``("X", 0)``; the site of every pair is converted with ``int``. Example:

    ``pauli_sum((0.5, [("X", 0), ("Z", 1)]), (-1.0, [("Z", 3)]))``

    Returns:
        ``{"terms": [...]}`` with one ``{"coefficient", "operators"}`` dict
        per input term, in input order.
    """
    entries = []
    for coeff, operators in terms:
        pairs = []
        for name, site in operators:
            pairs.append((name, int(site)))
        entries.append({"coefficient": coeff, "operators": pairs})
    return {"terms": entries}
|
||||
|
||||
|
||||
def check_observable(observable, circuit_nqubit):
|
||||
"""Checks the type of observable and returns the appropriate Hamiltonian."""
|
||||
if observable is None:
|
||||
@@ -11,7 +35,17 @@ def check_observable(observable, circuit_nqubit):
|
||||
if isinstance(observable, dict):
|
||||
return create_hamiltonian_from_dict(observable, circuit_nqubit)
|
||||
if isinstance(observable, hamiltonians.SymbolicHamiltonian):
|
||||
return observable
|
||||
if observable.nqubits == circuit_nqubit:
|
||||
return observable
|
||||
if observable.nqubits > circuit_nqubit:
|
||||
raise ValueError(
|
||||
"Observable has more qubits than the circuit: "
|
||||
f"{observable.nqubits} > {circuit_nqubit}."
|
||||
)
|
||||
return hamiltonians.SymbolicHamiltonian(
|
||||
form=observable.form,
|
||||
nqubits=circuit_nqubit,
|
||||
)
|
||||
try:
|
||||
return hamiltonians.SymbolicHamiltonian(form=observable)
|
||||
except Exception as exc:
|
||||
@@ -20,11 +54,10 @@ def check_observable(observable, circuit_nqubit):
|
||||
|
||||
def build_observable(circuit_nqubit):
|
||||
"""Construct the default benchmark observable used by qibotn."""
|
||||
hamiltonian_form = 0
|
||||
for i in range(circuit_nqubit):
|
||||
hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit)
|
||||
|
||||
return hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
|
||||
form = sum(
|
||||
0.5 * X(i) * Z((i + 1) % circuit_nqubit) for i in range(circuit_nqubit)
|
||||
)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def create_hamiltonian_from_dict(data, circuit_nqubit):
|
||||
@@ -50,7 +83,6 @@ def create_hamiltonian_from_dict(data, circuit_nqubit):
|
||||
term_expr = full_term_expr[0]
|
||||
for op in full_term_expr[1:]:
|
||||
term_expr *= op
|
||||
|
||||
terms.append(coeff * term_expr)
|
||||
|
||||
if not terms:
|
||||
@@ -84,23 +116,20 @@ def create_hamiltonian_from_pauli_pattern(pattern, circuit_nqubit):
|
||||
continue
|
||||
factor = pauli_gates[name](qubit)
|
||||
expr = factor if expr is None else expr * factor
|
||||
|
||||
if expr is None:
|
||||
expr = I(0)
|
||||
|
||||
return hamiltonians.SymbolicHamiltonian(form=expr)
|
||||
return hamiltonians.SymbolicHamiltonian(form=expr or I(0))
|
||||
|
||||
|
||||
def build_random_circuit(nqubits, nlayers, seed=42):
|
||||
"""Build a random circuit with RY+RZ+CNOT layers for benchmarks."""
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates
|
||||
np.random.seed(seed)
|
||||
|
||||
rng = np.random.default_rng(seed)
|
||||
c = Circuit(nqubits)
|
||||
for _ in range(nlayers):
|
||||
for q in range(nqubits):
|
||||
c.add(gates.RY(q, theta=np.random.uniform(0, 2*np.pi)))
|
||||
c.add(gates.RZ(q, theta=np.random.uniform(0, 2*np.pi)))
|
||||
c.add(gates.RY(q, theta=rng.uniform(0, 2 * np.pi)))
|
||||
c.add(gates.RZ(q, theta=rng.uniform(0, 2 * np.pi)))
|
||||
for q in range(nqubits):
|
||||
c.add(gates.CNOT(q % nqubits, (q + 1) % nqubits))
|
||||
return c
|
||||
|
||||
@@ -1,12 +1,16 @@
|
||||
"""Parallel path search and contraction utilities for tensor networks."""
|
||||
import importlib
|
||||
import os
|
||||
import pickle
|
||||
import signal
|
||||
import time
|
||||
from math import log2, log10
|
||||
import numpy as np
|
||||
from dataclasses import dataclass
|
||||
from collections import Counter, defaultdict
|
||||
from concurrent.futures import ProcessPoolExecutor, TimeoutError, as_completed
|
||||
from dataclasses import dataclass
|
||||
from math import log2, log10
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
@@ -40,6 +44,12 @@ def _optimizer_search_stats(opt):
|
||||
}
|
||||
|
||||
|
||||
def _tree_search_stats(tree):
    """Return the search statistics stored on ``tree``, or an empty dict.

    The statistics are read from the ``qibotn_search_stats`` attribute; a
    ``None`` tree, a missing attribute or a falsy value all yield ``{}``.
    """
    stats = None
    if tree is not None:
        stats = getattr(tree, "qibotn_search_stats", None)
    return stats if stats else {}
|
||||
|
||||
|
||||
def _attach_search_stats(tree, opt):
|
||||
try:
|
||||
tree.qibotn_search_stats = _optimizer_search_stats(opt)
|
||||
@@ -48,6 +58,47 @@ def _attach_search_stats(tree, opt):
|
||||
return tree
|
||||
|
||||
|
||||
def _search_seed_kwargs(optlib, seed):
    """Return the optimizer keyword arguments that carry ``seed``.

    ``"random"`` takes the seed directly, an unset ``optlib`` takes it
    inside ``sampler_opts``, and any other value gets no seed keyword.
    """
    if optlib == "random":
        seeded = {"seed": seed}
    elif optlib is None:
        seeded = {"sampler_opts": {"seed": seed}}
    else:
        seeded = {}
    return seeded
|
||||
|
||||
|
||||
def _fallback_greedy_tree(tn, output_inds, slicing_opts=None, error=None):
    """Build a contraction tree with cotengra's greedy optimizer.

    Args:
        tn: Tensor network providing ``contraction_tree``.
        output_inds: Output indices passed to ``contraction_tree``.
        slicing_opts: Optional mapping; ``target_size`` is applied when set,
            otherwise ``target_slices`` when set.
        error: Optional exception that triggered the fallback; its ``repr``
            is recorded in the attached statistics.

    Returns:
        The (possibly sliced) tree. Placeholder search statistics marked
        ``"fallback": "greedy"`` are attached as ``qibotn_search_stats``
        when the tree accepts the attribute.
    """
    import cotengra as ctg

    greedy = ctg.GreedyOptimizer()
    tree = tn.contraction_tree(output_inds=output_inds, optimize=greedy)

    if slicing_opts:
        size_limit = slicing_opts.get("target_size")
        slice_count = slicing_opts.get("target_slices")
        # A size target takes precedence over a slice-count target.
        if size_limit is not None:
            tree.slice_(target_size=size_limit)
        elif slice_count is not None:
            tree.slice_(target_slices=slice_count)

    try:
        # No search ran, so counters are zero and the best-* values are NaN.
        stats = {
            "completed_trials": 0,
            "finite_trials": 0,
            "failed_trials": 0,
            "requested_trials": 0,
            "trial_seconds_sum": 0.0,
        }
        for key in ("best_score", "best_flops", "best_write", "best_size"):
            stats[key] = float("nan")
        stats["fallback"] = "greedy"
        stats["fallback_error"] = None if error is None else repr(error)
        tree.qibotn_search_stats = stats
    except Exception:
        # Best effort: a tree that rejects the attribute is returned without it.
        pass
    return tree
|
||||
|
||||
|
||||
def _dask_worker_slots(client):
|
||||
info = client.scheduler_info(n_workers=-1)
|
||||
workers = info.get("workers", {})
|
||||
@@ -218,13 +269,18 @@ def _search_chunk(
|
||||
slicing_opts,
|
||||
optlib=None,
|
||||
):
|
||||
import random, cotengra as ctg
|
||||
import random
|
||||
|
||||
import cotengra as ctg
|
||||
|
||||
seed = int(seed)
|
||||
random.seed(seed)
|
||||
np.random.seed(seed % (2**32))
|
||||
tn = pickle.loads(tn_bytes)
|
||||
kwargs = {}
|
||||
if optlib is not None:
|
||||
kwargs["optlib"] = optlib
|
||||
kwargs.update(_search_seed_kwargs(optlib, seed))
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=SEARCH_METHODS,
|
||||
max_repeats=repeats,
|
||||
@@ -266,7 +322,15 @@ def _kill_pool(pool):
|
||||
pool.shutdown(wait=False)
|
||||
|
||||
|
||||
def _serial_search(tn_bytes, output_inds, repeats, seed, max_time, slicing_opts=None, trial_timeout=None):
|
||||
def _serial_search(
|
||||
tn_bytes,
|
||||
output_inds,
|
||||
repeats,
|
||||
seed,
|
||||
max_time,
|
||||
slicing_opts=None,
|
||||
trial_timeout=None,
|
||||
):
|
||||
import time
|
||||
|
||||
if trial_timeout is None:
|
||||
@@ -287,7 +351,13 @@ def _serial_search(tn_bytes, output_inds, repeats, seed, max_time, slicing_opts=
|
||||
break
|
||||
timeout = min(trial_timeout, deadline - time.time())
|
||||
pool = ProcessPoolExecutor(max_workers=1)
|
||||
fut = pool.submit(_run_single_trial, tn_bytes, output_inds, seed * 10000 + i, slicing_opts)
|
||||
fut = pool.submit(
|
||||
_run_single_trial,
|
||||
tn_bytes,
|
||||
output_inds,
|
||||
seed * 10000 + i,
|
||||
slicing_opts,
|
||||
)
|
||||
try:
|
||||
cost, tree = fut.result(timeout=timeout)
|
||||
if cost < best_cost:
|
||||
@@ -304,15 +374,30 @@ def _split_repeats(total_repeats, n_workers):
|
||||
n_workers = max(1, int(n_workers))
|
||||
total_repeats = max(1, int(total_repeats))
|
||||
chunk, extra = divmod(total_repeats, n_workers)
|
||||
return [chunk + (1 if i < extra else 0) for i in range(n_workers) if chunk + (1 if i < extra else 0) > 0]
|
||||
return [
|
||||
chunk + (1 if i < extra else 0)
|
||||
for i in range(n_workers)
|
||||
if chunk + (1 if i < extra else 0) > 0
|
||||
]
|
||||
|
||||
|
||||
def _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts=None, trial_timeout=None):
|
||||
def _processpool_search(
|
||||
tn,
|
||||
output_inds,
|
||||
total_repeats,
|
||||
n_workers,
|
||||
max_time,
|
||||
slicing_opts=None,
|
||||
trial_timeout=None,
|
||||
search_seed=0,
|
||||
):
|
||||
tn_bytes = pickle.dumps(tn)
|
||||
repeat_chunks = _split_repeats(total_repeats, n_workers)
|
||||
pool = ProcessPoolExecutor(max_workers=len(repeat_chunks))
|
||||
futures = []
|
||||
for seed, repeats in enumerate(repeat_chunks):
|
||||
errors = []
|
||||
for worker_id, repeats in enumerate(repeat_chunks):
|
||||
seed = int(search_seed) + worker_id
|
||||
futures.append(
|
||||
pool.submit(
|
||||
_serial_search,
|
||||
@@ -334,14 +419,34 @@ def _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, sli
|
||||
cost, tree = fut.result()
|
||||
if cost < best_cost:
|
||||
best_cost, best_tree = cost, tree
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
errors.append(repr(exc))
|
||||
except TimeoutError:
|
||||
pass
|
||||
errors.append("TimeoutError()")
|
||||
finally:
|
||||
for fut in futures:
|
||||
fut.cancel()
|
||||
_kill_pool(pool)
|
||||
if best_tree is None:
|
||||
if errors:
|
||||
print(
|
||||
"qibotn_search_failed "
|
||||
f"backend=processpool errors={errors[:3]} "
|
||||
f"num_errors={len(errors)} fallback=greedy",
|
||||
flush=True,
|
||||
)
|
||||
else:
|
||||
print(
|
||||
"qibotn_search_failed "
|
||||
"backend=processpool errors=[] fallback=greedy",
|
||||
flush=True,
|
||||
)
|
||||
return _fallback_greedy_tree(
|
||||
tn,
|
||||
output_inds,
|
||||
slicing_opts=slicing_opts,
|
||||
error=errors[:3],
|
||||
)
|
||||
return best_tree
|
||||
|
||||
|
||||
@@ -356,6 +461,8 @@ def _dask_search(
|
||||
optlib=None,
|
||||
debug_trials=False,
|
||||
close_workers=False,
|
||||
expected_workers=None,
|
||||
search_seed=0,
|
||||
):
|
||||
"""Run one centralized cotengra hyper-optimizer over a dask pool.
|
||||
|
||||
@@ -370,8 +477,14 @@ def _dask_search(
|
||||
"`pip install distributed` or the package extra that provides it."
|
||||
) from exc
|
||||
|
||||
import random
|
||||
|
||||
import cotengra as ctg
|
||||
|
||||
search_seed = int(search_seed)
|
||||
random.seed(search_seed)
|
||||
np.random.seed(search_seed % (2**32))
|
||||
|
||||
_patch_cotengra_dask_as_completed()
|
||||
_patch_cotengra_dask_submit(debug_trials=debug_trials)
|
||||
|
||||
@@ -399,10 +512,13 @@ def _dask_search(
|
||||
kwargs = {}
|
||||
if optlib is not None:
|
||||
kwargs["optlib"] = optlib
|
||||
kwargs.update(_search_seed_kwargs(optlib, search_seed))
|
||||
|
||||
retire_workers = []
|
||||
try:
|
||||
workers, worker_slots = _dask_worker_slots(client)
|
||||
if expected_workers is not None:
|
||||
worker_slots = max(worker_slots, int(expected_workers))
|
||||
if close_workers:
|
||||
retire_workers = list(workers)
|
||||
if debug_trials:
|
||||
@@ -467,10 +583,12 @@ def _mpi_search(
|
||||
dask_address=None,
|
||||
debug_trials=False,
|
||||
dask_close_workers=False,
|
||||
search_seed=0,
|
||||
):
|
||||
comm = MPI.COMM_WORLD
|
||||
rank, size = comm.Get_rank(), comm.Get_size()
|
||||
search_backend = search_backend or "processpool"
|
||||
search_seed = int(search_seed)
|
||||
|
||||
if search_backend == "dask":
|
||||
if not dask_address:
|
||||
@@ -493,6 +611,7 @@ def _mpi_search(
|
||||
n_workers=n_workers,
|
||||
debug_trials=debug_trials,
|
||||
close_workers=dask_close_workers,
|
||||
search_seed=search_seed,
|
||||
)
|
||||
payload = ("ok", tree)
|
||||
except Exception as exc:
|
||||
@@ -515,6 +634,7 @@ def _mpi_search(
|
||||
max_time,
|
||||
slicing_opts,
|
||||
trial_timeout,
|
||||
search_seed=search_seed + rank * max(1, n_workers or 1),
|
||||
)
|
||||
local_cost = local_tree.combo_cost(factor=256) if local_tree else float("inf")
|
||||
|
||||
@@ -528,11 +648,22 @@ def _mpi_search(
|
||||
return comm.bcast(best_tree, root=0)
|
||||
|
||||
|
||||
def parallel_path_search(tn, output_inds, method='processpool', total_repeats=1024,
|
||||
max_time=300, n_workers=48, slicing_opts=None,
|
||||
trial_timeout=None, search_backend=None,
|
||||
dask_address=None, debug_trials=False,
|
||||
dask_close_workers=False):
|
||||
def parallel_path_search(
|
||||
tn,
|
||||
output_inds,
|
||||
method="processpool",
|
||||
total_repeats=1024,
|
||||
max_time=300,
|
||||
n_workers=48,
|
||||
slicing_opts=None,
|
||||
trial_timeout=None,
|
||||
search_backend=None,
|
||||
dask_address=None,
|
||||
debug_trials=False,
|
||||
dask_close_workers=False,
|
||||
expected_workers=None,
|
||||
search_seed=0,
|
||||
):
|
||||
"""Parallel contraction path search.
|
||||
|
||||
Args:
|
||||
@@ -543,11 +674,32 @@ def parallel_path_search(tn, output_inds, method='processpool', total_repeats=10
|
||||
slicing_opts: cotengra slicing options for memory control
|
||||
trial_timeout: Per-trial timeout (seconds); kills and skips hung trials
|
||||
"""
|
||||
if method == 'serial':
|
||||
if method == "serial":
|
||||
tn_bytes = pickle.dumps(tn)
|
||||
_, tree = _serial_search(tn_bytes, output_inds, total_repeats, 0, max_time, slicing_opts, trial_timeout)
|
||||
try:
|
||||
_, tree = _serial_search(
|
||||
tn_bytes,
|
||||
output_inds,
|
||||
total_repeats,
|
||||
search_seed,
|
||||
max_time,
|
||||
slicing_opts,
|
||||
trial_timeout,
|
||||
)
|
||||
except Exception as exc:
|
||||
print(
|
||||
"qibotn_search_failed "
|
||||
f"backend=serial error={exc!r} fallback=greedy",
|
||||
flush=True,
|
||||
)
|
||||
return _fallback_greedy_tree(
|
||||
tn,
|
||||
output_inds,
|
||||
slicing_opts=slicing_opts,
|
||||
error=exc,
|
||||
)
|
||||
return tree
|
||||
elif method == 'mpi':
|
||||
if method == "mpi":
|
||||
if not _HAVE_MPI:
|
||||
raise ImportError("mpi4py not available")
|
||||
return _mpi_search(
|
||||
@@ -562,10 +714,20 @@ def parallel_path_search(tn, output_inds, method='processpool', total_repeats=10
|
||||
dask_address=dask_address,
|
||||
debug_trials=debug_trials,
|
||||
dask_close_workers=dask_close_workers,
|
||||
search_seed=search_seed,
|
||||
)
|
||||
elif method == 'processpool':
|
||||
return _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts, trial_timeout)
|
||||
elif method == 'dask':
|
||||
if method == "processpool":
|
||||
return _processpool_search(
|
||||
tn,
|
||||
output_inds,
|
||||
total_repeats,
|
||||
n_workers,
|
||||
max_time,
|
||||
slicing_opts,
|
||||
trial_timeout,
|
||||
search_seed=search_seed,
|
||||
)
|
||||
if method == "dask":
|
||||
return _dask_search(
|
||||
tn,
|
||||
output_inds,
|
||||
@@ -576,9 +738,10 @@ def parallel_path_search(tn, output_inds, method='processpool', total_repeats=10
|
||||
n_workers=n_workers,
|
||||
debug_trials=debug_trials,
|
||||
close_workers=dask_close_workers,
|
||||
expected_workers=expected_workers,
|
||||
search_seed=search_seed,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown method: {method}")
|
||||
raise ValueError(f"Unknown method: {method}")
|
||||
|
||||
|
||||
def contraction_tree_costs(tree, dtype_bytes=16, combo_factor=256):
|
||||
@@ -611,6 +774,171 @@ def contraction_tree_costs(tree, dtype_bytes=16, combo_factor=256):
|
||||
}
|
||||
|
||||
|
||||
def load_tree_payload(path, index=0):
    """Load a pickled tree payload and select one tree from it.

    The pickle may contain a dict with a ``"trees"`` entry, a list/tuple of
    trees, or a single bare tree. A bare tree is handled as a one-element
    list.

    Args:
        path: Location of the pickle file.
        index: Position of the tree to return.

    Returns:
        A ``(payload, tree)`` pair: the unpickled object exactly as stored,
        and the selected tree.
    """
    # NOTE(review): unpickling runs arbitrary code; only load trusted files.
    with Path(path).open("rb") as handle:
        payload = pickle.load(handle)

    if isinstance(payload, dict):
        candidates = payload["trees"]
    else:
        candidates = payload
    if not isinstance(candidates, (list, tuple)):
        candidates = [candidates]
    return payload, candidates[index]
|
||||
|
||||
|
||||
def save_tree_payload(path, payload):
    """Pickle *payload* to *path*, creating parent directories as needed.

    The payload is serialized before the target file is touched, so a
    pickling failure leaves an existing file at *path* intact instead of
    truncated.

    Args:
        path: Destination file.
        payload: Any picklable object.

    Raises:
        pickle.PicklingError: If *payload* cannot be pickled (other
            exception types raised by ``pickle.dumps`` propagate as well).
        OSError: If the directory or file cannot be written.
    """
    target = Path(path)
    blob = pickle.dumps(payload)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(blob)
|
||||
|
||||
|
||||
def slice_tree_payload(path, output_path, *, term=0, target_slices=2, max_repeats=64, seed=42):
    """Slice one tree of a stored payload and write the updated payload.

    Args:
        path: Pickle file to read the payload from.
        output_path: Pickle file the updated payload is written to.
        term: Index of the tree to slice.
        target_slices: Forwarded to ``tree.slice``.
        max_repeats: Forwarded to ``tree.slice``.
        seed: Forwarded to ``tree.slice``.

    Returns:
        A ``TreePayloadSliceResult`` holding the original payload, the
        original and sliced tree, and the cost dictionaries of both.
    """
    payload, tree = load_tree_payload(path, index=term)
    original_costs = contraction_tree_costs(tree)
    sliced_tree = tree.slice(
        target_slices=target_slices,
        max_repeats=max_repeats,
        seed=seed,
    )
    sliced_costs = contraction_tree_costs(sliced_tree)

    payload_is_dict = isinstance(payload, dict)
    stored = payload["trees"] if payload_is_dict else payload
    # A bare tree is treated as a one-element list, as when loading.
    new_trees = list(stored) if isinstance(stored, (list, tuple)) else [stored]
    new_trees[term] = sliced_tree

    if payload_is_dict:
        # Shallow copy so the payload returned to the caller is not mutated.
        out_payload = dict(payload)
        out_payload["trees"] = new_trees
        out_payload["costs"] = [contraction_tree_costs(t) for t in new_trees]
        out_payload["nterms"] = len(new_trees)
    else:
        out_payload = new_trees

    save_tree_payload(output_path, out_payload)
    return TreePayloadSliceResult(
        payload=payload,
        tree=tree,
        sliced_tree=sliced_tree,
        original_costs=original_costs,
        sliced_costs=sliced_costs,
    )
|
||||
|
||||
|
||||
def _prod(values):
|
||||
out = 1
|
||||
for value in values:
|
||||
out *= int(value)
|
||||
return out
|
||||
|
||||
|
||||
def _broadcast_batch(a_batch, b_batch):
    """Return the number of batch entries produced by two batch shapes.

    Identical shapes, or one empty shape, reduce to the product of the
    non-trivial shape. Otherwise the shorter shape is left-padded with ones
    and the larger extent is taken per axis.
    """
    if a_batch == b_batch:
        return _prod(a_batch)
    if not a_batch:
        return _prod(b_batch)
    if not b_batch:
        return _prod(a_batch)

    rank = max(len(a_batch), len(b_batch))
    padded_a = (1,) * (rank - len(a_batch)) + tuple(a_batch)
    padded_b = (1,) * (rank - len(b_batch)) + tuple(b_batch)
    extents = [max(pair) for pair in zip(padded_a, padded_b)]
    return _prod(extents)
|
||||
|
||||
|
||||
def analyze_contraction_tree(tree, dtype_bytes=16):
    """Classify every pairwise contraction of *tree* and summarise its cost.

    Each contraction reported by ``cotengra.contract.extract_contractions``
    is labelled ``"mul"`` (pure multiplication), ``"mm"`` (matmul with batch
    size 1) or ``"bmm"`` (batched matmul). Entries without a left and right
    child are only counted under ``"preprocess"``.

    Args:
        tree: Contraction tree exposing ``size_dict``, ``get_inds``,
            ``get_flops`` and ``get_size``; ``multiplicity`` is optional.
        dtype_bytes: Bytes per tensor element, used for the GiB estimate.
            Defaults to 16, the value that was previously hard-coded.

    Returns:
        A ``TreeInspectionResult`` with per-operation records and totals.
    """
    contract_mod = importlib.import_module("cotengra.contract")
    # Materialise once so the loop below and the stored result see the same
    # sequence even if a one-shot iterator is returned.
    contractions = tuple(contract_mod.extract_contractions(tree))
    size_dict = tree.size_dict
    ops = []
    counts = Counter()

    for op_index, (parent, left, right, tdot, arg, perm) in enumerate(contractions):
        if left is None and right is None:
            counts["preprocess"] += 1
            continue

        left_inds = tree.get_inds(left)
        right_inds = tree.get_inds(right)
        parent_inds = tree.get_inds(parent)
        left_shape = tuple(size_dict[ix] for ix in left_inds)
        right_shape = tuple(size_dict[ix] for ix in right_inds)

        if tdot:
            parsed = contract_mod._parse_tensordot_axes_to_matmul(
                arg,
                left_shape,
                right_shape,
            )
        else:
            parsed = contract_mod._parse_eq_to_batch_matmul(
                arg,
                left_shape,
                right_shape,
            )

        (
            _eq_a,
            _eq_b,
            new_shape_a,
            new_shape_b,
            _new_shape_ab,
            _perm_ab,
            pure_multiplication,
        ) = parsed

        matmul_shape = None
        matmul_flops = 0
        if pure_multiplication:
            kind = "mul"
        else:
            # Fall back to the raw operand shape when no reshape is reported.
            a_shape = tuple(new_shape_a or left_shape)
            b_shape = tuple(new_shape_b or right_shape)
            batch = _broadcast_batch(a_shape[:-2], b_shape[:-2])
            m, k, n = int(a_shape[-2]), int(a_shape[-1]), int(b_shape[-1])
            kind = "mm" if batch == 1 else "bmm"
            matmul_shape = (batch, m, k, n)
            matmul_flops = batch * m * k * n

        tree_flops = int(tree.get_flops(parent))
        out_size = int(tree.get_size(parent))
        ops.append(
            ContractionOpInfo(
                index=op_index,
                kind=kind,
                matmul_shape=matmul_shape,
                matmul_flops=matmul_flops,
                tree_flops=tree_flops,
                out_size=out_size,
                left_shape=left_shape,
                right_shape=right_shape,
                left_rank=len(left_inds),
                right_rank=len(right_inds),
                out_rank=len(parent_inds),
                perm=perm,
            )
        )
        counts[kind] += 1

    nslices = int(getattr(tree, "multiplicity", 1))
    per_slice_flops = sum(op.tree_flops for op in ops)
    per_slice_write = sum(op.out_size for op in ops)
    max_out = max((op.out_size for op in ops), default=0)
    return TreeInspectionResult(
        tree=tree,
        contractions=contractions,
        operations=tuple(ops),
        counts=dict(counts),
        nslices=nslices,
        per_slice_flops=per_slice_flops,
        per_slice_write=per_slice_write,
        max_output_size=max_out,
        all_slice_flops=per_slice_flops * nslices,
        all_slice_write=per_slice_write * nslices,
        dtype_bytes=dtype_bytes,
        max_output_gib=max_out * dtype_bytes / 1024**3,
    )
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SlicePlan:
|
||||
"""Slice ownership for one MPI rank."""
|
||||
@@ -633,6 +961,49 @@ class SlicedContractStats:
|
||||
assignment: str
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TreePayloadSliceResult:
    """Result of slicing one tree stored in a tree payload.

    Instances are built by ``slice_tree_payload``.
    """

    # Payload as loaded from disk, before the sliced tree was substituted.
    payload: object
    # Tree selected from the payload prior to slicing.
    tree: object
    # Tree returned by ``tree.slice(...)``.
    sliced_tree: object
    # ``contraction_tree_costs`` of ``tree``.
    original_costs: dict
    # ``contraction_tree_costs`` of ``sliced_tree``.
    sliced_costs: dict
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ContractionOpInfo:
    """Record describing one pairwise contraction of a contraction tree.

    Field meanings below follow how ``analyze_contraction_tree`` fills them.
    """

    # Position of the contraction in the extracted contraction sequence.
    index: int
    # "mul" for a pure multiplication, "mm" for batch size 1, else "bmm".
    kind: str
    # ``(batch, m, k, n)`` for matmul-like operations; ``None`` for "mul".
    matmul_shape: tuple | None
    # ``batch * m * k * n``; 0 for "mul".
    matmul_flops: int
    # ``tree.get_flops(parent)`` converted to int.
    tree_flops: int
    # ``tree.get_size(parent)`` converted to int.
    out_size: int
    # Index sizes of the left and right operands.
    left_shape: tuple
    right_shape: tuple
    # Number of indices on the left operand, right operand and output.
    left_rank: int
    right_rank: int
    out_rank: int
    # Permutation entry passed through from the extracted contraction.
    perm: object
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class TreeInspectionResult:
    """Aggregate report produced by ``analyze_contraction_tree``."""

    # The analysed contraction tree.
    tree: object
    # Contraction tuples as extracted from the tree.
    contractions: tuple
    # One ``ContractionOpInfo`` per non-preprocess contraction.
    operations: tuple
    # Number of operations per kind, including the "preprocess" entries.
    counts: dict
    # ``tree.multiplicity`` when present, otherwise 1.
    nslices: int
    # Sum of ``tree_flops`` / ``out_size`` over all operations of one slice.
    per_slice_flops: int
    per_slice_write: int
    # Largest ``out_size`` among the operations (0 when there are none).
    max_output_size: int
    # Per-slice totals multiplied by ``nslices``.
    all_slice_flops: int
    all_slice_write: int
    # Bytes per element assumed for the GiB figure below.
    dtype_bytes: int
    # ``max_output_size * dtype_bytes / 1024**3``.
    max_output_gib: float
|
||||
|
||||
|
||||
def mpi_slice_plan(nslices, rank, size, assignment="block"):
|
||||
"""Return the contraction slice ids assigned to one MPI rank.
|
||||
|
||||
|
||||
@@ -32,20 +32,19 @@ class TensorNetworkResult:
|
||||
statevector: ndarray
|
||||
|
||||
def __post_init__(self):
|
||||
# TODO: define the general convention when using backends different from qmatchatea
|
||||
if self.measured_probabilities is None:
|
||||
self.measured_probabilities = {"default": self.measured_probabilities}
|
||||
self.measured_probabilities = {}
|
||||
|
||||
def probabilities(self):
|
||||
"""Return calculated probabilities according to the given method."""
|
||||
if self.prob_type == "U":
|
||||
measured_probabilities = deepcopy(self.measured_probabilities)
|
||||
for bitstring, prob in self.measured_probabilities[self.prob_type].items():
|
||||
measured_probabilities[self.prob_type][bitstring] = prob[1] - prob[0]
|
||||
probabilities = measured_probabilities[self.prob_type]
|
||||
else:
|
||||
probabilities = self.measured_probabilities
|
||||
return probabilities
|
||||
if self.prob_type != "U":
|
||||
return self.measured_probabilities
|
||||
|
||||
measured_probabilities = deepcopy(self.measured_probabilities)
|
||||
values = measured_probabilities.get(self.prob_type, {})
|
||||
for bitstring, prob in values.items():
|
||||
values[bitstring] = prob[1] - prob[0]
|
||||
return values
|
||||
|
||||
def frequencies(self):
|
||||
"""Return frequencies if a certain number of shots has been set."""
|
||||
|
||||
90
src/qibotn/torch_utils.py
Normal file
90
src/qibotn/torch_utils.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""Shared torch helpers for qibotn CPU tensor-network code."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def torch_dtype(dtype):
    """Return the torch dtype used by qibotn complex CPU contractions.

    Args:
        dtype: ``"complex64"``, ``"single"``, ``np.complex64`` or
            ``torch.complex64`` select single precision. Anything else
            selects double precision.

    Returns:
        ``torch.complex64`` or ``torch.complex128``.
    """
    import torch

    # A torch dtype does not compare equal to the string or numpy spellings,
    # so without this check torch.complex64 would be widened to complex128.
    if dtype is torch.complex64:
        return torch.complex64
    if dtype in ("complex64", "single", np.complex64):
        return torch.complex64
    return torch.complex128
|
||||
|
||||
|
||||
def numpy_dtype(dtype):
    """Map a qibotn complex dtype name to the matching numpy dtype.

    ``"complex64"``, ``"single"`` and ``np.complex64`` give ``np.complex64``;
    every other value gives ``np.complex128``.
    """
    single_precision = ("complex64", "single", np.complex64)
    return np.complex64 if dtype in single_precision else np.complex128
|
||||
|
||||
|
||||
def torch_cpu_array(data, dtype=None):
    """Convert array-like data to a contiguous CPU torch tensor.

    ``torch.from_numpy`` rejects negative strides and read-only arrays in common
    quimb paths, so this helper normalizes both cases before handing data to
    torch.

    Args:
        data: A torch tensor or anything ``np.asarray`` accepts.
        dtype: ``None`` to keep the dtype, a ``torch.dtype``, or any value
            understood by ``torch_dtype`` (dtype name or numpy complex type).

    Returns:
        A contiguous tensor on the CPU. Writable, positively-strided numpy
        input is wrapped with ``torch.from_numpy`` without a prior copy.
    """
    import torch

    if isinstance(data, torch.Tensor):
        tensor = data
    else:
        array = np.asarray(data)
        # An explicit copy always yields fresh, writable, positively-strided
        # memory. ``np.ascontiguousarray`` may return its input unchanged when
        # the array is already flagged contiguous (e.g. a size-1 reversed view).
        if not array.flags.writeable or any(stride < 0 for stride in array.strides):
            array = array.copy()
        tensor = torch.from_numpy(array)

    if tensor.device.type != "cpu":
        tensor = tensor.cpu()

    # Only ``None`` and real torch dtypes can be used as-is; names and numpy
    # types must be translated, otherwise ``tensor.to`` raises.
    if dtype is None or isinstance(dtype, torch.dtype):
        target_dtype = dtype
    else:
        target_dtype = torch_dtype(dtype)
    if target_dtype is not None and tensor.dtype != target_dtype:
        tensor = tensor.to(target_dtype)
    if not tensor.is_contiguous():
        tensor = tensor.contiguous()
    return tensor
|
||||
|
||||
|
||||
def arrays_to_torch(arrays, dtype="complex128"):
    """Return a list with each array converted to a CPU torch tensor.

    The dtype name is resolved once via ``torch_dtype`` and then applied to
    every element through ``torch_cpu_array``.
    """
    resolved = torch_dtype(dtype)
    tensors = []
    for item in arrays:
        tensors.append(torch_cpu_array(item, dtype=resolved))
    return tensors
|
||||
|
||||
|
||||
def arrays_to_numpy(arrays, dtype="complex128"):
    """Return a list with each array converted to a numpy array.

    The dtype name is resolved once via ``numpy_dtype``.
    """
    resolved = numpy_dtype(dtype)
    converted = []
    for item in arrays:
        converted.append(np.asarray(item, dtype=resolved))
    return converted
|
||||
|
||||
|
||||
def arrays_to_backend(arrays, backend, engine=None, dtype="complex128"):
    """Convert arrays to the representation of the selected backend.

    ``backend == "torch"`` yields CPU torch tensors. Otherwise, if an
    ``engine`` is given, its ``asarray`` is used with the numpy dtype. With
    neither, plain numpy arrays are returned.
    """
    if backend == "torch":
        return arrays_to_torch(arrays, dtype=dtype)
    if engine is None:
        return arrays_to_numpy(arrays, dtype=dtype)
    return [engine.asarray(item, dtype=numpy_dtype(dtype)) for item in arrays]
|
||||
|
||||
|
||||
def set_torch_threads(nthreads=None, interop_threads=None):
    """Configure torch CPU threading and report the intra-op thread count.

    Args:
        nthreads: Intra-op thread count; values below 1 are raised to 1.
            ``None`` leaves the setting untouched.
        interop_threads: Inter-op thread count, clamped the same way.
            ``None`` leaves the setting untouched.

    Returns:
        ``torch.get_num_threads()`` after applying the requested settings.
    """
    import torch

    if nthreads is not None:
        intra = max(1, int(nthreads))
        torch.set_num_threads(intra)
    if interop_threads is not None:
        inter = max(1, int(interop_threads))
        try:
            torch.set_num_interop_threads(inter)
        except RuntimeError:
            # Best effort: a RuntimeError from torch here is ignored on purpose.
            pass
    return torch.get_num_threads()
|
||||
|
||||
|
||||
def is_torch_array(value):
    """Return whether *value* is a torch tensor, without importing torch.

    torch is looked up in ``sys.modules``. If it has not been imported yet,
    no torch tensor can exist and the answer is ``False``. Unlike a
    module-name prefix test, this does not accept other torch objects
    (``torch.Size``, ``torch.dtype``, ``nn.Module``) or objects from packages
    whose name merely starts with ``torch``.
    """
    import sys

    torch = sys.modules.get("torch")
    # getattr guards against a partially initialised torch module.
    tensor_type = getattr(torch, "Tensor", None)
    return tensor_type is not None and isinstance(value, tensor_type)
|
||||
@@ -9,6 +9,12 @@ from qibotn.benchmark_cases import (
|
||||
build_circuit as build_benchmark_circuit,
|
||||
exact_pauli_sum,
|
||||
)
|
||||
from qibotn import cpu_expectation, mps_expectation, pauli_pattern, pauli_sum
|
||||
from qibotn.backends.quimb import (
|
||||
build_expectation_tn,
|
||||
contract_tn,
|
||||
search_contraction_tree,
|
||||
)
|
||||
|
||||
|
||||
def build_circuit(nqubits=6):
|
||||
@@ -46,6 +52,62 @@ def test_cpu_generic_tn_expectation_matches_statevector():
|
||||
assert math.isclose(value, exact, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_public_cpu_expectation_api_matches_statevector():
    """``cpu_expectation`` reproduces the exact Pauli-sum expectation."""
    circuit = build_circuit()
    observable = pauli_sum(
        (0.5, [("X", 0), ("Z", 1)]),
        (-0.25, [("Z", 5)]),
    )
    reference_terms = [
        (0.5, (("X", 0), ("Z", 1))),
        (-0.25, (("Z", 5),)),
    ]
    expected = exact_pauli_sum(circuit, reference_terms, circuit.nqubits)

    measured = cpu_expectation(circuit, observable, torch_threads=1)

    assert math.isclose(measured, expected, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_public_quimb_torch_pipeline_matches_statevector():
    """Build, search and contract through the public quimb API on 4 qubits."""
    circuit = build_circuit(nqubits=4)
    observable = hamiltonians.SymbolicHamiltonian(form=X(0) * Z(1))
    expected = exact_pauli_sum(circuit, [(1.0, (("X", 0), ("Z", 1)))], 4)

    build_options = {"dtype": "complex128", "merge_1q": True, "merge_2q": True}
    built = build_expectation_tn(circuit, observable, **build_options)

    search_options = {
        "method": "serial",
        "total_repeats": 1,
        "max_time": 30,
        "n_workers": 1,
        "search_seed": 0,
    }
    search = search_contraction_tree(built.tn, **search_options)

    contracted = complex(contract_tn(built.tn, search.tree))
    value = built.coeff * contracted

    assert math.isclose(value.real, expected, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_public_mps_expectation_api_accepts_pauli_pattern():
    """``mps_expectation`` accepts a ``pauli_pattern`` observable."""
    circuit = build_circuit()
    reference = hamiltonians.SymbolicHamiltonian(
        form=X(1) * Z(2) * X(4) * Z(5)
    )
    state = circuit().state(numpy=True)
    expected = reference.expectation_from_state(state)

    pattern = pauli_pattern("IXZ")
    measured = mps_expectation(circuit, pattern, bond=64, torch_threads=1)

    assert math.isclose(measured, expected, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_mps_expectation_matches_statevector():
|
||||
circuit = build_circuit()
|
||||
observable = build_observable(circuit.nqubits)
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
# Tools
|
||||
|
||||
Auxiliary scripts for profiling, legacy comparisons, and scale probes.
|
||||
|
||||
The main CPU expectation entrypoint is `../benchmark_cpu_expectation.py`.
|
||||
For the current Vidal/MPS 1D-chain tests, prefer `../run_vidal_mps_cases.sh`.
|
||||
|
||||
Files here are intentionally secondary:
|
||||
|
||||
- `compare_vidal_backend_qmatchatea.py`: diagnostic comparison against QMatchaTea.
|
||||
- `profile_vidal_chrome.py`: PyTorch CPU profiler for the Vidal path.
|
||||
- `run_cpu_single_cases.sh`: single-node scale probes.
|
||||
- `run_cpu_large_cases.sh`: two-node MPI scale probes.
|
||||
- `run_vidal_segment_mpi_scan.sh`: rank/thread scaling scan for Vidal segmented MPI.
|
||||
- `baseline_mps_expectation.py`: legacy MPS comparison CLI kept for old commands.
|
||||
- `benchmark_tn_mpi.py`, `benchmark_search.py`, `benchmark_slice.py`, `benchmark_contract_sliced.py`, `check_tree.py`: old TN path-search/slicing experiments.
|
||||
- `qibojit_reference_expectation.py`: state-vector reference helper.
|
||||
- `validate_vidal_mpi_correctness.py`: focused Vidal MPI correctness helper.
|
||||
@@ -1,201 +0,0 @@
|
||||
"""MPS expectation benchmark for qmatchatea and Vidal backends."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
from qibotn.benchmark_cases import (
|
||||
build_circuit as build_benchmark_circuit,
|
||||
exact_pauli_sum,
|
||||
observable_terms,
|
||||
terms_to_dict,
|
||||
)
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal_tebd import run_vidal_ring_xz
|
||||
|
||||
|
||||
def optional_int(text):
    """Parse an integer CLI value, with sentinel words meaning "no limit".

    ``"none"``, ``"null"``, ``"inf"`` and ``"unlimited"`` (any case,
    surrounding whitespace ignored) return ``None``. Everything else is
    passed to ``int``.

    Raises:
        ValueError: If *text* is neither a sentinel nor an integer.
    """
    # ``int`` tolerates surrounding whitespace, so the sentinels should too.
    if isinstance(text, str) and text.strip().lower() in {"none", "null", "inf", "unlimited"}:
        return None
    return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Parse a float CLI value, with sentinel words meaning "disabled".

    ``"none"``, ``"null"``, ``"inf"`` and ``"unlimited"`` (any case,
    surrounding whitespace ignored) return ``None``. Everything else is
    passed to ``float``.

    Raises:
        ValueError: If *text* is neither a sentinel nor a float.
    """
    # ``float`` tolerates surrounding whitespace, so the sentinels should too.
    if isinstance(text, str) and text.strip().lower() in {"none", "null", "inf", "unlimited"}:
        return None
    return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
    """Format *value* with *fmt*, rendering ``None`` as the text ``"None"``."""
    if value is None:
        return "None"
    return format(value, fmt)
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
    """Build the ``"brickwall_cnot"`` benchmark circuit.

    Thin wrapper around ``build_benchmark_circuit`` that fixes the circuit
    family and forwards *nqubits*, *nlayers* and *seed* unchanged.
    """
    return build_benchmark_circuit("brickwall_cnot", nqubits, nlayers, seed)
|
||||
|
||||
|
||||
def build_observable(nqubits):
    """Return the ``"ring_xz"`` observable terms for *nqubits* as a dict.

    Composes ``observable_terms`` with ``terms_to_dict``.
    """
    return terms_to_dict(observable_terms("ring_xz", nqubits))
|
||||
|
||||
|
||||
def exact_expectation(circuit, nqubits):
    """Return the reference ``"ring_xz"`` expectation value for *circuit*.

    Delegates to ``exact_pauli_sum`` with the ``"ring_xz"`` observable terms.
    """
    return exact_pauli_sum(circuit, observable_terms("ring_xz", nqubits), nqubits)
|
||||
|
||||
|
||||
def main():
    """Run one MPS expectation benchmark from the command line.

    Parses the CLI options, builds the brickwall circuit and ring-XZ
    observable, evaluates the expectation with the selected executor
    (qmatchatea, Vidal, or segmented Vidal over MPI) and prints the value,
    the errors against an optional reference, and the elapsed time on rank 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=40)
    parser.add_argument("--nlayers", type=int, default=30)
    parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=512)
    parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--tensor-module", choices=("numpy", "torch"), default="torch")
    parser.add_argument("--torch-threads", type=int, default=32)
    parser.add_argument(
        "--executor",
        choices=("qmatchatea", "vidal", "vidal-mpi"),
        default="qmatchatea",
    )
    parser.add_argument("--mpi-ct", action="store_true")
    parser.add_argument("--mpi-barriers", type=int, default=-1)
    parser.add_argument("--mpi-isometrization", type=int, default=-1)
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    parser.add_argument("--reference-file")
    parser.add_argument(
        "--mpi-rank-map",
        action="store_true",
        help="Print MPI rank, host, pid, and torch thread placement metadata.",
    )
    args = parser.parse_args()
    # Only errors from these library loggers are shown.
    logging.getLogger("qibo.config").setLevel(logging.ERROR)
    logging.getLogger("qtealeaves").setLevel(logging.ERROR)
    import torch

    torch.set_num_threads(args.torch_threads)
    # Single-process defaults; overwritten below when MPI is requested.
    rank = 0
    size = 1
    if args.mpi_ct:
        # mpi4py is imported only on this path. Every later use of ``MPI``
        # is guarded by ``args.mpi_ct`` or by ``timings``, which only the
        # MPI executor sets.
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()
        size = MPI.COMM_WORLD.Get_size()
        if args.mpi_rank_map:
            rank_info = {
                "rank": rank,
                "size": size,
                "host": socket.gethostname(),
                "pid": os.getpid(),
                "torch_threads": args.torch_threads,
                "omp_num_threads": os.environ.get("OMP_NUM_THREADS", ""),
                "mkl_num_threads": os.environ.get("MKL_NUM_THREADS", ""),
            }
            # Collective call: every rank contributes, rank 0 gets the list.
            rank_infos = MPI.COMM_WORLD.gather(rank_info, root=0)
            if rank == 0:
                print("mpi_rank_map")
                for item in sorted(rank_infos, key=lambda row: row["rank"]):
                    print(
                        "rank={rank} size={size} host={host} pid={pid} "
                        "torch_threads={torch_threads} "
                        "OMP_NUM_THREADS={omp_num_threads} "
                        "MKL_NUM_THREADS={mkl_num_threads}".format(**item)
                    )

    circuit = build_circuit(args.nqubits, args.nlayers, args.seed)
    observable = build_observable(args.nqubits)
    # Reference value: a stored file takes precedence over --exact.
    exact = None
    if args.reference_file:
        with open(args.reference_file, "r", encoding="utf-8") as f:
            exact = float(json.load(f)["expectation"])
    elif args.exact:
        if args.nqubits > args.exact_max_qubits:
            raise ValueError(
                f"--exact is limited to {args.exact_max_qubits} qubits by default."
            )
        exact = exact_expectation(circuit, args.nqubits)

    if rank == 0:
        # Header describing the run; printed once.
        if args.mpi_ct and args.executor in ("vidal", "vidal-mpi"):
            mpi_label = f"VidalSegment/{size}"
        else:
            mpi_label = f"MPIMPS/{size}" if args.mpi_ct else "SR"
        print(
            f"nqubits={args.nqubits} nlayers={args.nlayers} "
            f"bond={format_optional(args.bond)} "
            f"cut_ratio={format_optional(args.cut_ratio)} seed={args.seed} "
            f"tensor_module={args.tensor_module} svd_control=E! "
            f"compile_circuit=True mpi={mpi_label} executor={args.executor}"
        )
        if exact is not None:
            print(f"exact={exact:.16e}")
        print("expval abs_error rel_error seconds")

    start = time.perf_counter()
    timings = None
    if args.executor in ("vidal", "vidal-mpi"):
        if args.executor == "vidal-mpi" and not args.mpi_ct:
            raise ValueError("--executor vidal-mpi requires --mpi-ct.")
        if args.mpi_ct:
            from qibotn.backends.vidal_mpi_segment import run_segment_vidal_mpi_ring_xz

            value, timings = run_segment_vidal_mpi_ring_xz(
                circuit,
                max_bond=args.bond,
                cut_ratio=args.cut_ratio,
                tensor_module=args.tensor_module,
                comm=MPI.COMM_WORLD,
            )
        else:
            value = run_vidal_ring_xz(
                circuit,
                max_bond=args.bond,
                cut_ratio=args.cut_ratio,
                tensor_module=args.tensor_module,
            )
    else:
        backend = QMatchaTeaBackend()
        backend.configure_tn_simulation(
            ansatz="MPS",
            max_bond_dimension=args.bond,
            cut_ratio=args.cut_ratio,
            svd_control="E!",
            tensor_module=args.tensor_module,
            compile_circuit=True,
            track_memory=False,
            mpi_approach="CT" if args.mpi_ct else "SR",
            mpi_num_procs=size,
            mpi_where_barriers=args.mpi_barriers if args.mpi_ct else -1,
            mpi_isometrization=args.mpi_isometrization,
        )
        value = backend.expectation(
            circuit,
            observable,
            preprocess=False,
            compile_circuit=True,
        )
    max_timings = None
    if timings:
        # Collective reduce, so it runs on every rank before non-root ranks
        # leave. Rank 0 receives the per-section maximum.
        max_timings = {
            key: MPI.COMM_WORLD.reduce(local_value, op=MPI.MAX, root=0)
            for key, local_value in timings.items()
        }
    if rank != 0:
        return
    value = float(np.real(value))
    elapsed = time.perf_counter() - start
    abs_error = float("nan") if exact is None else abs(value - exact)
    # The denominator is floored to avoid dividing by a zero reference.
    rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
    print(f"{value:.16e} {abs_error:.6e} {rel_error:.6e} {elapsed:.3f}")
    if max_timings:
        print("timing_section max_seconds")
        for key, max_value in max_timings.items():
            print(f"{key} {max_value:.6f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,56 +0,0 @@
|
||||
"""MPI parallel sliced contraction using pre-sliced tree."""
|
||||
import os
import pickle
import time

NQUBITS, NLAYERS, NCORES = 25, 10, 48

# Export the thread limits before numpy/torch are imported: their BLAS/OpenMP
# runtimes pick these variables up when first loaded, so setting them after
# the import may have no effect.
os.environ['OMP_NUM_THREADS'] = str(NCORES)
os.environ['MKL_NUM_THREADS'] = str(NCORES)

import numpy as np
from mpi4py import MPI

import torch
import qibo
import quimb as qu
from qibotn.observables import build_random_circuit

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

torch.set_num_threads(NCORES)

# Every rank rebuilds the same tensor network from the circuit.
circuit = build_random_circuit(NQUBITS, NLAYERS)
qibo.set_backend("qibotn", platform="quimb")
backend = qibo.get_backend()
backend.configure_tn_simulation(ansatz="tn")
qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz)
tn = qc.local_expectation(qu.pauli('x') & qu.pauli('z'), (0, 1), rehearse='tn')

# Only rank 0 reads the pre-sliced tree from disk; it is then broadcast.
if rank == 0:
    # NOTE(review): pickle executes arbitrary code on load; trusted files only.
    with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'rb') as f:
        tree = pickle.load(f)
else:
    tree = None
tree = comm.bcast(tree, root=0)

arrays = [torch.from_numpy(np.asarray(t._data)) for t in tn.tensors]
n_slices = tree.multiplicity

if rank == 0:
    print(f"Slices: {n_slices}, Ranks: {size}, "
          f"Peak: {tree.max_size() * 16 / 1e9:.2f} GB, "
          f"Threads/rank: {NCORES}, Backend: torch")

t0 = time.time()
result = None
# Round-robin slice ownership: rank r handles slices r, r + size, ...
for i in range(rank, n_slices, size):
    val = tree.contract_slice(arrays, i, backend='torch')
    val_np = val.cpu().numpy().reshape(-1)
    result = val_np if result is None else result + val_np

# Ranks without any slice still have to take part in the reduction.
if result is None:
    result = np.zeros(1, dtype=np.complex128)

total = np.zeros_like(result) if rank == 0 else None
comm.Reduce(result, total, root=0)

if rank == 0:
    print(f"Contract: {time.time() - t0:.4f}s Expectation: {0.5 * total[0].real:.10f}")
|
||||
@@ -1,34 +0,0 @@
|
||||
"""Search contraction path and save."""
|
||||
import os
import pickle
import time

import qibo
import quimb as qu
from mpi4py import MPI

from qibotn.observables import build_random_circuit
from qibotn.parallel import parallel_path_search

NQUBITS, NLAYERS, WORKERS = 20, 10, 96

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
# A single rank falls back to a local process pool; several ranks use MPI.
method = 'processpool' if size <= 1 else 'mpi'

# Build the expectation-value tensor network (structure only, no contraction).
circuit = build_random_circuit(NQUBITS, NLAYERS)
qibo.set_backend("qibotn", platform="quimb")
backend = qibo.get_backend()
backend.configure_tn_simulation(ansatz="tn")
qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz)
tn = qc.local_expectation(qu.pauli('x') & qu.pauli('z'), (0, 1), rehearse='tn')

if rank == 0:
    print(f"Searching {NQUBITS}q {NLAYERS}l, method={method}, ranks={size}, workers/rank={WORKERS}...")

# All ranks take part in the search; the wall time is measured on every rank.
t0 = time.time()
tree = parallel_path_search(
    tn,
    tn.outer_inds(),
    method=method,
    total_repeats=1024,
    max_time=300,
    n_workers=WORKERS,
    trial_timeout=60,
)
t_search = time.time() - t0

# Only rank 0 writes the result to disk.
if rank == 0:
    os.makedirs('data', exist_ok=True)
    path = f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl"
    with open(path, 'wb') as f:
        pickle.dump(tree, f)
    print(f"Search: {t_search:.2f}s Peak: {tree.max_size() * 16 / 1e9:.2f} GB Saved: {path}")
|
||||
@@ -1,16 +0,0 @@
|
||||
"""Slice saved tree and save."""
|
||||
import pickle

NQUBITS, NLAYERS = 25, 10


def _peak_gb(contraction_tree):
    """Return the tree's largest intermediate in GB, at 16 bytes per element."""
    return contraction_tree.max_size() * 16 / 1e9


# Load the unsliced tree.
# NOTE: pickle.load executes arbitrary code -- only load trusted files.
with open(f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl", 'rb') as f:
    tree = pickle.load(f)

print(f"Original peak: {_peak_gb(tree):.2f} GB")

# Slice until the largest intermediate has at most 2**28 elements.
tree_sliced = tree.slice_and_reconfigure(target_size=2**28)

with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'wb') as f:
    pickle.dump(tree_sliced, f)

print(f"Sliced peak: {_peak_gb(tree_sliced):.2f} GB Slices: {tree_sliced.multiplicity}")
|
||||
@@ -1,378 +0,0 @@
|
||||
"""MPI-parallel TN benchmark: path search + contraction via MPI."""
|
||||
import json
|
||||
import pickle
|
||||
import time
|
||||
import argparse
|
||||
import numpy as np
|
||||
import cotengra as ctg
|
||||
import qibo
|
||||
from qibo import Circuit, gates
|
||||
from mpi4py import MPI
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from qibotn.observables import check_observable, extract_gates_and_qubits
|
||||
|
||||
|
||||
def _load_observable(observable_file=None, observable_json=None):
|
||||
if observable_file:
|
||||
with open(observable_file, "r", encoding="utf8") as f:
|
||||
return json.load(f)
|
||||
if observable_json:
|
||||
return json.loads(observable_json)
|
||||
return None
|
||||
|
||||
|
||||
def _term_to_quimb_operator(term):
    """Translate one extracted Hamiltonian term into quimb form.

    Args:
        term: iterable of ``(qubit, gate_name, coefficient)`` triples. The
            coefficient is read from the first triple only.

    Returns:
        ``(coefficient, operator, sites)``: the complex coefficient (1 for an
        empty term), the ``&``-combination of the non-identity Pauli factors
        (``None`` if there are none), and the tuple of qubits they act on.
    """
    import quimb as qu

    coefficient = complex(term[0][2]) if term else complex(1.0)
    sites = []
    operator = None

    for site, label, _ignored in term:
        site = int(site)
        name = str(label).upper()
        if name != "I":
            sites.append(site)
            factor = qu.pauli(name.lower())
            operator = factor if operator is None else operator & factor

    return coefficient, operator, tuple(sites)
|
||||
|
||||
|
||||
def _run_serial_search(tn_bytes, output_inds, repeats, seed, num_slices, n_ranks, max_time):
    """Run one serial cotengra hyper-optimisation; meant to run in a worker.

    Args:
        tn_bytes: pickled tensor network (pickled so it can cross processes).
        output_inds: indices to keep open in the contraction.
        repeats: maximum number of optimiser trials.
        seed: seed passed to Python's ``random`` module.
        num_slices: accepted for call compatibility; not used here.
        n_ranks: accepted for call compatibility; not used here.
        max_time: time budget handed to the optimiser.

    Returns:
        ``(cost, tree)`` where ``cost`` is ``tree.combo_cost(factor=256)``.
    """
    import pickle
    import random

    import cotengra as ctg

    random.seed(seed)
    network = pickle.loads(tn_bytes)

    optimizer_options = {
        "methods": ['kahypar', 'kahypar-agglom', 'spinglass'],
        "max_repeats": repeats,
        "parallel": False,
        "minimize": 'combo-256',
        "max_time": max_time,
        "optlib": "random",
        "slicing_opts": {'target_size': 2**29, 'allow_outer': True},
        "progbar": False,
    }
    optimizer = ctg.HyperOptimizer(**optimizer_options)

    found = network.contraction_tree(optimize=optimizer, output_inds=output_inds)
    return found.combo_cost(factor=256), found
|
||||
|
||||
|
||||
def parallel_search(tn, output_inds, total_repeats, n_workers, num_slices, n_ranks,
                    timeout):
    """Search for a contraction tree with a pool of independent workers.

    Each worker runs ``_run_serial_search`` with its own seed and an equal
    share of ``total_repeats``; the cheapest tree returned in time wins.

    Args:
        tn: tensor network to optimise (must be picklable).
        output_inds: indices to keep open in the contraction.
        total_repeats: total number of optimiser trials, split across workers.
        n_workers: number of worker processes; ``<= 1`` runs in-process.
        num_slices: forwarded unchanged to ``_run_serial_search``.
        n_ranks: forwarded unchanged to ``_run_serial_search``.
        timeout: per-worker time budget in seconds; the pool as a whole is
            given ``timeout + 5`` seconds before stragglers are killed.

    Returns:
        The best tree found, or ``None`` if no worker delivered a result.
    """
    import os
    import pickle
    import signal
    from concurrent.futures import ProcessPoolExecutor, as_completed
    # Before Python 3.11 this is NOT the builtin TimeoutError, so catching
    # the builtin would let the as_completed timeout escape.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    tn_bytes = pickle.dumps(tn)
    if n_workers <= 1:
        return _run_serial_search(
            tn_bytes, output_inds, total_repeats, 0, num_slices, n_ranks, timeout
        )[1]

    repeats_per = max(1, total_repeats // n_workers)
    best_cost, best_tree = float('inf'), None

    pool = ProcessPoolExecutor(max_workers=n_workers)
    futures = []
    try:
        # Submit inside the try block so a failed submit still cleans up.
        for seed in range(n_workers):
            futures.append(
                pool.submit(_run_serial_search, tn_bytes, output_inds,
                            repeats_per, seed, num_slices, n_ranks, timeout)
            )
        for fut in as_completed(futures, timeout=timeout + 5):
            try:
                cost, tree = fut.result()
            except Exception as e:
                # Best effort: one failed worker must not abort the search.
                print(f" [worker failed] {e}")
                continue
            if cost < best_cost:
                best_cost, best_tree = cost, tree
    except FuturesTimeoutError:
        # Out of time: keep whatever best tree has been collected so far.
        pass
    finally:
        for fut in futures:
            fut.cancel()
        # Running futures cannot be cancelled, so kill workers outright.
        # `_processes` is a private attribute and may be None or missing.
        for pid in list(getattr(pool, "_processes", None) or {}):
            try:
                os.kill(pid, signal.SIGKILL)
            except ProcessLookupError:
                pass
        pool.shutdown(wait=False)

    return best_tree
|
||||
|
||||
|
||||
def make_circuit(circuit_type, nqubits, nlayers=1):
    """Build one of the benchmark circuits.

    Args:
        circuit_type: ``"qft"``, ``"variational"``, ``"ghz"`` or ``"brickwork"``.
        nqubits: number of qubits.
        nlayers: number of layers (used by the variational and brickwork types).

    Returns:
        The constructed circuit. Rotation angles are drawn from the global
        ``np.random`` state, so seed it beforehand for reproducibility.

    Raises:
        ValueError: if ``circuit_type`` is not recognised.
    """
    two_pi = 2 * np.pi
    circuit = Circuit(nqubits)

    if circuit_type == "qft":
        from qibo.models import QFT
        return QFT(nqubits)

    if circuit_type == "variational":
        for depth in range(nlayers):
            for wire in range(nqubits):
                circuit.add(gates.RY(wire, theta=np.random.uniform(0, two_pi)))
            # Alternate between even and odd nearest-neighbour pairs.
            for wire in range(depth % 2, nqubits - 1, 2):
                circuit.add(gates.CZ(wire, wire + 1))
        return circuit

    if circuit_type == "ghz":
        circuit.add(gates.H(0))
        for wire in range(nqubits - 1):
            circuit.add(gates.CNOT(wire, wire + 1))
        return circuit

    if circuit_type == "brickwork":
        for wire in range(nqubits):
            circuit.add(gates.H(wire))
        for depth in range(nlayers):
            for wire in range(depth % 2, nqubits - 1, 2):
                circuit.add(gates.CNOT(wire, wire + 1))
                circuit.add(gates.RZ(wire, theta=np.random.uniform(0, two_pi)))
                circuit.add(gates.RZ(wire + 1, theta=np.random.uniform(0, two_pi)))
        return circuit

    raise ValueError(f"Unknown circuit: {circuit_type}")
|
||||
|
||||
|
||||
def _contract_mpi(tree, arrays, comm, root=0):
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
is_torch = type(arrays[0]).__module__.startswith("torch")
|
||||
|
||||
result_np = None
|
||||
for i in range(rank, tree.multiplicity, size):
|
||||
x = tree.contract_slice(arrays, i)
|
||||
x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x))
|
||||
result_np = x_np if result_np is None else result_np + x_np
|
||||
|
||||
if result_np is None:
|
||||
result_np = np.zeros(1, dtype=np.complex128)
|
||||
|
||||
result = np.zeros_like(result_np) if rank == root else None
|
||||
comm.Reduce(result_np, result, root=root)
|
||||
|
||||
if rank == root:
|
||||
import torch
|
||||
return torch.from_numpy(np.asarray(result)) if is_torch else result
|
||||
return None
|
||||
|
||||
|
||||
def run_mpi(circuit, nqubits, num_slices, total_repeats=1024,
            load_path=None, save_path=None):
    """Search (or load) a contraction tree, then contract it across MPI ranks.

    Each rank searches over ``total_repeats // size`` trials, rank 0 keeps the
    cheapest tree, and all ranks then contract its slices in parallel.

    Args:
        circuit: qibo circuit to simulate.
        nqubits: number of qubits (not used inside this function).
        num_slices: forwarded to ``parallel_search``.
        total_repeats: total number of search trials over all ranks.
        load_path: if given, rank 0 loads ``{"tree", "psi"}`` from this pickle
            instead of searching.
        save_path: if given (and no ``load_path``), rank 0 pickles the best
            tree and the call returns without contracting.

    Returns:
        ``(statevector, seconds)``. The statevector is a flat array on rank 0
        and ``None`` elsewhere, and ``None`` everywhere in save-only mode.

    Raises:
        RuntimeError: on every rank, if no rank found a contraction tree.
    """
    import torch

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    qibo.set_backend("qibotn", platform="quimb")
    b = qibo.get_backend()
    b.configure_tn_simulation(ansatz="tn")

    qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
                                  gate_opts={"max_bond": None, "cutoff": 1e-10})
    qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex128)

    # --- path search: each rank searches locally, rank 0 keeps the best ---
    if load_path:
        t_search = 0.0
        if rank == 0:
            # NOTE: pickle.load executes arbitrary code -- trusted files only.
            with open(load_path, "rb") as f:
                saved = pickle.load(f)
            tree, psi = saved["tree"], saved["psi"]
            print(f" [path loaded] {load_path}")
        else:
            tree = psi = None
    else:
        rank_repeats = max(1, total_repeats // size)
        t0 = time.time()
        # Get the TN object first (no contraction), then search for a path.
        psi_tn = qc.to_dense(rehearse="tn")
        local_tree = parallel_search(
            psi_tn, psi_tn.outer_inds(), rank_repeats, n_workers=48,
            num_slices=num_slices, n_ranks=size, timeout=600,
        )
        t_search = time.time() - t0

        # parallel_search returns None when every worker failed or timed out.
        # Rank such a result last instead of raising on one rank only, which
        # would leave the other ranks blocked inside gather.
        local_cost = (
            float("inf") if local_tree is None
            else local_tree.combo_cost(factor=256)
        )
        all_results = comm.gather((local_cost, local_tree, psi_tn), root=0)
        if rank == 0:
            _, tree, psi = min(all_results, key=lambda x: x[0])
        else:
            tree = psi = None

        # Fail collectively so that no rank is left waiting in a later bcast.
        if not comm.bcast(tree is not None, root=0):
            raise RuntimeError("Failed to find a contraction tree on any rank.")

        if rank == 0:
            print(f" [path search] {t_search:.3f}s "
                  f"flops~2^{tree.contraction_cost(log=2):.2f} "
                  f"size~2^{tree.contraction_width():.2f} "
                  f"slices={tree.multiplicity}")
            if save_path:
                with open(save_path, "wb") as f:
                    pickle.dump({"tree": tree, "psi": psi}, f)
                print(f" [path saved] {save_path}")

        # NOTE(review): save-only early return is placed in the search branch;
        # the nesting could not be confirmed from the extracted source.
        if save_path:
            t_search = comm.bcast(t_search, root=0)
            return None, t_search

    tree = comm.bcast(tree, root=0)
    psi = comm.bcast(psi, root=0)
    t_search = comm.bcast(t_search, root=0)

    # --- contraction: all ranks work in parallel ---
    torch.set_num_threads(max(1, 96 // size))
    arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex128) for a in psi.arrays]
    t0 = time.time()
    sv = _contract_mpi(tree, arrays, comm, root=0)
    t_contract = time.time() - t0

    if rank == 0:
        print(f" [contraction] {t_contract:.3f}s")
        return np.array(sv).reshape(-1), t_search + t_contract
    return None, t_search + t_contract
|
||||
|
||||
|
||||
def run_mpi_expval(
    circuit,
    nqubits,
    observable=None,
    total_repeats=1024,
    search_workers=1,
    search_timeout=300,
):
    """Evaluate a Hamiltonian expectation value term by term over MPI.

    Hamiltonian terms are dealt out round-robin to the ranks; each rank sums
    its own contributions and rank 0 adds the per-rank sums.

    Args:
        circuit: qibo circuit preparing the state.
        nqubits: number of qubits, passed to ``check_observable``.
        observable: observable specification accepted by ``check_observable``.
        total_repeats: trials for each per-term path search.
        search_workers: worker processes for each per-term path search.
        search_timeout: time budget (seconds) for each per-term path search.

    Returns:
        ``(expectation, seconds)``; the expectation is ``None`` off rank 0.
        ``seconds`` is the local wall time of the calling rank.

    Raises:
        RuntimeError: if a path search returns no tree.
    """
    import torch

    comm = MPI.COMM_WORLD
    rank, size = comm.Get_rank(), comm.Get_size()

    qibo.set_backend("qibotn", platform="quimb")
    b = qibo.get_backend()
    b.configure_tn_simulation(ansatz="tn")

    observable = check_observable(observable, nqubits)
    ham_gate_map = extract_gates_and_qubits(observable)

    qc = b._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=b.circuit_ansatz,
        gate_opts={"max_bond": None, "cutoff": 1e-10},
    )

    my_terms = ham_gate_map[rank::size]
    torch.set_num_threads(max(1, 96 // size))
    started = time.time()

    my_exp = 0.0 + 0.0j
    for term in my_terms:
        coeff, op, where = _term_to_quimb_operator(term)
        if op is None:
            # Pure-identity term: contributes its coefficient directly.
            my_exp += coeff
            continue

        tn = qc.local_expectation_tn(op, where=where)
        if len(tn.outer_inds()) != 0:
            tree = parallel_search(
                tn,
                tn.outer_inds(),
                total_repeats,
                n_workers=search_workers,
                num_slices=1,
                n_ranks=size,
                timeout=search_timeout,
            )
            if tree is None:
                raise RuntimeError("Failed to find a contraction tree for expectation TN.")
            arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex128) for a in tn.arrays]
            # Slices are contracted one after another on this rank.
            acc = sum(tree.contract_slice(arrays, i) for i in range(tree.multiplicity))
            val = complex(acc.item() if hasattr(acc, 'item') else acc)
        else:
            val = complex(tn.contract())
        my_exp += coeff * val

    t_total = time.time() - started

    all_results = comm.gather(my_exp, root=0)
    if rank != 0:
        return None, t_total

    total_exp = sum(all_results)
    print(f"\n[TN expval] time={t_total:.4f}s expval={total_exp.real:.12f}")
    return np.real_if_close(total_exp), t_total
|
||||
|
||||
|
||||
def main():
    """Command-line entry point of the MPI tensor-network benchmark.

    ``--mode sv`` computes the full statevector (optionally comparing against
    qibojit), ``--mode expval`` computes an expectation value. Results are
    written as ``.npy`` files under ``data/`` by rank 0.
    """
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=30)
    parser.add_argument("--circuit", type=str, default="qft",
                        choices=["qft", "variational", "ghz", "brickwork"])
    parser.add_argument("--nlayers", type=int, default=3)
    parser.add_argument("--num-slices", type=int, default=1)
    parser.add_argument("--total-repeats", type=int, default=1024)
    parser.add_argument("--search-workers", type=int, default=1)
    parser.add_argument("--search-timeout", type=int, default=300)
    parser.add_argument("--observable-file", type=str, default=None)
    parser.add_argument("--observable-json", type=str, default=None)
    parser.add_argument("--save-path", type=str, default=None)
    parser.add_argument("--load-path", type=str, default=None)
    parser.add_argument("--no-compare", action="store_true")
    parser.add_argument("--mode", type=str, default="sv", choices=["sv", "expval"])
    args = parser.parse_args()

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if rank == 0:
        print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, "
              f"nlayers={args.nlayers}, ranks={comm.Get_size()}")
        # np.save does not create directories; make sure the output directory
        # exists up front so a long run cannot fail at the final write.
        os.makedirs("data", exist_ok=True)

    # Fixed seed so every rank builds the same random circuit.
    np.random.seed(42)
    circuit = make_circuit(args.circuit, args.nqubits, args.nlayers)
    observable = _load_observable(args.observable_file, args.observable_json)

    if args.mode == "expval":
        try:
            expval, t_total = run_mpi_expval(
                circuit,
                args.nqubits,
                observable=observable,
                total_repeats=args.total_repeats,
                search_workers=args.search_workers,
                search_timeout=args.search_timeout,
            )
        except Exception as e:
            if rank == 0:
                print(f"[FAILED] {e}")
            raise
        if rank == 0:
            np.save(f"data/expval_tn_{args.circuit}{args.nqubits}.npy", np.asarray(expval))
            if not args.no_compare:
                print("No built-in reference comparison for arbitrary observables.")
        return

    try:
        sv, t_total = run_mpi(circuit, args.nqubits, args.num_slices,
                              total_repeats=args.total_repeats,
                              load_path=args.load_path, save_path=args.save_path)
    except Exception as e:
        if rank == 0:
            print(f"[FAILED] {e}")
        raise

    # sv is None on non-root ranks and in save-only mode.
    if rank == 0 and sv is not None:
        print(f"\n[quimb TN MPI] time={t_total:.4f}s shape={sv.shape}")
        np.save(f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy", sv)

        if not args.no_compare:
            from qibotn.bak.benchmark_tn import run_qibojit
            import gc
            # Re-seed so the reference circuit matches the benchmarked one.
            np.random.seed(42)
            circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
            sv_ref, t_ref = run_qibojit(circuit_ref)
            np.save(f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy", sv_ref)
            print(f"[qibojit] time={t_ref:.4f}s")
            # free memory before loading via mmap for expval comparison
            del sv, sv_ref
            gc.collect()
            from compare_jit_tn_quimb import check_results
            ref_path = f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy"
            tn_path = f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy"
            check_results(ref_path, tn_path, args.nqubits)
            if t_total > 0:
                print(f"Speedup : {t_ref/t_total:.2f}x")


if __name__ == "__main__":
    main()
|
||||
@@ -1,25 +0,0 @@
|
||||
"""Check contraction tree statistics."""
|
||||
import pickle
import sys

# First CLI argument selects the tree file; otherwise use the default tree.
path = sys.argv[1] if len(sys.argv) > 1 else "data/tree_q25_l10.pkl"
# NOTE: pickle.load executes arbitrary code -- only load trusted files.
with open(path, 'rb') as f:
    tree = pickle.load(f)

# Intel 8558P: 96 cores, 2.1GHz, AVX-512 (16 FP64/cycle), FMA x2
# complex128 multiply-add = 6 real FLOPs
CORES = 96
FREQ = 2.1e9
AVX512_FP64 = 16
TFLOPS = CORES * FREQ * AVX512_FP64 * 2 / 1e12  # ~6.45 TFLOPS real FP64
COMPLEX_FLOPS = TFLOPS / 6  # complex128 effective

flops = tree.total_flops()
slices = tree.multiplicity
# NOTE(review): if tree.total_flops() already accounts for all slices, the
# extra factor `slices` below double-counts -- confirm against cotengra docs.
est_seconds = flops * slices / (COMPLEX_FLOPS * 1e12)

report = [
    f"File: {path}",
    f"Peak memory (GB): {tree.max_size() * 16 / 1e9:.2f}",
    f"Total FLOPs: {flops:.2e} x{slices} slices = {flops*slices:.2e}",
    f"Contraction width: {tree.contraction_width()}",
    f"Multiplicity (slices): {slices}",
    f"Estimated time (96 cores): {est_seconds:.1f}s ({est_seconds/3600:.2f}h)",
]
print("\n".join(report))
|
||||
@@ -1,137 +0,0 @@
|
||||
"""Compare QMatchaTeaBackend with the VidalBackend fast path."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed, kind):
    """Build a layered random-rotation circuit with a chosen entangling pattern.

    Every layer applies RY then RZ with random angles in ``[-pi, pi)`` to each
    qubit, followed by the two-qubit gates selected by ``kind``.

    Args:
        nqubits: number of qubits.
        nlayers: number of layers.
        seed: seed for the numpy random generator.
        kind: ``"brickwall"``, ``"shifted-cz"`` or ``"reversed-cnot"``.

    Raises:
        ValueError: if ``kind`` is unknown (checked while building a layer).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    last = nqubits - 1

    for depth in range(nlayers):
        for wire in range(nqubits):
            circuit.add(gates.RY(wire, theta=rng.uniform(-math.pi, math.pi)))
            circuit.add(gates.RZ(wire, theta=rng.uniform(-math.pi, math.pi)))

        if kind == "shifted-cz":
            # Pairs start at qubit 0 on even layers and qubit 1 on odd layers.
            entanglers = [gates.CZ(w, w + 1) for w in range(depth % 2, last, 2)]
        elif kind == "brickwall":
            entanglers = [gates.CNOT(w, w + 1) for w in range(0, last, 2)]
            entanglers += [gates.CNOT(w, w + 1) for w in range(1, last, 2)]
        elif kind == "reversed-cnot":
            # Even pairs have control and target swapped.
            entanglers = [gates.CNOT(w + 1, w) for w in range(0, last, 2)]
            entanglers += [gates.CNOT(w, w + 1) for w in range(1, last, 2)]
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")

        for gate in entanglers:
            circuit.add(gate)

    return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits, kind):
    """Build one of the benchmark observables as a symbolic Hamiltonian.

    Args:
        nqubits: number of qubits.
        kind: ``"ring-xz"`` (periodic X_q Z_{q+1} chain, weight 0.5),
            ``"open-zz"`` (open Z_q Z_{q+1} chain, averaged over the bonds) or
            ``"mixed"`` (single-site X/Z terms plus Y Y on every third bond).

    Raises:
        ValueError: if ``kind`` is unknown.
    """
    if kind not in ("ring-xz", "open-zz", "mixed"):
        raise ValueError(f"Unknown observable kind {kind!r}.")

    form = 0
    if kind == "ring-xz":
        for site in range(nqubits):
            neighbour = (site + 1) % nqubits
            form += 0.5 * X(site) * Z(neighbour)
    elif kind == "open-zz":
        for site in range(nqubits - 1):
            form += Z(site) * Z(site + 1) / (nqubits - 1)
    else:
        form += 0.25 * X(0) - 0.5 * Z(nqubits - 1)
        for site in range(0, nqubits - 1, 3):
            form += 0.125 * Y(site) * Y(site + 1)

    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def run_backend(backend, circuit, observable):
    """Time one expectation-value evaluation on ``backend``.

    Returns:
        ``(value, seconds)``: the real part of the expectation as a float and
        the elapsed wall time measured with ``time.perf_counter``.
    """
    t_begin = time.perf_counter()
    raw = backend.expectation(
        circuit,
        observable,
        preprocess=False,
        compile_circuit=True,
    )
    real_part = float(np.real(raw))
    elapsed = time.perf_counter() - t_begin
    return real_part, elapsed
|
||||
|
||||
|
||||
def main():
    """Run the QMatchaTea and Vidal backends on one circuit and print a table.

    Each row shows the backend name, its expectation value, the absolute error
    against ``--reference-file`` (``nan`` without one) and the wall time.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=34)
    parser.add_argument("--nlayers", type=int, default=20)
    parser.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--tensor-module", choices=("torch", "numpy"), default="torch")
    parser.add_argument("--torch-threads", type=int, default=32)
    parser.add_argument(
        "--circuit-kind",
        choices=("brickwall", "shifted-cz", "reversed-cnot"),
        default="brickwall",
    )
    parser.add_argument(
        "--observable-kind",
        choices=("ring-xz", "open-zz", "mixed"),
        default="ring-xz",
    )
    parser.add_argument("--reference-file")
    parser.add_argument("--skip-qmatchatea", action="store_true")
    args = parser.parse_args()

    torch.set_num_threads(args.torch_threads)
    circuit = build_circuit(args.nqubits, args.nlayers, args.seed, args.circuit_kind)
    observable = build_observable(args.nqubits, args.observable_kind)

    # Optional exact reference value, read from a JSON file.
    if args.reference_file:
        with open(args.reference_file, "r", encoding="utf-8") as f:
            exact = float(json.load(f)["expectation"])
    else:
        exact = None

    print(
        f"nqubits={args.nqubits} nlayers={args.nlayers} bond={args.bond} "
        f"circuit={args.circuit_kind} observable={args.observable_kind} "
        f"tensor_module={args.tensor_module} torch_threads={args.torch_threads}"
    )
    if exact is not None:
        print(f"exact={exact:.16e}")
    print("backend value abs_error seconds")

    # Settings common to both backends.
    shared = {
        "ansatz": "MPS",
        "max_bond_dimension": args.bond,
        "cut_ratio": 1e-12,
        "tensor_module": args.tensor_module,
        "compile_circuit": True,
    }

    if not args.skip_qmatchatea:
        qmt = QMatchaTeaBackend()
        qmt.configure_tn_simulation(svd_control="E!", track_memory=False, **shared)
        value, seconds = run_backend(qmt, circuit, observable)
        error = abs(value - exact) if exact is not None else float("nan")
        print(f"qmatchatea {value:.16e} {error:.6e} {seconds:.3f}")

    vidal = VidalBackend()
    vidal.configure_tn_simulation(fallback=True, **shared)
    value, seconds = run_backend(vidal, circuit, observable)
    error = abs(value - exact) if exact is not None else float("nan")
    print(f"vidal {value:.16e} {error:.6e} {seconds:.3f}")


if __name__ == "__main__":
    main()
|
||||
@@ -1,33 +0,0 @@
|
||||
"""Example custom case for tools/run_tn_custom.py."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
    """Build the example circuit: random RY/RZ layers with RXX/RZZ couplings.

    Every layer rotates each qubit with angles from ``[-pi, pi)`` and then
    couples alternating nearest-neighbour pairs with RXX and RZZ gates whose
    angles are drawn from ``[-0.7, 0.7)``.
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    for depth in range(nlayers):
        for wire in range(nqubits):
            for rotation in (gates.RY, gates.RZ):
                circuit.add(rotation(wire, theta=rng.uniform(-math.pi, math.pi)))

        # Even layers couple pairs (0,1),(2,3),...; odd layers (1,2),(3,4),...
        first = depth % 2
        for wire in range(first, nqubits - 1, 2):
            for coupling in (gates.RXX, gates.RZZ):
                circuit.add(coupling(wire, wire + 1, theta=rng.uniform(-0.7, 0.7)))

    return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits, seed):
    """Describe an averaged nearest-neighbour ZZ chain as a plain dictionary.

    Args:
        nqubits: number of qubits; one term is produced per open-chain bond.
        seed: unused by this observable.

    Returns:
        ``{"terms": [...]}`` where each term has a ``"coefficient"`` of
        ``1 / max(1, nqubits - 1)`` and two ``("Z", site)`` operators.
    """
    bonds = nqubits - 1
    terms = []
    for left in range(bonds):
        terms.append(
            {
                "coefficient": 1.0 / max(1, bonds),
                "operators": [("Z", left), ("Z", left + 1)],
            }
        )
    return {"terms": terms}
|
||||
@@ -1,208 +0,0 @@
|
||||
"""Inspect cotengra contraction trees for dominant torch matmul shapes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import importlib
|
||||
import math
|
||||
import pickle
|
||||
from collections import Counter, defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _prod(values):
|
||||
out = 1
|
||||
for value in values:
|
||||
out *= int(value)
|
||||
return out
|
||||
|
||||
|
||||
def _broadcast_batch(a_batch, b_batch):
|
||||
if a_batch == b_batch:
|
||||
return _prod(a_batch)
|
||||
if not a_batch:
|
||||
return _prod(b_batch)
|
||||
if not b_batch:
|
||||
return _prod(a_batch)
|
||||
|
||||
ndim = max(len(a_batch), len(b_batch))
|
||||
a_batch = (1,) * (ndim - len(a_batch)) + tuple(a_batch)
|
||||
b_batch = (1,) * (ndim - len(b_batch)) + tuple(b_batch)
|
||||
return _prod(max(a, b) for a, b in zip(a_batch, b_batch))
|
||||
|
||||
|
||||
def _load_tree(path, index):
|
||||
with Path(path).open("rb") as f:
|
||||
payload = pickle.load(f)
|
||||
trees = payload["trees"] if isinstance(payload, dict) else payload
|
||||
if not isinstance(trees, (list, tuple)):
|
||||
trees = [trees]
|
||||
return trees[index]
|
||||
|
||||
|
||||
def _analyze_tree(tree):
    """Classify every pairwise contraction of ``tree`` by its matmul shape.

    Uses cotengra's contraction extraction and its (private) matmul parsers.

    Returns:
        ``(contractions, ops, counts)``: the raw contraction list, one record
        per pairwise contraction (kind, matmul shape, flops, output size,
        operand shapes and ranks, permutation), and a ``Counter`` of kinds
        (``"preprocess"``, ``"mul"``, ``"mm"``, ``"bmm"``).
    """
    contract_mod = importlib.import_module("cotengra.contract")
    contractions = contract_mod.extract_contractions(tree)
    sizes = tree.size_dict
    records = []
    tally = Counter()

    for position, step in enumerate(contractions):
        parent, left, right, tdot, arg, perm = step

        # Entries without operands are single-tensor preprocessing steps.
        if left is None and right is None:
            tally["preprocess"] += 1
            continue

        left_inds = tree.get_inds(left)
        right_inds = tree.get_inds(right)
        parent_inds = tree.get_inds(parent)
        left_shape = tuple(sizes[ix] for ix in left_inds)
        right_shape = tuple(sizes[ix] for ix in right_inds)

        parser = (
            contract_mod._parse_tensordot_axes_to_matmul
            if tdot
            else contract_mod._parse_eq_to_batch_matmul
        )
        (
            _,
            _,
            new_shape_a,
            new_shape_b,
            _,
            _,
            pure_multiplication,
        ) = parser(arg, left_shape, right_shape)

        if pure_multiplication:
            kind, matmul_shape, matmul_flops = "mul", None, 0
        else:
            # Fall back to the raw operand shape when no reshape is reported.
            a_shape = tuple(new_shape_a or left_shape)
            b_shape = tuple(new_shape_b or right_shape)
            batch = _broadcast_batch(a_shape[:-2], b_shape[:-2])
            m, k, n = int(a_shape[-2]), int(a_shape[-1]), int(b_shape[-1])
            kind = "mm" if batch == 1 else "bmm"
            matmul_shape = (batch, m, k, n)
            matmul_flops = batch * m * k * n

        records.append(
            {
                "index": position,
                "kind": kind,
                "matmul_shape": matmul_shape,
                "matmul_flops": matmul_flops,
                "tree_flops": int(tree.get_flops(parent)),
                "out_size": int(tree.get_size(parent)),
                "left_shape": left_shape,
                "right_shape": right_shape,
                "left_rank": len(left_inds),
                "right_rank": len(right_inds),
                "out_rank": len(parent_inds),
                "perm": perm,
            }
        )
        tally[kind] += 1

    return contractions, records, tally
|
||||
|
||||
|
||||
def _format_log(value, base):
|
||||
return "-inf" if value <= 0 else f"{math.log(value, base):.3f}"
|
||||
|
||||
|
||||
def main():
    """Print flop/size summaries and the dominant matmul shapes of a tree."""
    parser = argparse.ArgumentParser()
    parser.add_argument("tree", help="Pickle file containing one tree or {'trees': [...]}.")
    parser.add_argument("--index", type=int, default=0, help="Tree index in the file.")
    parser.add_argument("--top", type=int, default=20, help="Number of top ops to print.")
    parser.add_argument(
        "--dtype-bytes",
        type=int,
        default=8,
        help="Bytes per element for memory estimates, for example 8 for complex64.",
    )
    args = parser.parse_args()

    tree = _load_tree(args.tree, args.index)
    contractions, ops, counts = _analyze_tree(tree)
    nslices = int(getattr(tree, "multiplicity", 1))

    # Per-slice totals over all pairwise contractions.
    per_slice_flops = 0
    per_slice_write = 0
    max_out = 0
    for op in ops:
        per_slice_flops += op["tree_flops"]
        per_slice_write += op["out_size"]
        max_out = max(max_out, op["out_size"])
    all_flops = per_slice_flops * nslices
    all_write = per_slice_write * nslices

    print(f"tree={args.tree} index={args.index}")
    print(
        "summary "
        f"slices={nslices} contractions={len(contractions)} "
        f"counts={dict(counts)}"
    )
    print(
        "per_slice "
        f"log10_flops={_format_log(per_slice_flops, 10)} "
        f"log10_write={_format_log(per_slice_write, 10)} "
        f"log2_max_output={_format_log(max_out, 2)} "
        f"max_output_gib={max_out * args.dtype_bytes / 1024**3:.6g}"
    )
    print(
        "all_slices "
        f"log10_flops={_format_log(all_flops, 10)} "
        f"log10_write={_format_log(all_write, 10)}"
    )

    print(f"\ntop_{args.top}_ops_by_flops")
    ranked_ops = sorted(ops, key=lambda item: item["tree_flops"], reverse=True)
    for op in ranked_ops[: args.top]:
        print(
            f"op={op['index']} kind={op['kind']} "
            f"flops={op['tree_flops']:.6e} out={op['out_size']:.6e} "
            f"matmul={op['matmul_shape']} "
            f"ranks=({op['left_rank']},{op['right_rank']}->{op['out_rank']}) "
            f"lhs={op['left_shape']} rhs={op['right_shape']}"
        )

    # Aggregate [count, flops, output size] per distinct matmul shape.
    by_shape = defaultdict(lambda: [0, 0, 0])
    for op in ops:
        shape = op["matmul_shape"]
        if shape is not None:
            bucket = by_shape[shape]
            bucket[0] += 1
            bucket[1] += op["tree_flops"]
            bucket[2] += op["out_size"]

    # Same table twice: ranked by total flops (column 1), then by count (column 0).
    for column in (1, 0):
        if column == 1:
            print(f"\ntop_{args.top}_matmul_shapes_by_flops")
        else:
            print(f"\ntop_{args.top}_matmul_shapes_by_count")
        ranking = sorted(
            by_shape.items(),
            key=lambda item: item[1][column],
            reverse=True,
        )
        for shape, (count, flops, out_size) in ranking[: args.top]:
            print(
                f"shape={shape} count={count} "
                f"flops={flops:.6e} output={out_size:.6e}"
            )


if __name__ == "__main__":
    main()
|
||||
@@ -1,223 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Manage the dask cluster used by TN path search.
|
||||
#
|
||||
# Defaults target two servers:
|
||||
# scheduler: 10.20.1.103:8786
|
||||
# workers: 10.20.1.103, 10.20.6.101
|
||||
#
|
||||
# Usage:
|
||||
# tools/manage_tn_dask_cluster.sh start
|
||||
# tools/manage_tn_dask_cluster.sh status
|
||||
# tools/manage_tn_dask_cluster.sh stop
|
||||
#
|
||||
# Common overrides:
|
||||
# SCHEDULER_HOST=10.20.1.103
|
||||
# WORKER_HOSTS="10.20.1.103 10.20.6.101"
|
||||
# NWORKERS=48
|
||||
# NTHREADS=1
|
||||
# ROOT_DIR=/home/yx/qibotn
|
||||
# PYTHON_BIN=.venv/bin/python
|
||||
|
||||
# Every setting below keeps a value already present in the environment and
# otherwise falls back to the default shown (":=" assigns when unset or empty).

# Checkout location and interpreter (PYTHON_BIN is used after cd-ing to ROOT_DIR).
: "${ROOT_DIR:=/home/yx/qibotn}"
: "${PYTHON_BIN:=.venv/bin/python}"

# Scheduler endpoint and dashboard bind address.
: "${SCHEDULER_HOST:=10.20.1.103}"
: "${SCHEDULER_PORT:=8786}"
: "${DASHBOARD_ADDRESS:=:8787}"

# Worker placement and the sizing options forwarded to dask_worker.
: "${WORKER_HOSTS:=10.20.1.103 10.20.6.101}"
: "${NWORKERS:=84}"
: "${NTHREADS:=1}"
: "${MEMORY_LIMIT:=0}"
: "${LOCAL_DIRECTORY:=/tmp/qibotn-dask}"

# Log/pid-file directory and the ssh client used for remote hosts.
: "${LOG_DIR:=$ROOT_DIR/logs/dask}"
: "${SSH_BIN:=ssh}"

# Timeouts exported to dask through DASK_DISTRIBUTED__* variables at launch.
: "${DASK_WORKER_TTL:=24 hours}"
: "${DASK_TICK_LIMIT:=30 minutes}"
: "${DASK_LOST_WORKER_TIMEOUT:=30 minutes}"

SCHEDULER_ADDR="tcp://${SCHEDULER_HOST}:${SCHEDULER_PORT}"
|
||||
|
||||
is_local_host() {
  # Succeed (status 0) when HOST ($1) refers to the machine running this script.
  #
  # HOST is compared with the loopback names, the short and fully-qualified
  # hostname, and finally each address printed by `hostname -I`.
  local host="$1"
  [[ "$host" == "localhost" || "$host" == "127.0.0.1" ]] && return 0
  [[ "$host" == "$(hostname)" ]] && return 0
  [[ "$host" == "$(hostname -f 2>/dev/null || true)" ]] && return 0
  # -F: compare HOST as a literal string; without it the dots of an IP address
  # are regex wildcards.  "--" keeps a HOST starting with "-" from being
  # parsed as a grep option.
  hostname -I 2>/dev/null | tr ' ' '\n' | grep -qxF -- "$host"
}
|
||||
|
||||
run_on_host() {
  # Run a shell snippet on HOST ($1); the remaining arguments are joined with
  # spaces to form the snippet.  Local hosts run it in a login bash directly,
  # remote hosts run it in a login bash over $SSH_BIN.
  local target="$1"
  shift
  local snippet="$*"
  if ! is_local_host "$target"; then
    # %q re-quotes the snippet so the remote shell hands it to bash -lc as a
    # single argument.
    "$SSH_BIN" "$target" "bash -lc $(printf '%q' "$snippet")"
    return
  fi
  bash -lc "$snippet"
}
|
||||
|
||||
start_scheduler() {
  # Launch the dask scheduler on $SCHEDULER_HOST unless the pid recorded in its
  # pid file is still alive.  The log and pid file live under $LOG_DIR and are
  # named after the scheduler host and port.
  local host="$SCHEDULER_HOST"
  local stem="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}"
  local log_path="$stem.log"
  local pid_path="$stem.pid"
  # Unescaped variables are expanded here, before the snippet is sent to the
  # host; \$-escaped ones are evaluated by the shell that runs the snippet.
  local snippet="
    set -euo pipefail
    cd '$ROOT_DIR'
    mkdir -p '$LOG_DIR'
    if [[ -s '$pid_path' ]]; then
      old_pid=\$(cat '$pid_path')
      if kill -0 \"\$old_pid\" 2>/dev/null; then
        echo \"scheduler already running on $host pid=\$old_pid\"
        exit 0
      fi
    fi
    DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL='$DASK_WORKER_TTL' \
    DASK_DISTRIBUTED__ADMIN__TICK__LIMIT='$DASK_TICK_LIMIT' \
    DASK_DISTRIBUTED__DEPLOY__LOST_WORKER_TIMEOUT='$DASK_LOST_WORKER_TIMEOUT' \
    setsid '$PYTHON_BIN' -m distributed.cli.dask_scheduler \
      --host '$SCHEDULER_HOST' \
      --port '$SCHEDULER_PORT' \
      --dashboard-address '$DASHBOARD_ADDRESS' \
      > '$log_path' 2>&1 < /dev/null &
    new_pid=\$!
    echo \"\$new_pid\" > '$pid_path'
    echo \"scheduler host=$host pid=\$new_pid addr=$SCHEDULER_ADDR log=$log_path\"
  "
  run_on_host "$host" "$snippet"
}
|
||||
|
||||
start_worker() {
  # Launch a dask worker group on HOST ($1), connected to $SCHEDULER_ADDR,
  # unless the pid recorded in that host's worker pid file is still alive.
  local host="$1"
  local stem="$LOG_DIR/worker_${host}"
  local log_path="$stem.log"
  local pid_path="$stem.pid"
  # Unescaped variables are expanded here, before the snippet is sent to the
  # host; \$-escaped ones are evaluated by the shell that runs the snippet.
  local snippet="
    set -euo pipefail
    cd '$ROOT_DIR'
    mkdir -p '$LOG_DIR' '$LOCAL_DIRECTORY'
    if [[ -s '$pid_path' ]]; then
      old_pid=\$(cat '$pid_path')
      if kill -0 \"\$old_pid\" 2>/dev/null; then
        echo \"worker already running on $host pid=\$old_pid\"
        exit 0
      fi
    fi
    TCM_ENABLE=1 \
    DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL='$DASK_WORKER_TTL' \
    DASK_DISTRIBUTED__ADMIN__TICK__LIMIT='$DASK_TICK_LIMIT' \
    DASK_DISTRIBUTED__DEPLOY__LOST_WORKER_TIMEOUT='$DASK_LOST_WORKER_TIMEOUT' \
    setsid '$PYTHON_BIN' -m distributed.cli.dask_worker \
      '$SCHEDULER_ADDR' \
      --host '$host' \
      --nworkers '$NWORKERS' \
      --nthreads '$NTHREADS' \
      --memory-limit '$MEMORY_LIMIT' \
      --local-directory '$LOCAL_DIRECTORY' \
      > '$log_path' 2>&1 < /dev/null &
    new_pid=\$!
    echo \"\$new_pid\" > '$pid_path'
    echo \"worker host=$host pid=\$new_pid scheduler=$SCHEDULER_ADDR log=$log_path\"
  "
  run_on_host "$host" "$snippet"
}
|
||||
|
||||
stop_host() {
  # Stop the dask processes on HOST ($1): kill the pids recorded in the pid
  # files, remove those files, then pkill anything still matching this
  # cluster's worker/scheduler command lines.  Always succeeds.
  local host="$1"
  local scheduler_pid_path="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.pid"
  local worker_pid_path="$LOG_DIR/worker_${host}.pid"
  # The scheduler pid file is only honoured on the scheduler host itself.
  local pid_paths="'$worker_pid_path'"
  if [[ "$host" == "$SCHEDULER_HOST" ]]; then
    pid_paths+=" '$scheduler_pid_path'"
  fi
  local snippet="
    set +e
    for pid_path in $pid_paths; do
      if [[ ! -f \"\$pid_path\" ]]; then
        continue
      fi
      recorded=\$(cat \"\$pid_path\")
      kill \"\$recorded\" 2>/dev/null || true
      rm -f \"\$pid_path\"
    done
    pkill -f '[d]istributed.cli.dask_worker.*$SCHEDULER_ADDR'
    pkill -f '[d]istributed.cli.dask_scheduler.*--port $SCHEDULER_PORT'
    true
  "
  run_on_host "$host" "$snippet"
}
|
||||
|
||||
status_host() {
  # Report the dask processes on HOST ($1): one `ps` line per live pid recorded
  # in the pid files, a "stale" line per dead one, then every process whose
  # command line mentions distributed.cli.dask.
  local host="$1"
  local scheduler_pid_path="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.pid"
  local worker_pid_path="$LOG_DIR/worker_${host}.pid"
  # The scheduler pid file is only inspected on the scheduler host itself.
  local pid_paths="'$worker_pid_path'"
  if [[ "$host" == "$SCHEDULER_HOST" ]]; then
    pid_paths+=" '$scheduler_pid_path'"
  fi
  echo "--------------------------------------------------------------------------------"
  echo "host=$host"
  local snippet="
    set +e
    for pid_path in $pid_paths; do
      if [[ ! -f \"\$pid_path\" ]]; then
        continue
      fi
      recorded=\$(cat \"\$pid_path\")
      if kill -0 \"\$recorded\" 2>/dev/null; then
        ps -p \"\$recorded\" -o pid,ppid,stat,etime,cmd --no-headers
      else
        echo \"stale pid_file=\$pid_path pid=\$recorded\"
      fi
    done
    pgrep -af '[d]istributed.cli.dask' || true
  "
  run_on_host "$host" "$snippet"
}
|
||||
|
||||
print_usage() {
  # Print the usage text together with the effective settings.
  cat <<EOF
Usage: tools/manage_tn_dask_cluster.sh [start|stop|restart|status]

Defaults:
SCHEDULER_HOST=$SCHEDULER_HOST
SCHEDULER_PORT=$SCHEDULER_PORT
WORKER_HOSTS="$WORKER_HOSTS"
NWORKERS=$NWORKERS
NTHREADS=$NTHREADS
ROOT_DIR=$ROOT_DIR
PYTHON_BIN=$PYTHON_BIN
DASK_WORKER_TTL="$DASK_WORKER_TTL"
DASK_TICK_LIMIT=$DASK_TICK_LIMIT
DASK_LOST_WORKER_TIMEOUT=$DASK_LOST_WORKER_TIMEOUT

Search command after start:
TCM_ENABLE=1 python -u tools/tn_contest_runner.py search \\
--case main1 \\
--dask-address $SCHEDULER_ADDR \\
--torch-threads 48 \\
--dtype complex64 \\
--tn-search-repeats 2048 \\
--tn-search-time 300
EOF
}

# Sub-command dispatch; a missing or unrecognised sub-command prints the usage
# text and exits with status 2.
action="${1:-help}"
case "$action" in
  start)
    start_scheduler
    sleep 2
    for host in $WORKER_HOSTS; do
      start_worker "$host"
    done
    echo
    echo "Dask scheduler: $SCHEDULER_ADDR"
    echo "Dashboard: http://$SCHEDULER_HOST$DASHBOARD_ADDRESS"
    ;;
  stop)
    for host in $WORKER_HOSTS; do
      stop_host "$host"
    done
    stop_host "$SCHEDULER_HOST"
    ;;
  status)
    status_host "$SCHEDULER_HOST"
    for host in $WORKER_HOSTS; do
      # The scheduler host has already been reported above.
      if [[ "$host" == "$SCHEDULER_HOST" ]]; then
        continue
      fi
      status_host "$host"
    done
    ;;
  restart)
    "$0" stop
    sleep 2
    "$0" start
    ;;
  *)
    print_usage
    exit 2
    ;;
esac
|
||||
@@ -1,313 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""Contest-style multi-node Vidal/MPS expectation runner."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC = ROOT / "src"
|
||||
if str(SRC) not in sys.path:
|
||||
sys.path.insert(0, str(SRC))
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend # noqa: E402
|
||||
from qibotn.expectation_runner import exact_for_observable # noqa: E402
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CaseSpec:
    """Immutable default settings for one named benchmark case."""

    # Circuit family name handed to ``build_circuit``.
    circuit_kind: str
    # Observable names evaluated when none are selected on the command line.
    observables: tuple[str, ...]
    # Default circuit width.
    nqubits: int
    # Default number of circuit layers.
    nlayers: int
    # Default maximum bond dimension (``None`` is allowed by the annotation).
    bond: int | None
    # Default RNG seed.
    seed: int
|
||||
|
||||
|
||||
# Named benchmark cases selectable with ``--case``.  Insertion order is the
# order in which the ``list`` mode prints them.
CASES = dict(
    main1=CaseSpec(
        circuit_kind="reversed_cnot",
        observables=("ring_xz",),
        nqubits=128,
        nlayers=24,
        bond=512,
        seed=31001,
    ),
    main2=CaseSpec(
        circuit_kind="rxx_rzz",
        observables=("open_zz", "range2_xx", "mixed_local"),
        nqubits=128,
        nlayers=32,
        bond=1024,
        seed=31002,
    ),
    strong=CaseSpec(
        circuit_kind="scramble",
        observables=("ring_xz", "long_z_string", "dense3_spread"),
        nqubits=256,
        nlayers=48,
        bond=2048,
        seed=41001,
    ),
)
|
||||
|
||||
|
||||
def optional_int(text):
    """Convert *text* to ``int``, mapping "no limit" spellings to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter case)
    yield ``None``; every other value is passed to ``int``.
    """
    if not isinstance(text, str):
        return int(text)
    if text.lower() in {"none", "null", "inf", "unlimited"}:
        return None
    return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Convert *text* to ``float``, mapping "no limit" spellings to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter case)
    yield ``None`` -- note that ``"inf"`` therefore never becomes
    ``float("inf")``.  Every other value is passed to ``float``.
    """
    if not isinstance(text, str):
        return float(text)
    if text.lower() in {"none", "null", "inf", "unlimited"}:
        return None
    return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
    """Format *value* with format spec *fmt*, or return ``"None"`` for ``None``."""
    if value is None:
        return "None"
    return format(value, fmt)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
    """Best-effort: ask torch to use *nthreads* threads.

    Any failure (torch missing, invalid thread count, ...) is ignored on
    purpose so the caller never fails because of this tuning step.
    """
    try:
        from torch import set_num_threads

        set_num_threads(nthreads)
    except Exception:
        return
|
||||
|
||||
|
||||
def add_single_qubit_layer(circuit, nqubits, rng, include_rx=False):
    """Append one layer of random single-qubit rotations to *circuit*.

    Each qubit receives an RY then an RZ (then an RX when *include_rx* is
    true); every angle is drawn from ``rng.uniform(-pi, pi)`` in that order.
    """
    rotations = [gates.RY, gates.RZ]
    if include_rx:
        rotations.append(gates.RX)
    for qubit in range(nqubits):
        for rotation in rotations:
            circuit.add(rotation(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build the seeded benchmark circuit of family *kind*.

    Every layer starts with random single-qubit rotations, followed by a
    brick pattern of two-qubit gates on neighbouring qubits:

    * ``"reversed_cnot"``: CNOTs on the even pairs then on the odd pairs; the
      control/target orientation of each set alternates with the layer parity.
    * ``"rxx_rzz"``: RXX and RZZ with angles in [-0.9, 0.9] on the pairs
      starting at ``layer % 2``.
    * ``"scramble"``: like ``"rxx_rzz"`` with angles in [-0.8, 0.8], plus a
      SWAP on each pair in every fifth layer.

    Args:
        kind: Circuit family name (one of the three above).
        nqubits: Number of qubits.
        nlayers: Number of layers.
        seed: Seed for ``numpy.random.default_rng``.

    Returns:
        The assembled ``Circuit``.

    Raises:
        ValueError: If *kind* is not a known family.
    """
    # Validate once, up front: checking inside the layer loop let an unknown
    # kind pass silently whenever nlayers was 0.
    if kind not in ("reversed_cnot", "rxx_rzz", "scramble"):
        raise ValueError(f"Unknown circuit kind {kind!r}.")

    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    for layer in range(nlayers):
        if kind == "reversed_cnot":
            add_single_qubit_layer(circuit, nqubits, rng)
            for qubit in range(0, nqubits - 1, 2):
                gate = gates.CNOT(qubit + 1, qubit) if layer % 2 else gates.CNOT(qubit, qubit + 1)
                circuit.add(gate)
            for qubit in range(1, nqubits - 1, 2):
                gate = gates.CNOT(qubit + 1, qubit) if layer % 2 == 0 else gates.CNOT(qubit, qubit + 1)
                circuit.add(gate)

        elif kind == "rxx_rzz":
            add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
            for qubit in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.9, 0.9)))
                circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.9, 0.9)))

        else:  # kind == "scramble"
            add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
            for qubit in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.8, 0.8)))
                circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.8, 0.8)))
                if layer % 5 == 4:
                    circuit.add(gates.SWAP(qubit, qubit + 1))

    return circuit
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
    """Return a seeded random Hermitian observable acting on *qubits*.

    A complex Gaussian ``dim x dim`` matrix is symmetrised into a Hermitian
    one and scaled to unit Frobenius norm.  *nqubits* is accepted but unused.

    Returns:
        ``{"matrix": <ndarray>, "qubits": <list of the given qubits>}``.
    """
    rng = np.random.default_rng(seed)
    shape = (dim, dim)
    # Draw the real part first, then the imaginary part, to keep the stream
    # of random numbers in a fixed order.
    real_part = rng.normal(size=shape)
    imag_part = rng.normal(size=shape)
    raw = real_part + 1j * imag_part
    hermitian = (raw + raw.conj().T) / 2.0
    normalised = hermitian / np.linalg.norm(hermitian)
    return {"matrix": normalised, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def observable(kind, nqubits, seed):
    """Build the observable named *kind* for an *nqubits*-qubit register.

    Symbolic kinds return a ``hamiltonians.SymbolicHamiltonian``; the
    ``dense*`` kinds return the dict produced by ``dense_observable``.
    Reference sites are ``q1 = n // 4``, ``q2 = n // 2``, ``q3 = 3n // 4`` and
    ``last = n - 1``.

    Args:
        kind: Observable name.
        nqubits: Number of qubits.
        seed: Base seed, used only by the dense observables.

    Raises:
        ValueError: If *kind* is not a known observable name.
    """
    q1 = nqubits // 4
    q2 = nqubits // 2
    q3 = (3 * nqubits) // 4
    last = nqubits - 1

    if kind == "boundary_ZZ_q1":
        return hamiltonians.SymbolicHamiltonian(form=Z(q1 - 1) * Z(q1))
    if kind == "boundary_ZZ_q2":
        return hamiltonians.SymbolicHamiltonian(form=Z(q2 - 1) * Z(q2))
    if kind == "boundary_ZZ_q3":
        return hamiltonians.SymbolicHamiltonian(form=Z(q3 - 1) * Z(q3))
    # NOTE(review): CASES["strong"] requests "long_z_string", which previously
    # had no branch here and always raised ValueError.  It is treated as an
    # alias of the 5-site Z string -- confirm this is the intended definition.
    if kind in ("long_Z_5_sites", "long_z_string"):
        return hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(q1) * Z(q2) * Z(q3) * Z(last))
    if kind == "mixed_XZYZX":
        return hamiltonians.SymbolicHamiltonian(form=X(0) * Z(q1) * Y(q2) * Z(q3) * X(last))
    if kind == "ring_xz":
        # 0.5 * sum_i X_i Z_{i+1} with periodic wrap-around.
        form = 0
        for qubit in range(nqubits):
            form += 0.5 * X(qubit) * Z((qubit + 1) % nqubits)
        return hamiltonians.SymbolicHamiltonian(form=form)
    if kind == "open_zz":
        # Nearest-neighbour ZZ on an open chain, averaged over the bonds.
        form = 0
        for qubit in range(nqubits - 1):
            form += (1.0 / max(1, nqubits - 1)) * Z(qubit) * Z(qubit + 1)
        return hamiltonians.SymbolicHamiltonian(form=form)
    if kind == "range2_xx":
        # Next-nearest-neighbour XX, averaged over the pairs.
        form = 0
        for qubit in range(nqubits - 2):
            form += (1.0 / max(1, nqubits - 2)) * X(qubit) * X(qubit + 2)
        return hamiltonians.SymbolicHamiltonian(form=form)
    if kind == "mixed_local":
        form = 0.25 * X(0) - 0.5 * Z(last) + 0.125 * X(q1) * Z(q2) * Y(q3)
        return hamiltonians.SymbolicHamiltonian(form=form)
    if kind == "complex_iZ0":
        return hamiltonians.SymbolicHamiltonian(form=1.0j * Z(0))
    if kind == "dense2_mid":
        return dense_observable(nqubits, (q2 - 1, q2), seed + 101, 4)
    if kind == "dense3_spread":
        return dense_observable(nqubits, (q1, q2, q3), seed + 202, 8)
    raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def selected_observables(args, case):
    """Pick the observable names to run.

    Precedence: ``args.observables`` (explicit list), then the comma-separated
    ``args.obs_filter`` string (blank entries dropped), then the case default.
    """
    explicit = args.observables
    if explicit:
        return tuple(explicit)

    filter_text = args.obs_filter
    if not filter_text:
        return case.observables

    names = (piece.strip() for piece in filter_text.split(","))
    return tuple(name for name in names if name)
|
||||
|
||||
|
||||
def apply_case_defaults(args):
    """Fill unset command-line options of *args* from its selected case.

    ``nqubits``, ``nlayers`` and ``seed`` are filled when ``None``; ``bond`` is
    filled when it still holds the ``"case-default"`` placeholder.
    ``args.observables`` is always replaced by the resolved tuple of names.
    The namespace is modified in place.
    """
    case = CASES[args.case]
    for field in ("nqubits", "nlayers"):
        if getattr(args, field) is None:
            setattr(args, field, getattr(case, field))
    if args.bond == "case-default":
        args.bond = case.bond
    if args.seed is None:
        args.seed = case.seed
    args.observables = selected_observables(args, case)
|
||||
|
||||
|
||||
def run_case(args):
    """Evaluate every selected observable with the Vidal backend under MPI.

    Rank 0 prints a header and then one row per observable holding the exact
    value (when requested), the computed value, absolute/relative error, wall
    time, the backend's truncation figures and a status string.  A failure
    inside ``backend.expectation`` is reported in the status column instead of
    aborting the run.

    Args:
        args: Parsed command-line namespace after ``apply_case_defaults``.

    Raises:
        ValueError: If ``--exact`` is requested for more qubits than
            ``--exact-max-qubits`` allows.
    """
    set_torch_threads(args.torch_threads)
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Check the exact-reference size on every rank before any collective call.
    # Raising on rank 0 alone would leave the other ranks blocked in the
    # comm.Barrier() below.  (Assumes all ranks parse the same command line.)
    if args.exact and args.nqubits > args.exact_max_qubits:
        raise ValueError(
            f"--exact is limited to {args.exact_max_qubits} qubits by default."
        )

    case = CASES[args.case]
    circuit = build_circuit(case.circuit_kind, args.nqubits, args.nlayers, args.seed)

    if rank == 0:
        print("=" * 88, flush=True)
        print(
            "backend=vidal_mps "
            f"case={args.case} circuit={case.circuit_kind} ranks={size} "
            f"nqubits={args.nqubits} nlayers={args.nlayers} gates={len(circuit.queue)} "
            f"bond={format_optional(args.bond)} cut_ratio={format_optional(args.cut_ratio)} "
            f"torch_threads={args.torch_threads} seed={args.seed} "
            f"observables={','.join(args.observables)}",
            flush=True,
        )
        print("observable exact value abs_error rel_error seconds trunc_sum trunc_max status", flush=True)

    for obs_name in args.observables:
        obs = observable(obs_name, args.nqubits, args.seed)

        # The exact reference is computed on rank 0 only.
        exact = None
        if args.exact and rank == 0:
            exact = exact_for_observable(circuit, obs, args.nqubits)

        backend = VidalBackend()
        backend.configure_tn_simulation(
            max_bond_dimension=args.bond,
            cut_ratio=args.cut_ratio,
            tensor_module="torch",
            mpi_approach="CT",
            mpi_num_procs=size,
            fallback=False,
        )

        # Line the ranks up so the timing below starts together.
        comm.Barrier()
        start = time.perf_counter()
        try:
            value = backend.expectation(
                circuit,
                obs,
                preprocess=True,
                compile_circuit=False,
            )
            status = "ok"
        except Exception as exc:
            # Report the failure in the row and carry on with the next observable.
            value = np.nan
            status = type(exc).__name__ + ":" + str(exc).split("\n", 1)[0]
        seconds = time.perf_counter() - start

        if rank == 0:
            nan = float("nan")
            abs_error = nan if exact is None else abs(value - exact)
            rel_error = nan if exact is None else abs_error / max(abs(exact), 1e-15)
            exact_text = "nan" if exact is None else f"{exact:.16e}"
            # The truncation attributes may be absent if the backend failed
            # before recording them; fall back to nan so the row still prints.
            trunc_sum = getattr(backend, "last_truncation_error", nan)
            trunc_max = getattr(backend, "last_max_truncation_error", nan)
            print(
                f"{obs_name} {exact_text} {value!r} "
                f"{abs_error:.6e} {rel_error:.6e} {seconds:.3f} "
                f"{trunc_sum:.6e} "
                f"{trunc_max:.6e} {status}",
                flush=True,
            )
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    ``list`` prints the registered cases and returns.  ``run`` executes the
    selected case.  ``validate`` does the same with ``--exact`` forced on and
    the qubit count capped at ``--exact-max-qubits``.
    """
    option_specs = (
        (("mode",), {"choices": ("run", "validate", "list")}),
        (("--case",), {"choices": sorted(CASES), "default": "main1"}),
        (("--observables",), {"nargs": "+"}),
        (("--obs-filter",), {"default": ""}),
        (("--nqubits",), {"type": int}),
        (("--nlayers",), {"type": int}),
        (("--bond", "--bonds"), {"dest": "bond", "default": "case-default"}),
        (("--cut-ratio",), {"type": optional_float, "default": 1e-12}),
        (("--seed",), {"type": int}),
        (("--torch-threads",), {"type": int, "default": 8}),
        (("--exact",), {"action": "store_true"}),
        (("--exact-max-qubits",), {"type": int, "default": 24}),
    )
    parser = argparse.ArgumentParser()
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)
    args = parser.parse_args()

    if args.mode == "list":
        for name, case in CASES.items():
            print(
                f"{name}: circuit={case.circuit_kind} "
                f"observables={','.join(case.observables)} "
                f"nqubits={case.nqubits} nlayers={case.nlayers} "
                f"bond={case.bond} seed={case.seed}"
            )
        return

    apply_case_defaults(args)
    # A bond given on the command line is still a string at this point.
    if isinstance(args.bond, str):
        args.bond = optional_int(args.bond)

    if args.mode == "validate":
        args.exact = True
        args.nqubits = min(args.nqubits, args.exact_max_qubits)

    run_case(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,72 +0,0 @@
|
||||
"""Chrome trace profiler for the VidalBackend fast path."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from torch.profiler import ProfilerActivity, profile
|
||||
|
||||
from qibotn.benchmark_cases import build_circuit, terms_to_dict, observable_terms
|
||||
from qibotn.expectation_runner import ExpectationConfig, run_cpu_expectation
|
||||
|
||||
|
||||
def main():
    """Profile one ``run_cpu_expectation`` call with ``torch.profiler``.

    Runs a ``brickwall_cnot`` circuit with the ``ring_xz`` observable, then
    prints the operator tables and writes them, together with a Chrome trace,
    under ``profiles/``.
    """
    parser = argparse.ArgumentParser()
    numeric_options = (
        ("--nqubits", int, 34),
        ("--nlayers", int, 20),
        ("--bond", int, 512),
        ("--seed", int, 42),
        ("--torch-threads", int, 32),
        ("--cut-ratio", float, 1e-12),
    )
    for flag, kind, default in numeric_options:
        parser.add_argument(flag, type=kind, default=default)
    parser.add_argument("--profile-memory", action="store_true")
    parser.add_argument("--rows", type=int, default=60)
    args = parser.parse_args()

    torch.set_num_threads(args.torch_threads)

    # Both output files share one stem that encodes the run parameters.
    prefix = f"profiles/vidal_n{args.nqubits}_l{args.nlayers}_b{args.bond}_t{args.torch_threads}"
    trace_path = Path(f"{prefix}.json")
    table_path = Path(f"{prefix}.txt")
    trace_path.parent.mkdir(parents=True, exist_ok=True)

    circuit = build_circuit("brickwall_cnot", args.nqubits, args.nlayers, args.seed)
    observable = terms_to_dict(observable_terms("ring_xz", args.nqubits))
    config = ExpectationConfig(
        ansatz="mps",
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        torch_threads=args.torch_threads,
    )

    print(
        f"profile vidal nqubits={args.nqubits} nlayers={args.nlayers} "
        f"bond={args.bond} threads={args.torch_threads}"
    )

    # --profile-memory also turns on shape and stack recording.
    detailed = args.profile_memory
    with profile(
        activities=[ProfilerActivity.CPU],
        record_shapes=detailed,
        profile_memory=detailed,
        with_stack=detailed,
    ) as prof:
        result = run_cpu_expectation(circuit, observable, config)

    averages = prof.key_averages()
    by_self = averages.table(sort_by="self_cpu_time_total", row_limit=args.rows)
    by_total = averages.table(sort_by="cpu_time_total", row_limit=args.rows)
    table = "".join(
        (
            f"expval={result.value:.16e}\n\n",
            "# sorted by self_cpu_time_total\n",
            f"{by_self}\n\n",
            "# sorted by cpu_time_total\n",
            f"{by_total}\n",
        )
    )

    print(table, end="")
    table_path.write_text(table, encoding="utf-8")
    prof.export_chrome_trace(str(trace_path))
    print(f"trace={trace_path}\ntable={table_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,109 +0,0 @@
|
||||
"""Compute and cache a qibojit state-vector reference for the ring-XZ observable."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import qibo
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
    """Build the seeded brick-wall circuit used for the reference value.

    Each layer applies a random RY and RZ to every qubit, then CNOTs on the
    even nearest-neighbour pairs followed by the odd ones.
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    for _ in range(nlayers):
        for qubit in range(nqubits):
            for rotation in (gates.RY, gates.RZ):
                circuit.add(rotation(qubit, theta=rng.uniform(-math.pi, math.pi)))
        # Pairs starting at even qubits first, then pairs starting at odd ones.
        for first in (0, 1):
            for control in range(first, nqubits - 1, 2):
                circuit.add(gates.CNOT(control, control + 1))
    return circuit
|
||||
|
||||
|
||||
def ring_xz_expectation(state, nqubits, chunk_size):
    """Return the real part of ``<state| 0.5 * sum_i X_i Z_(i+1 mod n) |state>``.

    Qubit 0 is the most significant bit of the basis index.  The state is
    processed in slices of *chunk_size* amplitudes so that the temporary index
    arrays stay small.

    Args:
        state: Flat state vector (indexed with integer arrays and slices).
        nqubits: Number of qubits.
        chunk_size: Number of amplitudes handled per slice.
    """
    total = 0.0
    dim = state.size
    for qubit in range(nqubits):
        partner = (qubit + 1) % nqubits
        flip_mask = 1 << (nqubits - 1 - qubit)  # bit toggled by X on `qubit`
        sign_shift = nqubits - 1 - partner  # bit read by Z on `partner`
        partial = 0.0
        for lo in range(0, dim, chunk_size):
            hi = min(lo + chunk_size, dim)
            idx = np.arange(lo, hi, dtype=np.int64)
            signs = 1 - 2 * ((idx >> sign_shift) & 1)
            partial += np.vdot(state[idx ^ flip_mask], signs * state[lo:hi]).real
        total += 0.5 * partial
    return float(total)
|
||||
|
||||
|
||||
def default_output_path(nqubits, nlayers, seed):
    """Return the default cache path, ``references/qibojit_ring_xz_n*_l*_seed*.json``."""
    filename = f"qibojit_ring_xz_n{nqubits}_l{nlayers}_seed{seed}.json"
    return Path("references", filename)
|
||||
|
||||
|
||||
def main():
    """Compute, or load from cache, the qibojit ring-XZ reference expectation.

    An existing output file is reused unless ``--force`` is given.  Otherwise
    the state vector is simulated with qibojit, the expectation is evaluated
    in chunks, and the result is saved as JSON.

    Raises:
        MemoryError: If the estimated state vector exceeds ``--max-state-gb``
            and ``--allow-large`` was not passed.
    """
    parser = argparse.ArgumentParser()
    for flag, default in (("--nqubits", 32), ("--nlayers", 3), ("--seed", 42)):
        parser.add_argument(flag, type=int, default=default)
    parser.add_argument("--output")
    parser.add_argument("--force", action="store_true")
    parser.add_argument("--allow-large", action="store_true")
    parser.add_argument("--max-state-gb", type=float, default=32.0)
    parser.add_argument("--chunk-size", type=int, default=1 << 20)
    args = parser.parse_args()

    if args.output:
        output = Path(args.output)
    else:
        output = default_output_path(args.nqubits, args.nlayers, args.seed)

    if output.exists() and not args.force:
        cached = json.loads(output.read_text(encoding="utf-8"))
        print(f"loaded {output}")
        print(f"expectation={float(cached['expectation']):.16e}")
        return

    # Size of the complex128 state vector alone, in GiB.
    state_gb = (2**args.nqubits) * np.dtype(np.complex128).itemsize / (1024**3)
    too_large = state_gb > args.max_state_gb
    if too_large and not args.allow_large:
        raise MemoryError(
            f"Estimated state vector alone is {state_gb:.1f} GiB. "
            "Pass --allow-large after confirming the node has enough memory."
        )

    qibo.set_backend("qibojit")
    circuit = build_circuit(args.nqubits, args.nlayers, args.seed)

    # The timing covers both the simulation and the expectation evaluation.
    started = time.perf_counter()
    state = circuit().state(numpy=True).reshape(-1)
    expectation = ring_xz_expectation(state, args.nqubits, args.chunk_size)
    elapsed = time.perf_counter() - started

    record = {
        "backend": "qibojit",
        "observable": "0.5 * sum_i X_i Z_((i+1) mod n)",
        "nqubits": args.nqubits,
        "nlayers": args.nlayers,
        "seed": args.seed,
        "expectation": expectation,
        "seconds": elapsed,
        "state_vector_gib_estimate": state_gb,
    }
    output.parent.mkdir(parents=True, exist_ok=True)
    output.write_text(
        json.dumps(record, indent=2, sort_keys=True) + "\n", encoding="utf-8"
    )

    print(f"saved {output}")
    print(f"expectation={expectation:.16e}")
    print(f"seconds={elapsed:.3f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,127 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Large CPU expectation benchmarks for two-server runs.
|
||||
#
|
||||
# Defaults assume two Intel Xeon Platinum 8558P servers with about 500 GiB RAM
|
||||
# each. Override HOSTFILE, PYTHON_BIN, MPIEXEC, or the per-case knobs below as
|
||||
# needed.
|
||||
|
||||
# Work from the repository root (the parent of this script's directory).
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Each setting keeps a value from the environment, else takes the default.
: "${PYTHON_BIN:=.venv/bin/python}"
: "${MPIEXEC:=mpiexec}"
: "${HOSTFILE:=hostfile}"

# MPI rank count and torch threads per rank for the MPS and TN runs.
: "${MPS_RANKS:=8}"
: "${MPS_THREADS:=12}"
: "${TN_RANKS:=12}"
: "${TN_THREADS:=8}"

# OpenMP/MKL thread counts default to 1 and are exported to child processes.
: "${OMP_NUM_THREADS:=1}"
: "${MKL_NUM_THREADS:=1}"
export OMP_NUM_THREADS MKL_NUM_THREADS
|
||||
|
||||
run_mpi() {
  # Usage: run_mpi RANKS SCRIPT [ARGS...]
  # Start $PYTHON_BIN on RANKS MPI ranks, placed according to $HOSTFILE.
  local nranks="$1"
  shift
  "$MPIEXEC" -hostfile "$HOSTFILE" -n "$nranks" "$PYTHON_BIN" "$@"
}
|
||||
|
||||
run_case() {
  # Usage: run_case TITLE COMMAND [ARGS...]
  # Print a banner with TITLE, the effective settings and the command line,
  # then run the command.
  local title="$1"
  shift
  local rule="================================================================================"
  echo
  echo "$rule"
  echo "$title"
  echo "$rule"
  echo "HOSTFILE=$HOSTFILE PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC"
  echo "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS"
  echo "$*"
  "$@"
}
|
||||
|
||||
# Sub-command dispatch.  Each banner title is built from the effective values,
# so it stays accurate when a scale override such as MPS_LONG_NQ is set; with
# the defaults the titles are unchanged.
case "${1:-help}" in
  smoke)
    mps_nq="${MPS_SMOKE_NQ:-40}"
    mps_layers="${MPS_SMOKE_LAYERS:-30}"
    mps_bond="${MPS_SMOKE_BOND:-2048}"
    run_case "MPS MPI smoke: n=$mps_nq layers=$mps_layers bond=$mps_bond" \
      run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
        --mpi --mps \
        --nqubits "$mps_nq" \
        --nlayers "$mps_layers" \
        --bond "$mps_bond" \
        --torch-threads "$MPS_THREADS" \
        --circuits brickwall_cnot reversed_cnot shifted_cz \
        --observables ring_xz open_zz range2_xx

    tn_nq="${TN_SMOKE_NQ:-32}"
    tn_layers="${TN_SMOKE_LAYERS:-16}"
    tn_slices="${TN_SMOKE_SLICES:-12}"
    run_case "TN MPI smoke: n=$tn_nq layers=$tn_layers target_slices=$tn_slices" \
      run_mpi "$TN_RANKS" benchmark_cpu_expectation.py \
        --mpi \
        --nqubits "$tn_nq" \
        --nlayers "$tn_layers" \
        --torch-threads "$TN_THREADS" \
        --circuits brickwall_cnot shifted_cz rxx_rzz \
        --observables ring_xz open_zz range2_xx \
        --tn-target-slices "$tn_slices"
    ;;

  mps-long)
    mps_nq="${MPS_LONG_NQ:-64}"
    mps_layers="${MPS_LONG_LAYERS:-48}"
    mps_bond="${MPS_LONG_BOND:-4096}"
    run_case "MPS MPI long: n=$mps_nq layers=$mps_layers bond=$mps_bond" \
      run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
        --mpi --mps \
        --nqubits "$mps_nq" \
        --nlayers "$mps_layers" \
        --bond "$mps_bond" \
        --torch-threads "$MPS_THREADS" \
        --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
        --observables ring_xz open_zz mixed_local range2_xx
    ;;

  mps-pressure)
    mps_nq="${MPS_PRESSURE_NQ:-80}"
    mps_layers="${MPS_PRESSURE_LAYERS:-64}"
    mps_bond="${MPS_PRESSURE_BOND:-4096}"
    run_case "MPS MPI pressure: n=$mps_nq layers=$mps_layers bond=$mps_bond" \
      run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
        --mpi --mps \
        --nqubits "$mps_nq" \
        --nlayers "$mps_layers" \
        --bond "$mps_bond" \
        --torch-threads "$MPS_THREADS" \
        --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz swap_scramble \
        --observables ring_xz open_zz mixed_local range2_xx long_z_string
    ;;

  tn-long)
    tn_nq="${TN_LONG_NQ:-36}"
    tn_layers="${TN_LONG_LAYERS:-20}"
    tn_slices="${TN_LONG_SLICES:-24}"
    run_case "TN MPI long: n=$tn_nq layers=$tn_layers target_slices=$tn_slices" \
      run_mpi "$TN_RANKS" benchmark_cpu_expectation.py \
        --mpi \
        --nqubits "$tn_nq" \
        --nlayers "$tn_layers" \
        --torch-threads "$TN_THREADS" \
        --circuits brickwall_cnot shifted_cz rxx_rzz \
        --observables ring_xz open_zz range2_xx \
        --tn-target-slices "$tn_slices"
    ;;

  all)
    # Runs smoke, mps-long and tn-long; mps-pressure must be requested by name.
    "$0" smoke
    "$0" mps-long
    "$0" tn-long
    ;;

  help|*)
    cat >&2 <<'EOF'
Usage: tools/run_cpu_large_cases.sh [smoke|mps-long|mps-pressure|tn-long|all]

Common overrides:
HOSTFILE=hostfile
PYTHON_BIN=.venv/bin/python
MPIEXEC=mpiexec
MPS_RANKS=8 MPS_THREADS=12
TN_RANKS=12 TN_THREADS=8

Scale overrides:
MPS_LONG_NQ=64 MPS_LONG_LAYERS=48 MPS_LONG_BOND=4096
MPS_PRESSURE_NQ=80 MPS_PRESSURE_LAYERS=64 MPS_PRESSURE_BOND=4096
TN_LONG_NQ=36 TN_LONG_LAYERS=20 TN_LONG_SLICES=24
EOF
    exit 2
    ;;
esac
|
||||
@@ -1,148 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Single-node CPU scale probes for expectation benchmarks.
|
||||
#
|
||||
# Intended for one 96-core / ~500 GiB RAM node. The default "probe" mode runs
|
||||
# moderate MPS and TN cases first. Larger modes are available after checking
|
||||
# runtime and memory from the probe output.
|
||||
|
||||
# Work from the repository root (the parent of this script's directory).
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Each setting keeps a value from the environment, else takes the default.
: "${PYTHON_BIN:=.venv/bin/python}"
: "${PYTHON_FLAGS:=-u}"
: "${MPIEXEC:=mpiexec}"
: "${TIME_BIN:=/usr/bin/time}"

# MPI rank count and torch threads per rank for the MPS and TN runs.
: "${MPS_RANKS:=8}"
: "${MPS_THREADS:=12}"
: "${TN_RANKS:=8}"
: "${TN_THREADS:=12}"

# OpenMP/MKL thread counts default to 1 and are exported to child processes.
: "${OMP_NUM_THREADS:=1}"
: "${MKL_NUM_THREADS:=1}"
export OMP_NUM_THREADS MKL_NUM_THREADS
|
||||
|
||||
estimate_mps_memory() {
  # Usage: estimate_mps_memory NQUBITS BOND
  # Print a rough resident-memory estimate for an MPS run, in total and per
  # rank ($MPS_RANKS).  The formula is nqubits * 2 * bond^2 * 16 bytes
  # (16 bytes per element is presumably one complex128 -- confirm).
  local n_sites="$1" chi="$2"
  "$PYTHON_BIN" - "$n_sites" "$chi" "$MPS_RANKS" <<'PY'
import sys

nqubits, bond, nranks = (int(arg) for arg in sys.argv[1:4])
GIB = 1024**3
total_bytes = nqubits * 2 * bond * bond * 16
print(
    "MPS rough resident memory: "
    f"total={total_bytes / GIB:.1f} GiB "
    f"per_rank={total_bytes / nranks / GIB:.1f} GiB "
    "(temporary eig/SVD workspaces are additional)"
)
PY
}
|
||||
|
||||
# Echo a command between separator rules, then execute it.
#   $@  the command and its arguments
# The command runs under "$TIME_BIN -v" so that a resource report is printed
# after it finishes. When $TIME_BIN cannot be resolved to an executable, the
# command is still run (without the report) instead of failing the whole case.
run_timed() {
  echo
  echo "--------------------------------------------------------------------------------"
  echo "$*"
  echo "--------------------------------------------------------------------------------"
  # type -P resolves both absolute paths and bare names via PATH, and ignores
  # the shell's own `time` keyword.
  if type -P "$TIME_BIN" >/dev/null 2>&1; then
    "$TIME_BIN" -v "$@"
  else
    echo "warning: $TIME_BIN not found; running without resource report" >&2
    "$@"
  fi
}
|
||||
|
||||
# Run one MPI MPS benchmark case under run_timed.
#   $1  label printed in the banner
#   $2  nqubits, $3 nlayers, $4 bond (forwarded as --nqubits/--nlayers/--bond)
#   $5+ extra arguments appended verbatim to the benchmark command line
run_mps_case() {
  local label="$1" nqubits="$2" nlayers="$3" bond="$4"
  shift 4

  local rule="================================================================================"
  echo
  echo "$rule"
  echo "$label"
  echo "$rule"
  echo "PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC"
  echo "MPS_RANKS=$MPS_RANKS MPS_THREADS=$MPS_THREADS"
  echo "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS"
  estimate_mps_memory "$nqubits" "$bond"

  # PYTHON_FLAGS is intentionally unquoted so that it may carry several flags.
  local -a bench_cmd=(
    "$MPIEXEC" -n "$MPS_RANKS" "$PYTHON_BIN" $PYTHON_FLAGS benchmark_cpu_expectation.py
    --mpi --mps
    --nqubits "$nqubits"
    --nlayers "$nlayers"
    --bond "$bond"
    --torch-threads "$MPS_THREADS"
  )
  run_timed "${bench_cmd[@]}" "$@"
}
|
||||
|
||||
# Run one MPI tensor-network benchmark case under run_timed.
#   $1  label printed in the banner
#   $2  nqubits, $3 nlayers (forwarded as --nqubits/--nlayers)
#   $4+ extra arguments appended verbatim to the benchmark command line
run_tn_case() {
  local label="$1" nqubits="$2" nlayers="$3"
  shift 3

  local rule="================================================================================"
  echo
  echo "$rule"
  echo "$label"
  echo "$rule"
  echo "PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC"
  echo "TN_RANKS=$TN_RANKS TN_THREADS=$TN_THREADS"
  echo "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS"
  echo "TN memory is contraction-tree dependent; increase --tn-target-slices if RSS is high."

  # PYTHON_FLAGS is intentionally unquoted so that it may carry several flags.
  local -a bench_cmd=(
    "$MPIEXEC" -n "$TN_RANKS" "$PYTHON_BIN" $PYTHON_FLAGS benchmark_cpu_expectation.py
    --mpi
    --nqubits "$nqubits"
    --nlayers "$nlayers"
    --torch-threads "$TN_THREADS"
  )
  run_timed "${bench_cmd[@]}" "$@"
}
|
||||
|
||||
# Print the usage text on stdout; callers redirect it to stderr for errors.
usage() {
  cat <<'EOF'
Usage: tools/run_cpu_single_cases.sh [probe|mps-medium|mps-long|tn-medium|tn-long|help]

Common overrides:
  PYTHON_BIN=.venv/bin/python
  MPIEXEC=mpiexec
  MPS_RANKS=8 MPS_THREADS=12
  TN_RANKS=8 TN_THREADS=12
  OMP_NUM_THREADS=1 MKL_NUM_THREADS=1
EOF
}

# Dispatch on the requested mode. An explicit help request succeeds (exit 0);
# a missing or unknown mode is a usage error (usage on stderr, exit 2).
case "${1:-}" in
  probe)
    run_mps_case "MPS probe: n=40 layers=30 bond=2048" 40 30 2048 \
      --circuits brickwall_cnot \
      --observables ring_xz

    run_tn_case "TN probe: n=28 layers=12 target_slices=8" 28 12 \
      --circuits brickwall_cnot \
      --observables ring_xz \
      --tn-target-slices 8
    ;;

  mps-medium)
    run_mps_case "MPS medium: n=56 layers=40 bond=3072" 56 40 3072 \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz mixed_local range2_xx
    ;;

  mps-long)
    run_mps_case "MPS long: n=64 layers=48 bond=4096" 64 48 4096 \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz mixed_local range2_xx
    ;;

  tn-medium)
    run_tn_case "TN medium: n=32 layers=16 target_slices=16" 32 16 \
      --circuits brickwall_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz range2_xx \
      --tn-target-slices 16
    ;;

  tn-long)
    run_tn_case "TN long: n=36 layers=20 target_slices=32" 36 20 \
      --circuits brickwall_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz range2_xx \
      --tn-target-slices 32
    ;;

  help|-h|--help)
    usage
    exit 0
    ;;

  *)
    usage >&2
    exit 2
    ;;
esac
|
||||
@@ -1,243 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""Run TN expectation for a user-provided circuit and observable.
|
||||
|
||||
The case module should define:
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed): ...
|
||||
def build_observable(nqubits, seed): ...
|
||||
|
||||
``build_observable`` may return a Qibo SymbolicHamiltonian/form or the qibotn
|
||||
dict form:
|
||||
|
||||
{"terms": [
|
||||
{"coefficient": 1.0, "operators": [("X", 0), ("Z", 1)]},
|
||||
]}
|
||||
|
||||
For a single repeated Pauli string, pass ``--pauli-pattern`` instead of
|
||||
defining ``build_observable``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import importlib.util
|
||||
import inspect
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC = ROOT / "src"
|
||||
if str(SRC) not in sys.path:
|
||||
sys.path.insert(0, str(SRC))
|
||||
|
||||
from qibotn.expectation_runner import ( # noqa: E402
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
def optional_int(text):
    """Convert a command-line value to ``int``, or ``None`` for "no limit".

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (in any letter
    case) map to ``None``; every other value is handed to :func:`int`, which
    raises ``ValueError`` for text that is not an integer.
    """
    no_limit_words = ("none", "null", "inf", "unlimited")
    if not isinstance(text, str):
        return int(text)
    return None if text.lower() in no_limit_words else int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Convert a command-line value to ``float``, or ``None`` for "no limit".

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (in any letter
    case) map to ``None``; note that ``inf`` therefore never becomes
    ``float("inf")``. Every other value is handed to :func:`float`, which
    raises ``ValueError`` for text that is not a number.
    """
    no_limit_words = ("none", "null", "inf", "unlimited")
    if not isinstance(text, str):
        return float(text)
    return None if text.lower() in no_limit_words else float(text)
|
||||
|
||||
|
||||
def load_module(path):
    """Import the Python source file at *path* and return the module object.

    The module is named after the file stem. Unless that name is already
    present in ``sys.modules``, the new module is registered there before its
    body runs, as the importlib recipe for importing a source file requires.
    Code that resolves classes through ``sys.modules`` (``pickle``, for
    example) then works for objects defined in the case module. An existing
    entry is never overwritten, and a registration made here is rolled back
    if executing the module fails.

    Raises:
        RuntimeError: if no import spec/loader can be built for *path*.
        Exception: whatever the module body itself raises while executing.
    """
    path = Path(path).resolve()
    name = path.stem
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Cannot import case module from {path}.")
    module = importlib.util.module_from_spec(spec)

    # Do not shadow an already-imported module that happens to share the stem.
    registered = name not in sys.modules
    if registered:
        sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Leave no half-initialised module behind.
        if registered and sys.modules.get(name) is module:
            del sys.modules[name]
        raise
    return module
|
||||
|
||||
|
||||
def call_builder(fn, **kwargs):
    """Call *fn* with only those keyword arguments its signature can accept.

    When *fn* declares a ``**kwargs`` catch-all, every keyword is forwarded.
    Otherwise keywords whose names are not parameters of *fn* are dropped
    before the call. Returns whatever *fn* returns.
    """
    params = inspect.signature(fn).parameters
    takes_any_keyword = False
    for param in params.values():
        if param.kind is inspect.Parameter.VAR_KEYWORD:
            takes_any_keyword = True
            break
    if not takes_any_keyword:
        kwargs = {key: kwargs[key] for key in kwargs if key in params}
    return fn(**kwargs)
|
||||
|
||||
|
||||
def load_observable(args, module):
    """Pick the observable, trying each possible source in priority order.

    Order: ``--pauli-pattern`` (wrapped in a ``pauli_string_pattern`` dict),
    the JSON file given by ``--observable-json``, the case module's
    ``build_observable`` function (called through ``call_builder`` with
    ``nqubits``/``nlayers``/``seed``), and finally its ``OBSERVABLE``
    attribute.

    Raises:
        ValueError: if none of the sources is available.
    """
    pattern = args.pauli_pattern
    if pattern:
        return {"pauli_string_pattern": pattern}

    json_path = args.observable_json
    if json_path:
        with Path(json_path).open() as handle:
            return json.load(handle)

    if hasattr(module, "build_observable"):
        builder = module.build_observable
        builder_kwargs = dict(
            nqubits=args.nqubits,
            nlayers=args.nlayers,
            seed=args.seed,
        )
        return call_builder(builder, **builder_kwargs)

    if not hasattr(module, "OBSERVABLE"):
        raise ValueError(
            "No observable supplied. Define build_observable/OBSERVABLE in the case "
            "module, or pass --pauli-pattern / --observable-json."
        )
    return module.OBSERVABLE
|
||||
|
||||
|
||||
def build_parallel_opts(args):
    """Translate the parsed ``--tn-*`` / ``--dask-*`` options into an options dict.

    The result always contains ``slicing_opts`` (``None`` when neither slicing
    target is set), ``search_workers`` (falling back to ``--torch-threads``),
    ``max_repeats``, ``max_time`` and ``print_stats``. The remaining keys are
    added only when the corresponding option was supplied.
    """
    slicing = {
        key: value
        for key, value in (
            ("target_slices", args.tn_target_slices),
            ("target_size", args.tn_target_size),
        )
        if value is not None
    }

    opts = dict(
        slicing_opts=slicing if slicing else None,
        search_workers=args.tn_search_workers or args.torch_threads,
        max_repeats=args.tn_search_repeats,
        max_time=args.tn_search_time,
        print_stats=not args.no_tn_stats,
    )

    # Boolean switches contribute a literal True; None means "leave the key out".
    optional_entries = (
        ("search_backend", args.tn_search_backend),
        ("dask_address", args.dask_address),
        ("dask_close_workers", True if args.dask_close_workers else None),
        ("save_tree_path", args.tn_save_tree),
        ("load_tree_path", args.tn_load_tree),
        ("search_only", True if args.tn_search_only else None),
    )
    for key, value in optional_entries:
        if value is not None:
            opts[key] = value
    return opts
|
||||
|
||||
|
||||
def _print_term_summary(stat):
    """Print one ``tn_term_summary`` line for a per-term statistics mapping."""
    # Missing sub-mappings degrade to "nan"/"na" fields instead of aborting
    # after the expectation value has already been reported.
    cost = stat.get("path_cost") or {}
    search_stats = stat.get("search_stats") or {}
    nan = float("nan")
    print(
        "tn_term_summary "
        f"term={stat.get('term_index', 0)} "
        f"search_seconds={stat.get('search_seconds', nan):.3f} "
        f"contract_seconds={stat.get('contract_seconds', nan):.3f} "
        f"completed_trials={search_stats.get('completed_trials', 'na')} "
        f"finite_trials={search_stats.get('finite_trials', 'na')} "
        f"failed_trials={search_stats.get('failed_trials', 'na')} "
        f"requested_trials={search_stats.get('requested_trials', 'na')} "
        f"best_score={search_stats.get('best_score', nan):.6g} "
        f"slices={cost.get('slices')} "
        f"log10_flops={cost.get('log10_flops', nan):.3f} "
        f"log10_write={cost.get('log10_write', nan):.3f} "
        f"log2_size={cost.get('log2_size', nan):.3f} "
        f"peak_memory_gib={cost.get('peak_memory_gib', nan):.3g} "
        f"rank_slices={stat.get('rank_slices')}",
        flush=True,
    )


def main():
    """Run a CPU tensor-network expectation for a user-supplied case module.

    Parses the command line, loads the case module, builds its circuit and
    observable, runs ``run_cpu_expectation`` and prints one result row followed
    by one ``tn_term_summary`` line per entry of ``result.parallel_stats``.
    With ``--mpi`` only rank 0 prints.

    Raises:
        ValueError: if ``--exact`` is requested for more qubits than
            ``--exact-max-qubits``, or the case module lacks ``build_circuit``.
    """
    parser = argparse.ArgumentParser(
        description="Run CPU TN expectation for a custom qibo circuit module."
    )
    parser.add_argument("case_module", help="Python file defining build_circuit.")
    parser.add_argument("--nqubits", type=int, required=True)
    parser.add_argument("--nlayers", type=int, default=0)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--mpi", action="store_true")
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
    parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
    parser.add_argument("--torch-threads", type=int, default=8)
    parser.add_argument("--quimb-backend", choices=("numpy", "torch"), default="torch")
    parser.add_argument("--dtype", choices=("complex128", "complex64"), default="complex128")
    parser.add_argument("--pauli-pattern")
    parser.add_argument("--observable-json")
    parser.add_argument("--tn-target-slices", type=int)
    parser.add_argument("--tn-target-size", type=int, default=2**32)
    parser.add_argument("--tn-search-workers", type=int)
    parser.add_argument("--tn-search-repeats", type=int, default=128)
    parser.add_argument("--tn-search-time", type=float, default=60.0)
    parser.add_argument("--tn-search-backend", choices=("processpool", "dask"))
    parser.add_argument("--dask-address")
    parser.add_argument("--dask-close-workers", action="store_true")
    parser.add_argument("--tn-save-tree")
    parser.add_argument("--tn-load-tree")
    parser.add_argument("--tn-search-only", action="store_true")
    parser.add_argument("--no-tn-stats", action="store_true")
    args = parser.parse_args()

    # Validate on EVERY rank and before any work. Raising on rank 0 alone
    # would leave the other MPI ranks running into the expectation call
    # while rank 0 is already gone.
    if args.exact and args.nqubits > args.exact_max_qubits:
        raise ValueError(
            f"--exact is limited to {args.exact_max_qubits} qubits by default."
        )

    rank = 0
    if args.mpi:
        # Imported lazily so that serial runs do not need mpi4py.
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()

    module = load_module(args.case_module)
    if not hasattr(module, "build_circuit"):
        raise ValueError("case_module must define build_circuit.")

    circuit = call_builder(
        module.build_circuit,
        nqubits=args.nqubits,
        nlayers=args.nlayers,
        seed=args.seed,
    )
    observable = load_observable(args, module)

    config = ExpectationConfig(
        ansatz="tn",
        mpi=args.mpi,
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        quimb_backend=args.quimb_backend,
        dtype=args.dtype,
        torch_threads=args.torch_threads,
        parallel_opts=build_parallel_opts(args),
    )

    if rank == 0:
        mode = "MPI" if args.mpi else "serial"
        print(
            f"backend=cpu ansatz=TN mode={mode} case={Path(args.case_module).name} "
            f"nqubits={args.nqubits} nlayers={args.nlayers} seed={args.seed} "
            f"quimb_backend={args.quimb_backend} dtype={args.dtype} "
            f"torch_threads={args.torch_threads}",
            flush=True,
        )
        print("observable exact value abs_error rel_error seconds", flush=True)

    # The exact reference is only needed where the result row is printed.
    exact = None
    if args.exact and rank == 0:
        exact = exact_for_observable(circuit, observable, args.nqubits)

    result = run_cpu_expectation(circuit, observable, config)
    if args.mpi and result.rank != 0:
        return

    nan = float("nan")
    abs_error = nan if exact is None else abs(result.value - exact)
    # The floor on the denominator keeps the relative error finite when exact == 0.
    rel_error = nan if exact is None else abs_error / max(abs(exact), 1e-15)
    exact_text = "nan" if exact is None else f"{exact:.16e}"
    print(
        f"custom {exact_text} {result.value:.16e} "
        f"{abs_error:.6e} {rel_error:.6e} {result.seconds:.3f}",
        flush=True,
    )

    for stat in result.parallel_stats or ():
        _print_term_summary(stat)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,93 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
CASE="${CASE:-main1}"
|
||||
OBSERVABLES="${OBSERVABLES:-long_z_string}"
|
||||
NQUBITS="${NQUBITS:-34}"
|
||||
NLAYERS="${NLAYERS:-20}"
|
||||
TORCH_THREADS="${TORCH_THREADS:-48}"
|
||||
SEARCH_REPEATS="${SEARCH_REPEATS:-2048}"
|
||||
SEARCH_TIME="${SEARCH_TIME:-300}"
|
||||
TN_TARGET_SIZE="${TN_TARGET_SIZE:-8589934592}"
|
||||
TN_TARGET_SLICES="${TN_TARGET_SLICES:-}"
|
||||
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
DTYPE="${DTYPE:-complex64}"
|
||||
TREE_DIR="${TREE_DIR:-trees/contest_tn}"
|
||||
DASK_ADDRESS="${DASK_ADDRESS:-tcp://10.20.1.103:8786}"
|
||||
MPIEXEC_FULL="${MPIEXEC_FULL:-mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2}"
|
||||
SYNC_TREES="${SYNC_TREES:-1}"
|
||||
SYNC_HOSTS="${SYNC_HOSTS:-${WORKER_HOSTS:-}}"
|
||||
SSH_BIN="${SSH_BIN:-ssh}"
|
||||
|
||||
export TCM_ENABLE="${TCM_ENABLE:-1}"
|
||||
|
||||
tn_slice_args=(--tn-target-size "$TN_TARGET_SIZE")
|
||||
if [[ -n "$TN_TARGET_SLICES" ]]; then
|
||||
tn_slice_args+=(--tn-target-slices "$TN_TARGET_SLICES")
|
||||
fi
|
||||
|
||||
# Succeed when "$1" refers to this machine: a loopback name/address, the short
# or fully-qualified hostname, or one of the addresses listed by `hostname -I`.
is_local_host() {
  local host="$1"
  [[ "$host" == "localhost" || "$host" == "127.0.0.1" ]] && return 0
  [[ "$host" == "$(hostname)" ]] && return 0
  [[ "$host" == "$(hostname -f 2>/dev/null || true)" ]] && return 0
  # -F compares literally: without it the dots of an address such as
  # 10.20.1.103 would act as regex wildcards. "--" guards against a host
  # value that starts with a dash.
  hostname -I 2>/dev/null | tr ' ' '\n' | grep -qxF -- "$host"
}
|
||||
|
||||
# Copy the tree directory to every remote host listed in $SYNC_HOSTS.
# Does nothing unless SYNC_TREES is "1" and SYNC_HOSTS is non-empty. A relative
# $TREE_DIR is resolved against $ROOT_DIR on both the local and the remote
# side. Hosts recognised by is_local_host are skipped. rsync is used when it
# is available locally; otherwise only the *.pkl files are copied with scp.
sync_trees_to_hosts() {
  [[ "$SYNC_TREES" == "1" ]] || return 0
  [[ -n "$SYNC_HOSTS" ]] || return 0

  local src_dir="$TREE_DIR"
  local dst_dir="$TREE_DIR"
  if [[ "$TREE_DIR" != /* ]]; then
    src_dir="$ROOT_DIR/$TREE_DIR"
    dst_dir="$ROOT_DIR/$TREE_DIR"
  fi

  # The rsync probe does not depend on the host, so do it once.
  local have_rsync=0
  if command -v rsync >/dev/null 2>&1; then
    have_rsync=1
  fi

  # Keep the loop variable out of the global scope.
  local host
  # SYNC_HOSTS is intentionally unquoted: it is a whitespace-separated list.
  for host in $SYNC_HOSTS; do
    is_local_host "$host" && continue
    echo "Sync tree dir to $host:$dst_dir"
    # %q quotes the path for the remote shell.
    "$SSH_BIN" "$host" "mkdir -p $(printf '%q' "$dst_dir")"
    if [[ "$have_rsync" == "1" ]]; then
      rsync -a "$src_dir/" "$host:$dst_dir/"
    else
      scp -q "$src_dir"/*.pkl "$host:$dst_dir/"
    fi
  done
}
|
||||
|
||||
tools/manage_tn_dask_cluster.sh start
|
||||
|
||||
echo "Search with dask: $DASK_ADDRESS"
|
||||
"$PYTHON_BIN" -u tools/tn_contest_runner.py search \
|
||||
--case "$CASE" \
|
||||
--nqubits "$NQUBITS" \
|
||||
--nlayers "$NLAYERS" \
|
||||
--observables $OBSERVABLES \
|
||||
--tree-dir "$TREE_DIR" \
|
||||
--dask-address "$DASK_ADDRESS" \
|
||||
--torch-threads "$TORCH_THREADS" \
|
||||
--dtype "$DTYPE" \
|
||||
--tn-search-repeats "$SEARCH_REPEATS" \
|
||||
--tn-search-time "$SEARCH_TIME" \
|
||||
"${tn_slice_args[@]}"
|
||||
|
||||
sync_trees_to_hosts
|
||||
|
||||
echo "Contract with MPI: $MPIEXEC_FULL"
|
||||
read -r -a mpi_prefix <<< "$MPIEXEC_FULL"
|
||||
"${mpi_prefix[@]}" "$PYTHON_BIN" -u tools/tn_contest_runner.py contract \
|
||||
--mpi \
|
||||
--case "$CASE" \
|
||||
--nqubits "$NQUBITS" \
|
||||
--nlayers "$NLAYERS" \
|
||||
--observables $OBSERVABLES \
|
||||
--tree-dir "$TREE_DIR" \
|
||||
--torch-threads "$TORCH_THREADS" \
|
||||
--dtype "$DTYPE" \
|
||||
"${tn_slice_args[@]}"
|
||||
@@ -1,340 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Contest-style Vidal/MPI MPS cases.
|
||||
#
|
||||
# Usage:
|
||||
# tools/run_vidal_mpi_contest_cases.sh main1
|
||||
# tools/run_vidal_mpi_contest_cases.sh main2
|
||||
# tools/run_vidal_mpi_contest_cases.sh strong
|
||||
# tools/run_vidal_mpi_contest_cases.sh all
|
||||
#
|
||||
# Common overrides:
|
||||
# PYTHON_BIN=.venv/bin/python
|
||||
# MPIEXEC=mpiexec
|
||||
# MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2"
|
||||
# HOSTFILE=hostfile # optional; used only if the file exists
|
||||
# RANKS=8
|
||||
# TORCH_THREADS=8
|
||||
# CUT_RATIO=1e-12
|
||||
# OBS_FILTER="boundary_ZZ_q2 ring_xz dense3_spread complex_iZ0"
|
||||
#
|
||||
# Per-case overrides:
|
||||
# MAIN1_NQ=128 MAIN1_LAYERS=50 MAIN1_BOND=1024 MAIN1_SEED=31001
|
||||
# MAIN2_NQ=128 MAIN2_LAYERS=64 MAIN2_BOND=2048 MAIN2_SEED=31002
|
||||
# STRONG_NQ=256 STRONG_LAYERS=64 STRONG_BOND=2048 STRONG_SEED=41001
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
MPIEXEC="${MPIEXEC:-mpiexec}"
|
||||
HOSTFILE="${HOSTFILE:-}"
|
||||
RANKS="${RANKS:-4}"
|
||||
TORCH_THREADS="${TORCH_THREADS:-1}"
|
||||
CUT_RATIO="${CUT_RATIO:-1e-12}"
|
||||
OBS_FILTER="${OBS_FILTER:-}"
|
||||
|
||||
RUNNER_DIR="$ROOT_DIR/.tmp"
|
||||
mkdir -p "$RUNNER_DIR"
|
||||
RUNNER="$(mktemp "$RUNNER_DIR/qibotn_vidal_contest.XXXXXX.py")"
|
||||
cleanup() {
|
||||
rm -f "$RUNNER"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
cat > "$RUNNER" <<'PY'
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
    """Best-effort: ask torch to use *nthreads* threads.

    Any failure, including torch not being importable, is deliberately
    ignored so that the runner keeps working without torch.
    """
    try:
        import torch as torch_module
    except Exception:
        return
    try:
        torch_module.set_num_threads(nthreads)
    except Exception:
        return
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build a layered random circuit of the requested *kind*.

    Every layer first applies RY and RZ (plus RX for ``rxx_rzz`` and
    ``scramble``) with random angles to each qubit, then an entangling
    pattern on neighbouring qubits:

    * ``reversed_cnot``: CNOTs on the even pairs, then on the odd pairs, with
      the control/target direction alternating with the layer parity;
    * ``rxx_rzz``: RXX then RZZ on the pairs starting at ``layer % 2``;
    * ``scramble``: as ``rxx_rzz`` with a narrower angle range, plus a SWAP on
      each pair in every fifth layer.

    All angles come from one ``numpy`` generator seeded with *seed*, drawn in
    gate order, so the circuit is reproducible.

    Raises:
        ValueError: for an unknown *kind* (raised while building the first layer).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    with_rx = kind in ("rxx_rzz", "scramble")

    def draw(bound):
        # One uniform sample from [-bound, bound).
        return rng.uniform(-bound, bound)

    for layer in range(nlayers):
        odd_layer = layer % 2 == 1

        for q in range(nqubits):
            circuit.add(gates.RY(q, theta=draw(math.pi)))
            circuit.add(gates.RZ(q, theta=draw(math.pi)))
            if with_rx:
                circuit.add(gates.RX(q, theta=draw(math.pi)))

        if kind == "reversed_cnot":
            for q in range(0, nqubits - 1, 2):
                control, target = (q + 1, q) if odd_layer else (q, q + 1)
                circuit.add(gates.CNOT(control, target))
            for q in range(1, nqubits - 1, 2):
                control, target = (q, q + 1) if odd_layer else (q + 1, q)
                circuit.add(gates.CNOT(control, target))
        elif kind in ("rxx_rzz", "scramble"):
            scrambling = kind == "scramble"
            bound = 0.8 if scrambling else 0.9
            swap_layer = scrambling and layer % 5 == 4
            for q in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.RXX(q, q + 1, theta=draw(bound)))
                circuit.add(gates.RZZ(q, q + 1, theta=draw(bound)))
                if swap_layer:
                    circuit.add(gates.SWAP(q, q + 1))
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")

    return circuit
|
||||
|
||||
|
||||
def ring_xz(nqubits):
    """Return the Hamiltonian ``sum_q 0.5 * X(q) * Z(q + 1)`` on a closed ring.

    The neighbour index wraps around, so the last qubit couples to qubit 0.
    """
    form = sum(0.5 * X(q) * Z((q + 1) % nqubits) for q in range(nqubits))
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def open_zz(nqubits):
    """Return the nearest-neighbour ``Z(q) * Z(q + 1)`` Hamiltonian on an open chain.

    Each of the ``nqubits - 1`` bonds carries the weight ``1 / (nqubits - 1)``.
    """
    # The weight is evaluated per term on purpose: with no bonds at all it is
    # never computed, so no division by zero can occur.
    form = sum(
        (1.0 / (nqubits - 1)) * Z(q) * Z(q + 1) for q in range(nqubits - 1)
    )
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def range2_xx(nqubits):
    """Return the next-nearest-neighbour ``X(q) * X(q + 2)`` Hamiltonian.

    Each of the ``nqubits - 2`` terms carries the weight ``1 / (nqubits - 2)``.
    """
    # The weight is evaluated per term on purpose: with no terms at all it is
    # never computed, so no division by zero can occur.
    form = sum(
        (1.0 / (nqubits - 2)) * X(q) * X(q + 2) for q in range(nqubits - 2)
    )
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
    """Return a random Hermitian ``dim x dim`` observable acting on *qubits*.

    A complex Gaussian matrix drawn from a generator seeded with *seed* is
    symmetrised into a Hermitian matrix and scaled to unit norm
    (``np.linalg.norm``). The result is a dict with the keys ``"matrix"`` and
    ``"qubits"``. *nqubits* is accepted but not used.
    """
    rng = np.random.default_rng(seed)
    shape = (dim, dim)
    # Draw order matters for reproducibility: real part first, then imaginary.
    real_part = rng.normal(size=shape)
    imag_part = rng.normal(size=shape)
    raw = real_part + 1j * imag_part
    hermitian = (raw + raw.conj().T) / 2.0
    return {
        "matrix": hermitian / np.linalg.norm(hermitian),
        "qubits": list(qubits),
    }
|
||||
|
||||
|
||||
def observables_for_case(nqubits, seed):
    """Return the ``(name, observable)`` pairs evaluated for one case.

    The list holds ZZ correlators across the quarter, half and three-quarter
    points of the chain, two five-site Pauli strings, the ``ring_xz`` /
    ``open_zz`` / ``range2_xx`` sums, a complex-weighted single-site term and
    two dense random observables seeded from ``seed + 101`` and ``seed + 202``.
    """
    q1, q2, q3 = (k * nqubits // 4 for k in (1, 2, 3))
    first, last = 0, nqubits - 1

    def symbolic(form):
        # Shorthand for wrapping a symbolic expression.
        return hamiltonians.SymbolicHamiltonian(form=form)

    return [
        ("boundary_ZZ_q1", symbolic(Z(q1 - 1) * Z(q1))),
        ("boundary_ZZ_q2", symbolic(Z(q2 - 1) * Z(q2))),
        ("boundary_ZZ_q3", symbolic(Z(q3 - 1) * Z(q3))),
        ("long_Z_5_sites", symbolic(Z(first) * Z(q1) * Z(q2) * Z(q3) * Z(last))),
        ("mixed_XZYZX", symbolic(X(first) * Z(q1) * Y(q2) * Z(q3) * X(last))),
        ("ring_xz", ring_xz(nqubits)),
        ("open_zz", open_zz(nqubits)),
        ("range2_xx", range2_xx(nqubits)),
        ("complex_iZ0", symbolic(1.0j * Z(first))),
        ("dense2_mid", dense_observable(nqubits, (q2 - 1, q2), seed + 101, 4)),
        ("dense3_spread", dense_observable(nqubits, (q1, q2, q3), seed + 202, 8)),
    ]
|
||||
|
||||
|
||||
def run_case(args):
    """Evaluate every selected observable of one case and print a result table.

    Builds the circuit and the observable list, optionally narrows the list to
    the comma-separated names in ``args.obs_filter``, then, per observable,
    configures a fresh ``VidalBackend`` and times ``backend.expectation``
    after an MPI barrier. Rank 0 prints the case header and one row per
    observable. A failing expectation is reported in the ``status`` column
    (value ``nan``) instead of aborting the run.

    Raises:
        ValueError: if the filter matches none of the observables.
    """
    set_torch_threads(args.torch_threads)
    world = MPI.COMM_WORLD
    rank, size = world.Get_rank(), world.Get_size()
    is_root = rank == 0

    circuit = build_circuit(args.kind, args.nqubits, args.nlayers, args.seed)
    observables = observables_for_case(args.nqubits, args.seed)
    if args.obs_filter:
        wanted = set(args.obs_filter.split(","))
        observables = [pair for pair in observables if pair[0] in wanted]
        if not observables:
            raise ValueError(f"OBS_FILTER matched no observables: {args.obs_filter!r}")

    if is_root:
        header_fields = [
            "case",
            f"label={args.label}",
            f"kind={args.kind}",
            f"ranks={size}",
            f"nqubits={args.nqubits}",
            f"nlayers={args.nlayers}",
            f"gates={len(circuit.queue)}",
            f"bond={args.bond}",
            f"cut_ratio={args.cut_ratio:g}",
            f"torch_threads={args.torch_threads}",
            f"seed={args.seed}",
            f"obs_filter={args.obs_filter or 'all'}",
        ]
        print("=" * 88, flush=True)
        print(" ".join(header_fields), flush=True)
        print("observable value seconds trunc_sum trunc_max status", flush=True)

    backend_settings = dict(
        max_bond_dimension=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        mpi_approach="CT",
        mpi_num_procs=size,
        fallback=False,
    )

    for obs_name, observable in observables:
        # A fresh backend per observable.
        backend = VidalBackend()
        backend.configure_tn_simulation(**backend_settings)

        # Line the ranks up so that the timing starts together.
        world.Barrier()
        started = time.perf_counter()
        try:
            value = backend.expectation(
                circuit,
                observable,
                preprocess=True,
                compile_circuit=False,
            )
            status = "ok"
        except Exception as exc:  # pragma: no cover - printed for manual runs
            value = np.nan
            first_line = str(exc).split("\n", 1)[0]
            status = f"{type(exc).__name__}:{first_line}"
        seconds = time.perf_counter() - started

        if is_root:
            print(
                f"{obs_name} {value!r} {seconds:.3f} "
                f"{backend.last_truncation_error:.6e} "
                f"{backend.last_max_truncation_error:.6e} {status}",
                flush=True,
            )
|
||||
|
||||
|
||||
def main():
    """Parse the runner's command line and hand the result to ``run_case``.

    Every option except ``--obs-filter`` is required.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--label", required=True)
    parser.add_argument(
        "--kind",
        choices=("reversed_cnot", "rxx_rzz", "scramble"),
        required=True,
    )
    typed_required = (
        ("--nqubits", int),
        ("--nlayers", int),
        ("--bond", int),
        ("--cut-ratio", float),
        ("--seed", int),
        ("--torch-threads", int),
    )
    for flag, converter in typed_required:
        parser.add_argument(flag, type=converter, required=True)
    parser.add_argument("--obs-filter", default="")

    args = parser.parse_args()
    run_case(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
PY
|
||||
|
||||
if [[ -n "${MPIEXEC_FULL:-}" ]]; then
|
||||
read -r -a mpi_prefix <<< "$MPIEXEC_FULL"
|
||||
else
|
||||
mpi_prefix=("$MPIEXEC")
|
||||
if [[ -n "$HOSTFILE" && -f "$HOSTFILE" ]]; then
|
||||
mpi_prefix+=("-hostfile" "$HOSTFILE")
|
||||
fi
|
||||
mpi_prefix+=("-n" "$RANKS")
|
||||
fi
|
||||
|
||||
# Launch one contest case through the MPI prefix assembled in mpi_prefix.
#   $1 label  $2 circuit kind  $3 nqubits  $4 layers  $5 bond  $6 seed
# CUT_RATIO, TORCH_THREADS and OBS_FILTER are taken from the environment;
# OBS_FILTER is space-separated and handed over as a comma-separated list.
run_case() {
  local label="$1"
  local kind="$2"
  local nq="$3"
  local layers="$4"
  local bond="$5"
  local seed="$6"

  # Prefer the checked-in runner. If it is absent, use the runner this script
  # wrote to $RUNNER instead of failing on a missing file.
  local runner="$ROOT_DIR/tools/vidal_mpi_contest_runner.py"
  if [[ ! -f "$runner" ]]; then
    runner="$RUNNER"
  fi

  echo
  echo "Running $label: kind=$kind nqubits=$nq layers=$layers bond=$bond seed=$seed"
  echo "MPI: ${mpi_prefix[*]}"
  "${mpi_prefix[@]}" "$PYTHON_BIN" -u "$runner" \
    --label "$label" \
    --kind "$kind" \
    --nqubits "$nq" \
    --nlayers "$layers" \
    --bond "$bond" \
    --cut-ratio "$CUT_RATIO" \
    --seed "$seed" \
    --torch-threads "$TORCH_THREADS" \
    --obs-filter "$(tr ' ' ',' <<< "$OBS_FILTER")"
}
|
||||
|
||||
case "${1:-help}" in
|
||||
main1)
|
||||
run_case \
|
||||
"main1-reversed-cnot" \
|
||||
"reversed_cnot" \
|
||||
"${MAIN1_NQ:-128}" \
|
||||
"${MAIN1_LAYERS:-50}" \
|
||||
"${MAIN1_BOND:-1024}" \
|
||||
"${MAIN1_SEED:-31001}"
|
||||
;;
|
||||
main2)
|
||||
run_case \
|
||||
"main2-rxx-rzz" \
|
||||
"rxx_rzz" \
|
||||
"${MAIN2_NQ:-128}" \
|
||||
"${MAIN2_LAYERS:-64}" \
|
||||
"${MAIN2_BOND:-2048}" \
|
||||
"${MAIN2_SEED:-31002}"
|
||||
;;
|
||||
strong)
|
||||
run_case \
|
||||
"strong-scramble" \
|
||||
"scramble" \
|
||||
"${STRONG_NQ:-256}" \
|
||||
"${STRONG_LAYERS:-64}" \
|
||||
"${STRONG_BOND:-2048}" \
|
||||
"${STRONG_SEED:-41001}"
|
||||
;;
|
||||
all)
|
||||
"$0" main1
|
||||
"$0" main2
|
||||
"$0" strong
|
||||
;;
|
||||
smoke)
|
||||
MAIN1_NQ="${MAIN1_NQ:-32}" \
|
||||
MAIN1_LAYERS="${MAIN1_LAYERS:-6}" \
|
||||
MAIN1_BOND="${MAIN1_BOND:-128}" \
|
||||
"$0" main1
|
||||
;;
|
||||
help|*)
|
||||
cat >&2 <<'EOF'
|
||||
Usage: tools/run_vidal_mpi_contest_cases.sh [main1|main2|strong|all|smoke]
|
||||
|
||||
Cases:
|
||||
main1 128 qubits, 50 layers, reversed-CNOT brickwall, chi=1024
|
||||
main2 128 qubits, 64 layers, RXX/RZZ brickwall, chi=2048
|
||||
strong 256 qubits, 64 layers, RXX/RZZ + periodic SWAP scramble, chi=2048
|
||||
smoke Small syntax/runtime check of main1
|
||||
|
||||
Common overrides:
|
||||
PYTHON_BIN=.venv/bin/python
|
||||
MPIEXEC=mpiexec
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2"
|
||||
HOSTFILE=hostfile
|
||||
RANKS=8
|
||||
TORCH_THREADS=8
|
||||
CUT_RATIO=1e-12
|
||||
OBS_FILTER="boundary_ZZ_q2 ring_xz dense3_spread complex_iZ0"
|
||||
|
||||
Per-case overrides:
|
||||
MAIN1_NQ=128 MAIN1_LAYERS=50 MAIN1_BOND=1024 MAIN1_SEED=31001
|
||||
MAIN2_NQ=128 MAIN2_LAYERS=64 MAIN2_BOND=2048 MAIN2_SEED=31002
|
||||
STRONG_NQ=256 STRONG_LAYERS=64 STRONG_BOND=2048 STRONG_SEED=41001
|
||||
EOF
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
@@ -1,70 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
NQ="${NQ:-34}"
|
||||
LAYERS="${LAYERS:-20}"
|
||||
BOND="${BOND:-512}"
|
||||
SEED="${SEED:-42}"
|
||||
RANKS="${RANKS:-1 2 4}"
|
||||
THREADS="${THREADS:-32 32 16}"
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
MPIEXEC="${MPIEXEC:-mpiexec}"
|
||||
CIRCUIT="${CIRCUIT:-brickwall_cnot}"
|
||||
OBSERVABLE="${OBSERVABLE:-ring_xz}"
|
||||
EXACT="${EXACT:-0}"
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
if [[ "${1:-help}" != "run" ]]; then
|
||||
cat >&2 <<'EOF'
|
||||
Usage: tools/run_vidal_segment_mpi_scan.sh run
|
||||
|
||||
Overrides:
|
||||
NQ=34 LAYERS=20 BOND=512 SEED=42
|
||||
RANKS="1 2 4" THREADS="32 32 16"
|
||||
CIRCUIT=brickwall_cnot OBSERVABLE=ring_xz
|
||||
EXACT=1
|
||||
PYTHON_BIN=.venv/bin/python MPIEXEC=mpiexec
|
||||
EOF
|
||||
if [[ "${1:-help}" == "help" ]]; then
|
||||
exit 0
|
||||
fi
|
||||
exit 2
|
||||
fi
|
||||
|
||||
read -r -a ranks <<< "$RANKS"
|
||||
read -r -a threads <<< "$THREADS"
|
||||
|
||||
if [[ "${#ranks[@]}" != "${#threads[@]}" ]]; then
|
||||
echo "RANKS and THREADS must have the same number of entries." >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
common=(
|
||||
--nqubits "$NQ"
|
||||
--nlayers "$LAYERS"
|
||||
--bond "$BOND"
|
||||
--seed "$SEED"
|
||||
--mps
|
||||
--circuits "$CIRCUIT"
|
||||
--observables "$OBSERVABLE"
|
||||
)
|
||||
|
||||
if [[ "$EXACT" == "1" ]]; then
|
||||
common+=(--exact)
|
||||
fi
|
||||
|
||||
for idx in "${!ranks[@]}"; do
|
||||
nrank="${ranks[$idx]}"
|
||||
nthr="${threads[$idx]}"
|
||||
if [[ "$nrank" == "1" ]]; then
|
||||
echo "== Vidal serial ranks=1 torch_threads=$nthr =="
|
||||
"$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
"${common[@]}" --torch-threads "$nthr"
|
||||
else
|
||||
echo "== Vidal segmented MPI ranks=$nrank torch_threads=$nthr =="
|
||||
"$MPIEXEC" -n "$nrank" "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
"${common[@]}" --torch-threads "$nthr" --mpi
|
||||
fi
|
||||
done
|
||||
@@ -1,59 +0,0 @@
|
||||
"""Slice an existing saved cotengra tree without re-running path search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
from qibotn.parallel import contraction_tree_costs
|
||||
|
||||
|
||||
def main():
    """Slice one tree of a saved pickle and write the updated payload.

    The input is either a dict with a ``"trees"`` entry or the trees
    themselves (a list/tuple, or a single tree). The tree selected by
    ``--term`` is sliced with ``tree.slice``, its costs are printed before and
    after, and the payload is written to the output path. For a dict payload
    the ``"costs"`` and ``"nterms"`` entries are refreshed as well.

    An unusable payload or an out-of-range ``--term`` ends the program through
    ``parser.error`` (usage message, exit status 2).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Input pickle saved by --tn-save-tree.")
    parser.add_argument("output", help="Output pickle path.")
    parser.add_argument("--term", type=int, default=0)
    parser.add_argument("--target-slices", type=int, default=2)
    parser.add_argument("--max-repeats", type=int, default=64)
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    input_path = Path(args.input)
    output_path = Path(args.output)
    # SECURITY: unpickling executes code stored in the file. Only feed this
    # tool pickles that were produced by a trusted run.
    with input_path.open("rb") as f:
        payload = pickle.load(f)

    payload_is_dict = isinstance(payload, dict)
    if payload_is_dict and "trees" not in payload:
        parser.error(f"{input_path} holds a dict without a 'trees' entry.")
    trees = payload["trees"] if payload_is_dict else payload
    if not isinstance(trees, (list, tuple)):
        trees = [trees]

    try:
        tree = trees[args.term]
    except IndexError:
        parser.error(
            f"--term {args.term} is out of range: "
            f"{input_path} holds {len(trees)} tree(s)."
        )

    print("original", contraction_tree_costs(tree), flush=True)
    sliced = tree.slice(
        target_slices=args.target_slices,
        max_repeats=args.max_repeats,
        seed=args.seed,
    )
    print("sliced", contraction_tree_costs(sliced), flush=True)
    print(f"sliced_inds={sliced.sliced_inds}", flush=True)

    # Work on a copy so that the loaded payload is not modified in place.
    new_trees = list(trees)
    new_trees[args.term] = sliced

    if payload_is_dict:
        out_payload = dict(payload)
        out_payload["trees"] = new_trees
        out_payload["costs"] = [contraction_tree_costs(t) for t in new_trees]
        out_payload["nterms"] = len(new_trees)
    else:
        out_payload = new_trees

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("wb") as f:
        pickle.dump(out_payload, f)
    print(f"saved {output_path}", flush=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,440 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""Contest-style CPU TN path search and contraction runner.
|
||||
|
||||
This file is intentionally self-contained: define contest circuits and
|
||||
observables here, run path search once, then load the saved trees for repeated
|
||||
MPI contractions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC = ROOT / "src"
|
||||
if str(SRC) not in sys.path:
|
||||
sys.path.insert(0, str(SRC))
|
||||
|
||||
from qibotn.expectation_runner import ( # noqa: E402
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CaseSpec:
|
||||
circuit_kind: str
|
||||
observables: tuple[str, ...]
|
||||
nqubits: int
|
||||
nlayers: int
|
||||
seed: int
|
||||
target_slices: int | None = None
|
||||
|
||||
|
||||
# Registry of contest cases, keyed by the name accepted by ``--case``.
# ``target_slices`` is left at its dataclass default (None) for every case.
CASES = {
    "main1": CaseSpec(
        circuit_kind="rxx_rzz_chain",
        observables=("ring_xz",),
        nqubits=34,
        nlayers=20,
        seed=31001,
    ),
    "main2": CaseSpec(
        circuit_kind="scramble_chain",
        observables=("open_zz", "range2_xx"),
        nqubits=36,
        nlayers=18,
        seed=31002,
    ),
    "strong": CaseSpec(
        circuit_kind="reversed_cnot",
        observables=("ring_xz", "long_z_string"),
        nqubits=40,
        nlayers=24,
        seed=41001,
    ),
}
|
||||
|
||||
|
||||
def optional_int(text):
    """Convert *text* to ``int``, mapping "no limit" keywords to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter case)
    yield ``None``; every other value is passed to ``int``.
    """
    unlimited_words = {"none", "null", "inf", "unlimited"}
    means_unlimited = isinstance(text, str) and text.lower() in unlimited_words
    return None if means_unlimited else int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Convert *text* to ``float``, mapping "no limit" keywords to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter case)
    yield ``None``; every other value is passed to ``float``.
    """
    unlimited_words = {"none", "null", "inf", "unlimited"}
    means_unlimited = isinstance(text, str) and text.lower() in unlimited_words
    return None if means_unlimited else float(text)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
    """Best-effort: ask torch to use *nthreads* threads.

    Any failure, whether importing torch or applying the setting, is ignored
    on purpose so callers can run without a usable torch installation.
    """
    try:
        import torch
    except Exception:
        return
    try:
        torch.set_num_threads(nthreads)
    except Exception:
        return
|
||||
|
||||
|
||||
def add_single_qubit_layer(circuit, nqubits, rng, include_rx=False):
    """Append random single-qubit rotations to *circuit* on every qubit.

    Each qubit receives RY then RZ (and RX when *include_rx* is true), each
    with an angle drawn from ``rng.uniform(-pi, pi)`` in that order.
    """
    rotations = [gates.RY, gates.RZ]
    if include_rx:
        rotations.append(gates.RX)
    for qubit in range(nqubits):
        for rotation in rotations:
            angle = rng.uniform(-math.pi, math.pi)
            circuit.add(rotation(qubit, theta=angle))
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Define contest circuits here.

    Builds *nlayers* layers on *nqubits* qubits, drawing all random angles
    from a generator seeded with *seed*.

    Raises:
        ValueError: If *kind* is not a known circuit family (checked while
            building each layer).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    for layer in range(nlayers):
        if kind in ("rxx_rzz_chain", "scramble_chain"):
            # Both families share the RXX/RZZ brick pattern; they differ in
            # the angle range and in the periodic SWAPs of the scramble chain.
            scrambling = kind == "scramble_chain"
            bound = 0.8 if scrambling else 0.9
            add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
            for left in range(layer % 2, nqubits - 1, 2):
                right = left + 1
                circuit.add(gates.RXX(left, right, theta=rng.uniform(-bound, bound)))
                circuit.add(gates.RZZ(left, right, theta=rng.uniform(-bound, bound)))
                if scrambling and layer % 5 == 4:
                    circuit.add(gates.SWAP(left, right))

        elif kind == "reversed_cnot":
            add_single_qubit_layer(circuit, nqubits, rng)
            odd_layer = bool(layer % 2)
            # Even bonds are reversed on odd layers, odd bonds on even layers.
            for first, reverse in ((0, odd_layer), (1, not odd_layer)):
                for left in range(first, nqubits - 1, 2):
                    if reverse:
                        gate = gates.CNOT(left + 1, left)
                    else:
                        gate = gates.CNOT(left, left + 1)
                    circuit.add(gate)

        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")

    return circuit
|
||||
|
||||
|
||||
def pauli_sum_observable(kind, nqubits, seed):
    """Define contest observables here.

    The TN path currently expects Pauli products / SymbolicHamiltonian terms.
    Keep production contest observables Hermitian unless complex output is
    explicitly required by the scoring rule.

    *seed* is accepted for signature symmetry but is not used.

    Raises:
        ValueError: If *kind* is not a known observable.
    """
    del seed

    if kind == "ring_xz":
        form = sum(0.5 * X(q) * Z((q + 1) % nqubits) for q in range(nqubits))
        return hamiltonians.SymbolicHamiltonian(form=form)

    if kind == "open_zz":
        form = sum(
            (1.0 / max(1, nqubits - 1)) * Z(q) * Z(q + 1)
            for q in range(nqubits - 1)
        )
        return hamiltonians.SymbolicHamiltonian(form=form)

    if kind == "range2_xx":
        form = sum(
            (1.0 / max(1, nqubits - 2)) * X(q) * X(q + 2)
            for q in range(nqubits - 2)
        )
        return hamiltonians.SymbolicHamiltonian(form=form)

    if kind == "long_z_string":
        # One Z on every ``stride``-th qubit, multiplied into a single string.
        stride = max(1, nqubits // 16)
        form = None
        for site in range(0, nqubits, stride):
            form = Z(site) if form is None else form * Z(site)
        return hamiltonians.SymbolicHamiltonian(form=form)

    if kind == "mixed_local":
        quarter = nqubits // 4
        half = nqubits // 2
        three_quarters = (3 * nqubits) // 4
        form = 0.25 * X(0) - 0.5 * Z(nqubits - 1)
        form += 0.125 * X(quarter) * Z(half) * Y(three_quarters)
        return hamiltonians.SymbolicHamiltonian(form=form)

    raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def tree_path(tree_dir, case_name, obs_name, nqubits, nlayers, target_slices):
    """Return the pickle path of the tree for one case/observable setup.

    The file name encodes the case, observable, problem size and slice target
    (``auto`` when *target_slices* is ``None``).
    """
    if target_slices is None:
        slice_label = "auto"
    else:
        slice_label = f"s{target_slices}"
    filename = f"{case_name}_{obs_name}_{nqubits}q{nlayers}l_{slice_label}.pkl"
    return Path(tree_dir) / filename
|
||||
|
||||
|
||||
def build_parallel_opts(args, tree_file=None, search_only=False):
    """Translate parsed CLI arguments into the ``parallel_opts`` dict.

    Args:
        args: Parsed argument namespace providing the ``tn_*``, ``dask_*`` and
            ``torch_threads`` attributes read below.
        tree_file: Path of the tree pickle. It is stored as ``save_tree_path``
            in search-only mode and as ``load_tree_path`` otherwise.
        search_only: When true, mark the run as a path search only.

    Returns:
        dict: Options consumed by the expectation runner. Optional keys are
        present only when the corresponding argument is set.
    """
    slicing_opts = {}
    if args.tn_target_slices is not None:
        slicing_opts["target_slices"] = args.tn_target_slices
    if args.tn_target_size is not None:
        slicing_opts["target_size"] = args.tn_target_size

    opts = {
        # An empty dict is passed on as None.
        "slicing_opts": slicing_opts or None,
        # Fall back to the torch thread count when no worker count is given.
        "search_workers": args.tn_search_workers or args.torch_threads,
        "max_repeats": args.tn_search_repeats,
        "max_time": args.tn_search_time,
        "print_stats": False,
    }
    if args.tn_search_backend is not None:
        opts["search_backend"] = args.tn_search_backend
    if args.dask_address is not None:
        opts["dask_address"] = args.dask_address
    if args.dask_close_workers:
        opts["dask_close_workers"] = True
    if args.tn_debug_trials:
        opts["debug_trials"] = True

    if search_only:
        opts["search_only"] = True
        # Without a tree file, omit the key rather than storing str(None),
        # which would make the search save to a file literally named "None".
        if tree_file is not None:
            opts["save_tree_path"] = str(tree_file)
    elif tree_file is not None:
        opts["load_tree_path"] = str(tree_file)
    return opts
|
||||
|
||||
|
||||
def run_one(args, case_name, obs_name, mode):
    """Run one observable of one case in the given *mode* and print results.

    Builds the circuit and observable, resolves the tree file path, runs the
    expectation runner in search-only mode (``mode == "search"``) or with the
    stored tree otherwise, and prints a result line plus one summary line per
    entry of ``result.parallel_stats``. Under MPI only the rank reported by the
    result as 0 prints the results.

    Raises:
        FileNotFoundError: In ``"contract"`` mode when the tree file is absent.
        ValueError: When ``--exact`` is requested for more qubits than
            ``args.exact_max_qubits``.
    """
    spec = CASES[case_name]
    circuit = build_circuit(spec.circuit_kind, args.nqubits, args.nlayers, args.seed)
    observable = pauli_sum_observable(obs_name, args.nqubits, args.seed)
    tree_file = tree_path(
        args.tree_dir,
        case_name,
        obs_name,
        args.nqubits,
        args.nlayers,
        args.tn_target_slices,
    )
    tree_file.parent.mkdir(parents=True, exist_ok=True)

    rank = 0
    if args.mpi:
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()
    is_root = rank == 0

    if is_root:
        print("=" * 88, flush=True)
        print(
            f"mode={mode} case={case_name} circuit={spec.circuit_kind} "
            f"observable={obs_name} nqubits={args.nqubits} nlayers={args.nlayers} "
            f"seed={args.seed} gates={len(circuit.queue)} tree={tree_file}",
            flush=True,
        )

    if mode == "contract" and not tree_file.exists():
        raise FileNotFoundError(f"Missing tree file: {tree_file}. Run search first.")

    # The exact reference is computed on the root rank only, and never for a
    # pure search run.
    exact = None
    if args.exact and is_root and mode != "search":
        if args.nqubits > args.exact_max_qubits:
            raise ValueError(
                f"--exact is limited to {args.exact_max_qubits} qubits by default."
            )
        exact = exact_for_observable(circuit, observable, args.nqubits)

    parallel_opts = build_parallel_opts(
        args,
        tree_file=tree_file,
        search_only=(mode == "search"),
    )
    config = ExpectationConfig(
        ansatz="tn",
        mpi=args.mpi,
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        quimb_backend=args.quimb_backend,
        dtype=args.dtype,
        torch_threads=args.torch_threads,
        parallel_opts=parallel_opts,
    )
    result = run_cpu_expectation(circuit, observable, config)
    if args.mpi and result.rank != 0:
        return

    nan = float("nan")
    if mode == "search":
        print(f"searched observable={obs_name} tree={tree_file}", flush=True)
    else:
        if exact is None:
            abs_error = nan
            rel_error = nan
            exact_text = "nan"
        else:
            abs_error = abs(result.value - exact)
            # Guard the division for a reference value of zero.
            rel_error = abs_error / max(abs(exact), 1e-15)
            exact_text = f"{exact:.16e}"
        print(
            f"result observable={obs_name} exact={exact_text} "
            f"value={result.value:.16e} abs_error={abs_error:.6e} "
            f"rel_error={rel_error:.6e} seconds={result.seconds:.3f}",
            flush=True,
        )

    for stat in result.parallel_stats or ():
        path_cost = stat["path_cost"]
        trials = stat.get("search_stats", {})
        print(
            "tn_term_summary "
            f"observable={obs_name} "
            f"term={stat.get('term_index', 0)} "
            f"search_seconds={stat.get('search_seconds', nan):.3f} "
            f"contract_seconds={stat.get('contract_seconds', nan):.3f} "
            f"completed_trials={trials.get('completed_trials', 'na')} "
            f"finite_trials={trials.get('finite_trials', 'na')} "
            f"failed_trials={trials.get('failed_trials', 'na')} "
            f"requested_trials={trials.get('requested_trials', 'na')} "
            f"best_score={trials.get('best_score', nan):.6g} "
            f"slices={path_cost.get('nslices')} "
            f"log10_flops={path_cost.get('log10_flops', nan):.3f} "
            f"log10_write={path_cost.get('log10_write', nan):.3f} "
            f"log2_size={path_cost.get('log2_size', nan):.3f} "
            f"peak_memory_gib={path_cost.get('peak_memory_gib', nan):.3g} "
            f"rank_slices={stat.get('rank_slices')}",
            flush=True,
        )
|
||||
|
||||
|
||||
def selected_observables(args, case):
    """Return the observable names to run as a tuple.

    Precedence: explicit ``args.observables``, then the comma-separated
    ``args.obs_filter`` (entries stripped, empty ones dropped), then the case
    defaults.
    """
    explicit = args.observables
    if explicit:
        return tuple(explicit)
    if not args.obs_filter:
        return case.observables
    stripped = (piece.strip() for piece in args.obs_filter.split(","))
    return tuple(name for name in stripped if name)
|
||||
|
||||
|
||||
def apply_case_defaults(args):
    """Fill unset CLI values on *args* from the selected case, in place.

    ``nqubits``, ``nlayers``, ``seed`` and ``tn_target_slices`` are replaced
    only when they are ``None``; ``observables`` is always resolved through
    ``selected_observables``.
    """
    case = CASES[args.case]
    fallbacks = (
        ("nqubits", case.nqubits),
        ("nlayers", case.nlayers),
        ("seed", case.seed),
        ("tn_target_slices", case.target_slices),
    )
    for attribute, fallback in fallbacks:
        if getattr(args, attribute) is None:
            setattr(args, attribute, fallback)
    args.observables = selected_observables(args, case)
|
||||
|
||||
|
||||
def stop_dask_cluster(args):
    """Stop the external dask cluster through the management script.

    Does nothing when ``--keep-dask`` is set, when the search backend is not
    dask, or when no ``--dask-address`` was given. Under MPI only rank 0 acts.
    The scheduler host/port parsed from the address are exported to the script
    unless the environment already defines them.
    """
    uses_external_dask = args.tn_search_backend == "dask" and bool(args.dask_address)
    if args.keep_dask or not uses_external_dask:
        return

    if args.mpi:
        from mpi4py import MPI

        if MPI.COMM_WORLD.Get_rank() != 0:
            return

    script = ROOT / "tools" / "manage_tn_dask_cluster.sh"
    if not script.exists():
        print(f"dask_stop_skipped reason=missing_script path={script}", flush=True)
        return

    env = os.environ.copy()
    address = urlparse(args.dask_address)
    if address.hostname:
        env.setdefault("SCHEDULER_HOST", address.hostname)
    if address.port:
        env.setdefault("SCHEDULER_PORT", str(address.port))

    print("dask_stop_after_search start", flush=True)
    # check=False: a failing stop script does not abort the run.
    subprocess.run([str(script), "stop"], cwd=str(ROOT), env=env, check=False)
    print("dask_stop_after_search done", flush=True)
|
||||
|
||||
|
||||
def main():
    """Parse the command line and run the requested contest mode.

    ``list`` prints the registered cases. ``all`` and ``validate`` run a
    search followed by a contraction; ``validate`` additionally enables the
    exact reference and caps the qubit count at ``--exact-max-qubits``. After
    the search pass the external dask cluster is stopped via
    ``stop_dask_cluster``.
    """
    parser = argparse.ArgumentParser()

    # Case and observable selection.
    parser.add_argument("mode", choices=("search", "contract", "all", "validate", "list"))
    parser.add_argument("--case", choices=sorted(CASES), default="main1")
    parser.add_argument("--observables", nargs="+")
    parser.add_argument("--obs-filter", default="")
    parser.add_argument("--tree-dir", default="trees/contest_tn")
    parser.add_argument("--nqubits", type=int)
    parser.add_argument("--nlayers", type=int)
    parser.add_argument("--seed", type=int)

    # Execution and numerical settings.
    parser.add_argument("--mpi", action="store_true")
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
    parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
    parser.add_argument("--torch-threads", type=int, default=8)
    parser.add_argument("--quimb-backend", choices=("numpy", "torch"), default="torch")
    parser.add_argument("--dtype", choices=("complex128", "complex64"), default="complex64")

    # Path-search settings.
    parser.add_argument("--tn-target-slices", type=int)
    parser.add_argument("--tn-target-size", type=int, default=2**32)
    parser.add_argument("--tn-search-workers", type=int)
    parser.add_argument("--tn-search-repeats", type=int, default=2048)
    parser.add_argument("--tn-search-time", type=float, default=300.0)
    parser.add_argument(
        "--tn-search-backend",
        choices=("processpool", "dask"),
        default="dask",
        help=(
            "Path-search backend. Defaults to dask. Without --dask-address, "
            "non-MPI search starts a local dask cluster."
        ),
    )
    parser.add_argument("--dask-address")
    parser.add_argument("--dask-close-workers", action="store_true")
    parser.add_argument(
        "--keep-dask",
        action="store_true",
        help=(
            "Keep an external dask cluster running after search. By default, "
            "tools/manage_tn_dask_cluster.sh stop is called after search when "
            "--dask-address is used."
        ),
    )
    parser.add_argument(
        "--tn-debug-trials",
        action="store_true",
        help="Print dask worker summary and per-trial start/done logs.",
    )
    parser.add_argument("--no-tn-stats", action="store_true")
    args = parser.parse_args()

    if args.mode == "list":
        for name, case in CASES.items():
            print(
                f"{name}: circuit={case.circuit_kind} "
                f"observables={','.join(case.observables)} "
                f"nqubits={case.nqubits} nlayers={case.nlayers} "
                f"seed={case.seed} target_slices={case.target_slices}"
            )
        return

    apply_case_defaults(args)
    set_torch_threads(args.torch_threads)

    if args.mode == "validate":
        # Validation compares against the exact result, so keep it small.
        args.exact = True
        args.nqubits = min(args.nqubits, args.exact_max_qubits)
    if args.mode in ("all", "validate"):
        modes = ("search", "contract")
    else:
        modes = (args.mode,)

    for mode in modes:
        for obs_name in args.observables:
            run_one(args, args.case, obs_name, mode)
        # Stop the cluster only once every observable has been searched.
        if mode == "search":
            stop_dask_cluster(args)


if __name__ == "__main__":
    main()
|
||||
@@ -1,114 +0,0 @@
|
||||
"""Run the 34q/20L TN complex64 benchmark under torch.profiler briefly."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
|
||||
def main():
    """Profile the benchmark under torch.profiler for a limited time.

    Runs ``benchmark_cpu_expectation.main()`` with a fixed argument list
    inside a CPU profiler, interrupts it with a SIGALRM-based timer after
    ``--seconds`` seconds, and writes per-rank trace, stack and summary files
    into ``--out-dir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--seconds", type=float, default=30.0)
    parser.add_argument("--out-dir", default="torch_profiles/tn_complex64")
    parser.add_argument("--torch-threads", type=int, default=48)
    args = parser.parse_args()

    # Work from the repository root so relative paths and imports resolve.
    repo_root = Path(__file__).resolve().parents[1]
    os.chdir(repo_root)
    sys.path.insert(0, str(repo_root))

    import torch
    from torch.profiler import ProfilerActivity, profile

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    out_dir = Path(args.out_dir)
    if rank == 0:
        out_dir.mkdir(parents=True, exist_ok=True)
    # Other ranks wait until the output directory exists.
    comm.Barrier()

    torch.set_num_threads(args.torch_threads)

    def run_benchmark():
        import benchmark_cpu_expectation

        # The benchmark reads its options from sys.argv.
        sys.argv = [
            "benchmark_cpu_expectation.py",
            "--mpi",
            "--ansatz",
            "tn",
            "--nqubits",
            "34",
            "--nlayers",
            "20",
            "--circuits",
            "rxx_rzz",
            "--pauli-pattern",
            "XZ",
            "--tn-load-tree",
            "trees/rxx_rzz_34q20l_s4.pkl",
            "--quimb-backend",
            "torch",
            "--torch-threads",
            str(args.torch_threads),
            "--dtype",
            "complex64",
        ]
        benchmark_cpu_expectation.main()

    trace_path = out_dir / f"rank{rank}_trace.json"
    stacks_path = out_dir / f"rank{rank}_stacks.txt"
    summary_path = out_dir / f"rank{rank}_summary.txt"

    profiler = profile(
        activities=[ProfilerActivity.CPU],
        record_shapes=True,
        profile_memory=True,
        with_stack=True,
    )

    class ProfileTimeout(Exception):
        pass

    def alarm_handler(signum, frame):
        raise ProfileTimeout()

    previous_handler = signal.signal(signal.SIGALRM, alarm_handler)
    signal.setitimer(signal.ITIMER_REAL, args.seconds)
    try:
        with profiler:
            try:
                run_benchmark()
            except ProfileTimeout:
                # Expected way to end the run once the time budget is used.
                pass
    finally:
        # Always disarm the timer and restore the previous handler.
        signal.setitimer(signal.ITIMER_REAL, 0)
        signal.signal(signal.SIGALRM, previous_handler)

    profiler.export_chrome_trace(str(trace_path))
    try:
        profiler.export_stacks(str(stacks_path), "self_cpu_time_total")
    except Exception as exc:  # pragma: no cover - diagnostic only
        stacks_path.write_text(f"export_stacks failed: {exc}\n", encoding="utf-8")

    summary = profiler.key_averages(group_by_stack_n=5).table(
        sort_by="self_cpu_time_total",
        row_limit=40,
    )
    summary_path.write_text(summary, encoding="utf-8")

    print(
        f"torch_profile_done rank={rank}/{size} "
        f"trace={trace_path} summary={summary_path}",
        flush=True,
    )


if __name__ == "__main__":
    main()
|
||||
@@ -1,202 +0,0 @@
|
||||
"""Correctness checks for the Vidal/TEBD MPS fast path.
|
||||
|
||||
The cases here intentionally cover more than the benchmark ring-XZ observable:
|
||||
different nearest-neighbor gate orientations and several Pauli-sum observables.
|
||||
Run serially to compare qibojit/statevector vs Vidal, or under MPI to compare
|
||||
the segmented Vidal executor.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from qibo import Circuit, gates
|
||||
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build a layered nearest-neighbour test circuit of the given *kind*.

    Every layer applies random RY/RZ rotations (plus RX for ``"rx_ry_cz"``)
    to each qubit, followed by the entangling pattern of the chosen kind.

    Raises:
        ValueError: If *kind* is unknown (checked while building each layer).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    with_rx = kind == "rx_ry_cz"
    reversible = kind == "reversed_cnot"

    for layer in range(nlayers):
        for q in range(nqubits):
            circuit.add(gates.RY(q, theta=rng.uniform(-math.pi, math.pi)))
            circuit.add(gates.RZ(q, theta=rng.uniform(-math.pi, math.pi)))
            if with_rx:
                circuit.add(gates.RX(q, theta=rng.uniform(-math.pi, math.pi)))

        if kind in ("brickwall", "reversed_cnot"):
            odd_layer = bool(layer % 2)
            # For "reversed_cnot", even bonds flip on odd layers and odd bonds
            # flip on even layers; "brickwall" never flips.
            sweeps = (
                (0, reversible and odd_layer),
                (1, reversible and not odd_layer),
            )
            for first, flip in sweeps:
                for q in range(first, nqubits - 1, 2):
                    if flip:
                        circuit.add(gates.CNOT(q + 1, q))
                    else:
                        circuit.add(gates.CNOT(q, q + 1))
        elif kind == "rx_ry_cz":
            for q in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.CZ(q, q + 1))
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")
    return circuit
|
||||
|
||||
|
||||
def observable_terms(kind, nqubits):
    """Return the Pauli-sum terms of observable *kind* on *nqubits* qubits.

    Each term is ``(coefficient, ((pauli_name, site), ...))``.

    Raises:
        ValueError: If *kind* is not a known observable.
    """
    if kind == "ring_xz":
        terms = []
        for site in range(nqubits):
            neighbour = (site + 1) % nqubits
            terms.append((0.5, (("X", site), ("Z", neighbour))))
        return terms

    if kind == "open_zz":
        terms = []
        for site in range(nqubits - 1):
            # The weight is computed per term so a single qubit (no bonds)
            # never divides by zero.
            weight = 1.0 / (nqubits - 1)
            terms.append((weight, (("Z", site), ("Z", site + 1))))
        return terms

    if kind == "mixed_local":
        terms = [(0.25, (("X", 0),)), (-0.5, (("Z", nqubits - 1),))]
        for site in range(0, nqubits - 1, 3):
            terms.append((0.125, (("Y", site), ("Y", site + 1))))
        return terms

    raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def exact_pauli_sum(circuit, terms, nqubits):
    """Evaluate a Pauli-sum expectation value from the full state vector.

    Args:
        circuit: Callable whose result exposes ``state(numpy=True)``.
        terms: Iterable of ``(coefficient, ((pauli_name, site), ...))``.
        nqubits: Number of qubits; site 0 maps to the most significant bit of
            the basis-state index.

    Returns:
        float: Real part of the accumulated expectation value.

    Raises:
        ValueError: If a Pauli name other than X, Y, Z or I is encountered.
    """
    state = circuit().state(numpy=True).reshape(-1)
    indices = np.arange(state.size, dtype=np.int64)
    total = 0.0 + 0.0j
    for coeff, ops in terms:
        flip_mask = 0
        phase = np.ones(state.size, dtype=np.complex128)
        for name, site in ops:
            shift = nqubits - 1 - site
            bit = (indices >> shift) & 1
            pauli = name.upper()
            if pauli in ("X", "Y"):
                # X and Y both flip the bit; Y also contributes +/- i.
                flip_mask ^= 1 << shift
                if pauli == "Y":
                    phase *= 1j * (1 - 2 * bit)
            elif pauli == "Z":
                phase *= 1 - 2 * bit
            elif pauli != "I":
                raise ValueError(f"Unsupported Pauli {pauli!r}.")
        flipped = indices ^ flip_mask
        total += coeff * np.vdot(state[flipped], phase * state)
    return float(total.real)
|
||||
|
||||
|
||||
def run_vidal(circuit, terms, nqubits, bond, tensor_module):
    """Run *circuit* on a VidalTEBDExecutor and return the Pauli-sum value.

    The executor uses ``max_bond=bond`` and a fixed ``cut_ratio`` of 1e-12;
    the result of ``expectation_pauli_sum`` is converted to ``float``.
    """
    settings = {
        "nqubits": nqubits,
        "max_bond": bond,
        "cut_ratio": 1e-12,
        "tensor_module": tensor_module,
    }
    executor = VidalTEBDExecutor(**settings)
    executor.run_circuit(circuit)
    expectation = executor.expectation_pauli_sum(terms)
    return float(expectation)
|
||||
|
||||
|
||||
def run_segment_mpi(circuit, terms, nqubits, bond, tensor_module, comm):
    """Run *circuit* on a SegmentVidalMPIExecutor over communicator *comm*.

    The executor uses ``max_bond=bond`` and a fixed ``cut_ratio`` of 1e-12.
    Returns whatever ``expectation_pauli_sum_root`` yields for *terms*.
    """
    settings = {
        "nqubits": nqubits,
        "max_bond": bond,
        "cut_ratio": 1e-12,
        "tensor_module": tensor_module,
        "comm": comm,
    }
    executor = SegmentVidalMPIExecutor(**settings)
    executor.run_circuit(circuit)
    return executor.expectation_pauli_sum_root(terms)
|
||||
|
||||
|
||||
def main():
    """Compare Vidal expectation values with exact state-vector results.

    For every requested circuit kind and observable, the exact value is
    computed on rank 0 (and broadcast under MPI), the Vidal executor (serial
    or MPI-segmented) is run, and rank 0 prints one result row.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=16)
    parser.add_argument("--nlayers", type=int, default=6)
    parser.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--tensor-module", choices=("torch", "numpy"), default="torch")
    parser.add_argument("--torch-threads", type=int, default=32)
    parser.add_argument("--mpi", action="store_true")
    parser.add_argument(
        "--circuits",
        nargs="+",
        default=("brickwall", "reversed_cnot", "rx_ry_cz"),
    )
    parser.add_argument(
        "--observables",
        nargs="+",
        default=("ring_xz", "open_zz", "mixed_local"),
    )
    args = parser.parse_args()

    torch.set_num_threads(args.torch_threads)
    comm, rank, size = None, 0, 1
    if args.mpi:
        from mpi4py import MPI

        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

    is_root = rank == 0
    if is_root:
        mode = f"vidal-segment-mpi/{size}" if args.mpi else "vidal"
        print(
            f"mode={mode} nqubits={args.nqubits} nlayers={args.nlayers} "
            f"bond={args.bond} tensor_module={args.tensor_module}"
        )
        print("circuit observable exact value abs_error seconds")

    for circuit_kind in args.circuits:
        circuit = build_circuit(circuit_kind, args.nqubits, args.nlayers, args.seed)

        # Only the root rank computes the exact reference values.
        exact_values = None
        if is_root:
            exact_values = {
                obs: exact_pauli_sum(
                    circuit, observable_terms(obs, args.nqubits), args.nqubits
                )
                for obs in args.observables
            }
        if comm is not None:
            exact_values = comm.bcast(exact_values, root=0)

        for obs_kind in args.observables:
            terms = observable_terms(obs_kind, args.nqubits)
            start = time.perf_counter()
            if args.mpi:
                value = run_segment_mpi(
                    circuit,
                    terms,
                    args.nqubits,
                    args.bond,
                    args.tensor_module,
                    comm,
                )
            else:
                value = run_vidal(
                    circuit, terms, args.nqubits, args.bond, args.tensor_module
                )
            if not is_root:
                continue
            elapsed = time.perf_counter() - start
            exact = exact_values[obs_kind]
            print(
                f"{circuit_kind} {obs_kind} {exact:.16e} {value:.16e} "
                f"{abs(value - exact):.6e} {elapsed:.3f}"
            )


if __name__ == "__main__":
    main()
|
||||
@@ -1,209 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def optional_int(text):
    """Convert *text* to ``int``, mapping "no limit" keywords to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter case)
    yield ``None``; every other value is passed to ``int``.
    """
    unlimited_words = {"none", "null", "inf", "unlimited"}
    means_unlimited = isinstance(text, str) and text.lower() in unlimited_words
    return None if means_unlimited else int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Convert *text* to ``float``, mapping "no limit" keywords to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter case)
    yield ``None``; every other value is passed to ``float``.
    """
    unlimited_words = {"none", "null", "inf", "unlimited"}
    means_unlimited = isinstance(text, str) and text.lower() in unlimited_words
    return None if means_unlimited else float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
    """Format *value* with format spec *fmt*; ``None`` renders as "None"."""
    if value is None:
        return "None"
    return format(value, fmt)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
    """Best-effort: ask torch to use *nthreads* threads.

    Any failure, whether importing torch or applying the setting, is ignored
    on purpose so callers can run without a usable torch installation.
    """
    try:
        import torch
    except Exception:
        return
    try:
        torch.set_num_threads(nthreads)
    except Exception:
        return
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build a layered nearest-neighbour circuit of the given *kind*.

    Every layer applies random RY/RZ rotations (plus RX for ``"rxx_rzz"`` and
    ``"scramble"``) to each qubit, followed by the two-qubit pattern of the
    chosen kind. All angles come from a generator seeded with *seed*.

    Raises:
        ValueError: If *kind* is unknown (checked while building each layer).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    with_rx = kind in ("rxx_rzz", "scramble")

    for layer in range(nlayers):
        for q in range(nqubits):
            circuit.add(gates.RY(q, theta=rng.uniform(-math.pi, math.pi)))
            circuit.add(gates.RZ(q, theta=rng.uniform(-math.pi, math.pi)))
            if with_rx:
                circuit.add(gates.RX(q, theta=rng.uniform(-math.pi, math.pi)))

        if kind == "reversed_cnot":
            odd_layer = bool(layer % 2)
            # Even bonds are reversed on odd layers, odd bonds on even layers.
            for first, flip in ((0, odd_layer), (1, not odd_layer)):
                for q in range(first, nqubits - 1, 2):
                    if flip:
                        circuit.add(gates.CNOT(q + 1, q))
                    else:
                        circuit.add(gates.CNOT(q, q + 1))
        elif kind in ("rxx_rzz", "scramble"):
            # Same RXX/RZZ bricks; "scramble" uses a narrower angle range and
            # adds SWAPs on every fifth layer.
            scrambling = kind == "scramble"
            bound = 0.8 if scrambling else 0.9
            for q in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.RXX(q, q + 1, theta=rng.uniform(-bound, bound)))
                circuit.add(gates.RZZ(q, q + 1, theta=rng.uniform(-bound, bound)))
                if scrambling and layer % 5 == 4:
                    circuit.add(gates.SWAP(q, q + 1))
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")

    return circuit
|
||||
|
||||
|
||||
def ring_xz(nqubits):
    """Return the periodic sum of ``0.5 * X(q) * Z(q + 1)`` as a Hamiltonian."""
    form = sum(0.5 * X(q) * Z((q + 1) % nqubits) for q in range(nqubits))
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def open_zz(nqubits):
    """Return the open-chain average of ``Z(q) * Z(q + 1)`` as a Hamiltonian."""
    # The weight stays inside the generator so that a chain without bonds
    # never evaluates the division.
    form = sum(
        (1.0 / (nqubits - 1)) * Z(q) * Z(q + 1) for q in range(nqubits - 1)
    )
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def range2_xx(nqubits):
    """Return the average of ``X(q) * X(q + 2)`` over the chain as a Hamiltonian."""
    # The weight stays inside the generator so that a chain without such
    # pairs never evaluates the division.
    form = sum(
        (1.0 / (nqubits - 2)) * X(q) * X(q + 2) for q in range(nqubits - 2)
    )
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
    """Return a random Hermitian ``dim x dim`` observable acting on *qubits*.

    The matrix is the Hermitian part of a complex Gaussian matrix drawn from
    a generator seeded with *seed*, divided by its ``np.linalg.norm``.
    *nqubits* is accepted but not used.

    Returns:
        dict: ``{"matrix": ndarray, "qubits": list}``.
    """
    rng = np.random.default_rng(seed)
    shape = (dim, dim)
    # Draw the real part first, then the imaginary part.
    real_part = rng.normal(size=shape)
    imag_part = rng.normal(size=shape)
    raw = real_part + 1j * imag_part
    hermitian = (raw + raw.conj().T) / 2.0
    normalized = hermitian / np.linalg.norm(hermitian)
    return {"matrix": normalized, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def observables_for_case(nqubits, seed):
    """Return the ``(name, observable)`` pairs evaluated for one case.

    Sites are taken at one quarter, one half and three quarters of the chain
    plus both ends. The list mixes symbolic Pauli observables with two dense
    matrix observables built by ``dense_observable``.
    """
    quarter = nqubits // 4
    half = nqubits // 2
    three_quarters = (3 * nqubits) // 4
    last = nqubits - 1

    def symbolic(form):
        return hamiltonians.SymbolicHamiltonian(form=form)

    observables = []
    # Two-site ZZ across each of the three reference sites.
    for label, site in (("q1", quarter), ("q2", half), ("q3", three_quarters)):
        observables.append((f"boundary_ZZ_{label}", symbolic(Z(site - 1) * Z(site))))

    observables.append(
        (
            "long_Z_5_sites",
            symbolic(Z(0) * Z(quarter) * Z(half) * Z(three_quarters) * Z(last)),
        )
    )
    observables.append(
        (
            "mixed_XZYZX",
            symbolic(X(0) * Z(quarter) * Y(half) * Z(three_quarters) * X(last)),
        )
    )
    observables.append(("ring_xz", ring_xz(nqubits)))
    observables.append(("open_zz", open_zz(nqubits)))
    observables.append(("range2_xx", range2_xx(nqubits)))
    observables.append(("complex_iZ0", symbolic(1.0j * Z(0))))
    observables.append(
        ("dense2_mid", dense_observable(nqubits, (half - 1, half), seed + 101, 4))
    )
    observables.append(
        (
            "dense3_spread",
            dense_observable(nqubits, (quarter, half, three_quarters), seed + 202, 8),
        )
    )
    return observables
|
||||
|
||||
|
||||
def run_case(args):
    """Evaluate every selected observable of one case on the Vidal backend.

    Builds the circuit, selects observables (optionally restricted by the
    comma-separated ``args.obs_filter``), and for each one configures a fresh
    ``VidalBackend``, times ``backend.expectation`` and prints a result row on
    rank 0. A failing expectation is reported in the ``status`` column with a
    NaN value instead of aborting the sweep.

    Raises:
        ValueError: If ``args.obs_filter`` is set but matches no observable.
    """
    set_torch_threads(args.torch_threads)
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    circuit = build_circuit(args.kind, args.nqubits, args.nlayers, args.seed)
    observables = observables_for_case(args.nqubits, args.seed)
    if args.obs_filter:
        # Strip whitespace and drop empty entries so "a, b" selects both names.
        wanted = {
            name.strip() for name in args.obs_filter.split(",") if name.strip()
        }
        observables = [(name, obs) for name, obs in observables if name in wanted]
        if not observables:
            raise ValueError(f"OBS_FILTER matched no observables: {args.obs_filter!r}")

    if rank == 0:
        print("=" * 88, flush=True)
        print(
            "case "
            f"label={args.label} kind={args.kind} ranks={size} "
            f"nqubits={args.nqubits} nlayers={args.nlayers} gates={len(circuit.queue)} "
            f"bond={format_optional(args.bond)} "
            f"cut_ratio={format_optional(args.cut_ratio)} "
            f"torch_threads={args.torch_threads} seed={args.seed} "
            f"obs_filter={args.obs_filter or 'all'}",
            flush=True,
        )
        print(
            "observable value seconds trunc_sum trunc_max status",
            flush=True,
        )

    for obs_name, observable in observables:
        backend = VidalBackend()
        backend.configure_tn_simulation(
            max_bond_dimension=args.bond,
            cut_ratio=args.cut_ratio,
            tensor_module="torch",
            mpi_approach="CT",
            mpi_num_procs=size,
            fallback=False,
        )

        # Start timing only once every rank has reached this observable.
        comm.Barrier()
        start = time.perf_counter()
        try:
            value = backend.expectation(
                circuit,
                observable,
                preprocess=True,
                compile_circuit=False,
            )
            status = "ok"
        except Exception as exc:  # pragma: no cover - printed for manual runs
            # Keep the sweep going; report the first line of the error.
            value = np.nan
            status = type(exc).__name__ + ":" + str(exc).split("\n", 1)[0]
        seconds = time.perf_counter() - start

        if rank == 0:
            print(
                f"{obs_name} {value!r} {seconds:.3f} "
                f"{backend.last_truncation_error:.6e} "
                f"{backend.last_max_truncation_error:.6e} {status}",
                flush=True,
            )
|
||||
|
||||
|
||||
def main():
    """Parse the (mostly required) case options and run the case."""
    parser = argparse.ArgumentParser()
    # Every option except --obs-filter is required; registration order is
    # kept as it determines the order shown in the help output.
    required_options = (
        ("--label", {}),
        ("--kind", {"choices": ("reversed_cnot", "rxx_rzz", "scramble")}),
        ("--nqubits", {"type": int}),
        ("--nlayers", {"type": int}),
        ("--bond", {"type": optional_int}),
        ("--cut-ratio", {"type": optional_float}),
        ("--seed", {"type": int}),
        ("--torch-threads", {"type": int}),
    )
    for flag, extra in required_options:
        parser.add_argument(flag, required=True, **extra)
    parser.add_argument("--obs-filter", default="")

    args = parser.parse_args()
    run_case(args)


if __name__ == "__main__":
    main()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user