简化代码；加入.venv下内容

2026-05-18 02:47:40 +08:00
parent ef3d7e9ee6
commit 28080dff1d
15 changed files with 17145 additions and 118 deletions
--- a/.venv/lib/python3.12/site-packages/cotengra/contract.py
+++ b/.venv/lib/python3.12/site-packages/cotengra/contract.py
--- a/.venv/lib/python3.12/site-packages/cotengra/core.py
+++ b/.venv/lib/python3.12/site-packages/cotengra/core.py
--- a/.venv/lib/python3.12/site-packages/cotengra/hyperoptimizers/hyper.py
+++ b/.venv/lib/python3.12/site-packages/cotengra/hyperoptimizers/hyper.py
--- a/.venv/lib/python3.12/site-packages/cotengra/parallel.py
+++ b/.venv/lib/python3.12/site-packages/cotengra/parallel.py
@@ -0,0 +1,583 @@
+"""Interface for parallelism."""
+
+import atexit
+import collections
+import functools
+import importlib
+import inspect
+import numbers
+import operator
+import warnings
+
+# ``import importlib`` on its own does not guarantee that the
+# ``importlib.util`` submodule has been loaded, so import it explicitly
+# before ``find_spec`` is used below.
+import importlib.util
+
+# backend used for ``parallel='auto'``; stays ``None`` until one is chosen
+_AUTO_BACKEND = None
+
+# check for loky, joblib (vendors loky), then default to concurrent.futures
+have_loky = importlib.util.find_spec("loky") is not None
+have_joblib = importlib.util.find_spec("joblib") is not None
+if have_loky or have_joblib:
+    _DEFAULT_BACKEND = "loky"
+else:
+    _DEFAULT_BACKEND = "concurrent.futures"
+
+
+@functools.lru_cache(None)
+def choose_default_num_workers():
+    """Pick the default number of workers; the answer is cached.
+
+    The first available source is used: the ``COTENGRA_NUM_WORKERS``
+    environment variable, then the ``OMP_NUM_THREADS`` environment variable
+    (both converted with ``int``), and finally ``os.cpu_count()``.
+    """
+    import os
+
+    for variable in ("COTENGRA_NUM_WORKERS", "OMP_NUM_THREADS"):
+        if variable in os.environ:
+            return int(os.environ[variable])
+
+    return os.cpu_count()
+
+
+def get_pool(n_workers=None, maybe_create=False, backend=None):
+    """Return a parallel pool for ``backend``.
+
+    Parameters
+    ----------
+    n_workers : None or int, optional
+        Requested number of workers. For the non-distributed backends
+        ``None`` is replaced by ``choose_default_num_workers()``.
+    maybe_create : bool, optional
+        Only forwarded to the ``'dask'`` and ``'ray'`` backends.
+    backend : None or str, optional
+        One of ``'dask'``, ``'ray'``, ``'loky'``, ``'concurrent.futures'``
+        or ``'threads'``. ``None`` selects the module default backend.
+
+    Returns
+    -------
+    The pool of the requested backend, or ``None`` if ``backend`` is not
+    one of the names above.
+    """
+    backend = _DEFAULT_BACKEND if backend is None else backend
+
+    # distributed backends: pass ``n_workers`` on untouched (may be None)
+    if backend == "dask":
+        return _get_pool_dask(n_workers=n_workers, maybe_create=maybe_create)
+    if backend == "ray":
+        return _get_pool_ray(n_workers=n_workers, maybe_create=maybe_create)
+
+    if n_workers is None:
+        n_workers = choose_default_num_workers()
+
+    if backend == "loky":
+        return get_loky_get_reusable_executor()(max_workers=n_workers)
+    if backend == "concurrent.futures":
+        return _get_process_pool_cf(n_workers=n_workers)
+    if backend == "threads":
+        return _get_thread_pool_cf(n_workers=n_workers)
+
+    # any other backend name yields no pool
+    return None
+
+
+@functools.lru_cache(None)
+def _infer_backed_cached(pool_class):
+    """Map a pool class to a backend name, caching the result per class.
+
+    A class named ``RayExecutor`` maps to ``'ray'``. Otherwise the first
+    component of the class's module path decides: ``concurrent`` gives
+    ``'concurrent.futures'``, ``joblib`` gives ``'loky'``, ``distributed``
+    gives ``'dask'``, and anything else is returned unchanged.
+    """
+    if pool_class.__name__ == "RayExecutor":
+        return "ray"
+
+    root_package = pool_class.__module__.split(".")[0]
+    aliases = {
+        "concurrent": "concurrent.futures",
+        "joblib": "loky",
+        "distributed": "dask",
+    }
+    return aliases.get(root_package, root_package)
+
+
+def _infer_backend(pool):
+    """Return the backend name for ``pool``, looked up (and cached) by class."""
+    pool_class = pool.__class__
+    return _infer_backed_cached(pool_class)
+
+
+def get_n_workers(pool=None):
+    """Work out how many workers ``pool`` has (mostly used to decide how
+    many tasks to pre-dispatch).
+
+    Parameters
+    ----------
+    pool : optional
+        The pool to inspect; ``None`` means the pool from ``get_pool()``.
+
+    Returns
+    -------
+    The pool's ``_max_workers`` attribute if it has one, otherwise a count
+    obtained from the dask scheduler, ray or MPI depending on the backend.
+
+    Raises
+    ------
+    ValueError
+        If none of the above applies to ``pool``.
+    """
+    if pool is None:
+        pool = get_pool()
+
+    # use the pool's own ``_max_workers`` attribute whenever it exists
+    missing = object()
+    max_workers = getattr(pool, "_max_workers", missing)
+    if max_workers is not missing:
+        return max_workers
+
+    backend = _infer_backend(pool)
+
+    if backend == "dask":
+        # add up the threads of every worker (at least one per worker)
+        worker_infos = pool.scheduler_info(n_workers=-1)["workers"].values()
+        return sum(int(info.get("nthreads", 1) or 1) for info in worker_infos)
+
+    if backend == "ray":
+        import time
+
+        # retry until the resources report contains a "CPU" entry
+        while True:
+            try:
+                return int(get_ray().available_resources()["CPU"])
+            except KeyError:
+                time.sleep(1e-3)
+
+    if backend == "mpi4py":
+        from mpi4py import MPI
+
+        return MPI.COMM_WORLD.size
+
+    raise ValueError(f"Can't find number of workers in pool {pool}.")
+
+
+def parse_parallel_arg(parallel):
+    """Turn the ``parallel`` option into a pool, ``None``, or pass it through.
+
+    Parameters
+    ----------
+    parallel : 'auto', bool, int, str or other
+        - ``'auto'``: the pool of the remembered auto backend, without
+          asking for one to be created.
+        - ``False``: no pool (``None``).
+        - ``True``: treated like the name of the remembered auto backend,
+          which is first set to the default backend if not yet chosen.
+        - an integer: a default-backend pool with that many workers; the
+          default backend is also remembered as the auto backend.
+        - ``'loky'``, ``'concurrent.futures'``, ``'threads'``: a pool of
+          that backend.
+        - ``'dask'``, ``'ray'``: a pool of that backend, which is also
+          remembered as the auto backend.
+        - anything else is returned unchanged.
+    """
+    global _AUTO_BACKEND
+
+    if parallel == "auto":
+        return get_pool(maybe_create=False, backend=_AUTO_BACKEND)
+
+    if parallel is False:
+        return None
+
+    if parallel is True:
+        # swap ``True`` for a backend name, handled by the branches below
+        if _AUTO_BACKEND is None:
+            _AUTO_BACKEND = _DEFAULT_BACKEND
+        parallel = _AUTO_BACKEND
+
+    if isinstance(parallel, numbers.Integral):
+        _AUTO_BACKEND = _DEFAULT_BACKEND
+        return get_pool(
+            n_workers=parallel, maybe_create=True, backend=_DEFAULT_BACKEND
+        )
+
+    # local backends: hand out a pool, leave the auto backend alone
+    for name in ("loky", "concurrent.futures", "threads"):
+        if parallel == name:
+            return get_pool(maybe_create=True, backend=name)
+
+    # distributed backends: additionally remember them for 'auto'
+    for name in ("dask", "ray"):
+        if parallel == name:
+            _AUTO_BACKEND = name
+            return get_pool(maybe_create=True, backend=name)
+
+    return parallel
+
+
+def set_parallel_backend(backend):
+    """Create a parallel pool of type ``backend``, which registers it as the
+    default for ``'auto'`` parallel, and return whatever
+    ``parse_parallel_arg`` produces for it.
+    """
+    pool = parse_parallel_arg(backend)
+    return pool
+
+
+def maybe_leave_pool(pool):
+    """Logic required for nested parallelism in dask.distributed.
+
+    For a dask pool the result of ``_maybe_leave_pool_dask()`` is returned;
+    for every other pool nothing happens and ``None`` is returned.
+    """
+    if _infer_backend(pool) != "dask":
+        return None
+    return _maybe_leave_pool_dask()
+
+
+def maybe_rejoin_pool(is_worker, pool):
+    """Logic required for nested parallelism in dask.distributed.
+
+    ``_rejoin_pool_dask()`` is called only when ``is_worker`` is truthy and
+    ``pool`` is a dask pool.
+    """
+    if not is_worker:
+        return
+    if _infer_backend(pool) == "dask":
+        _rejoin_pool_dask()
+
+
+def submit(pool, fn, *args, **kwargs):
+    """Submit ``fn(*args, **kwargs)`` to ``pool`` and return its result.
+
+    For dask pools ``pure=False`` is added to the keyword arguments unless
+    the caller already supplied ``pure``.
+    """
+    if _infer_backend(pool) == "dask" and "pure" not in kwargs:
+        kwargs["pure"] = False
+    return pool.submit(fn, *args, **kwargs)
+
+
+def scatter(pool, data):
+    """Maybe turn ``data`` into a remote object or reference.
+
+    Dask and ray pools return ``pool.scatter(data)``; for any other pool
+    ``data`` is handed back unchanged.
+    """
+    backend = _infer_backend(pool)
+    if backend == "dask" or backend == "ray":
+        return pool.scatter(data)
+    return data
+
+
+def can_scatter(pool):
+    """Whether ``pool`` can make objects remote (true for dask and ray)."""
+    backend = _infer_backend(pool)
+    return backend in ("dask", "ray")
+
+
+def should_nest(pool):
+    """Decide whether nested parallelism should be tried with ``pool``.
+
+    Returns the backend name (``'ray'`` or ``'dask'``) for those two
+    backends, and ``False`` otherwise, including when ``pool`` is ``None``.
+    """
+    if pool is None:
+        return False
+    backend = _infer_backend(pool)
+    return backend if backend in ("ray", "dask") else False
+
+
+# ---------------------------------- loky ----------------------------------- #
+
+
+@functools.lru_cache(1)
+def get_loky_get_reusable_executor():
+    """Return ``get_reusable_executor``, imported from ``loky`` if possible
+    and otherwise from the copy vendored in ``joblib``. The result is cached.
+    """
+    try:
+        from loky import get_reusable_executor as factory
+    except ImportError:
+        from joblib.externals.loky import get_reusable_executor as factory
+    return factory
+
+
+# --------------------------- concurrent.futures ---------------------------- #
+
+
+class CachedProcessPoolExecutor:
+    """Callable holder of a single ``ProcessPoolExecutor``.
+
+    Calling the instance with a worker count returns the held pool, after
+    replacing it by a new one when the count differs from the previous call.
+    The pool is shut down at interpreter exit via ``atexit``.
+    """
+
+    def __init__(self):
+        self._pool = None
+        # -1 never equals a requested worker count, so the first call
+        # always creates a pool
+        self._n_workers = -1
+        atexit.register(self.shutdown)
+
+    def __call__(self, n_workers=None):
+        if n_workers == self._n_workers:
+            return self._pool
+
+        from concurrent.futures import ProcessPoolExecutor
+
+        # worker count changed: drop the old pool and build a new one
+        self.shutdown()
+        self._pool = ProcessPoolExecutor(n_workers)
+        self._n_workers = n_workers
+        return self._pool
+
+    def is_initialized(self):
+        """Whether a pool is currently held."""
+        return self._pool is not None
+
+    def shutdown(self):
+        """Shut down and forget the held pool, if there is one."""
+        if self._pool is None:
+            return
+        self._pool.shutdown()
+        self._pool = None
+
+    def __del__(self):
+        self.shutdown()
+
+
+# module-wide handler that owns the cached process pool
+ProcessPoolHandler = CachedProcessPoolExecutor()
+
+
+def _get_process_pool_cf(n_workers=None):
+    """Return the process pool held by ``ProcessPoolHandler``."""
+    handler = ProcessPoolHandler
+    return handler(n_workers)
+
+
+class CachedThreadPoolExecutor:
+    """Callable holder of a single ``ThreadPoolExecutor``.
+
+    Calling the instance with a worker count returns the held pool, after
+    replacing it by a new one when the count differs from the previous call.
+    The pool is shut down at interpreter exit via ``atexit``.
+    """
+
+    def __init__(self):
+        self._pool = None
+        # -1 never equals a requested worker count, so the first call
+        # always creates a pool
+        self._n_workers = -1
+        atexit.register(self.shutdown)
+
+    def __call__(self, n_workers=None):
+        if n_workers == self._n_workers:
+            return self._pool
+
+        from concurrent.futures import ThreadPoolExecutor
+
+        # worker count changed: drop the old pool and build a new one
+        self.shutdown()
+        self._pool = ThreadPoolExecutor(n_workers)
+        self._n_workers = n_workers
+        return self._pool
+
+    def is_initialized(self):
+        """Whether a pool is currently held."""
+        return self._pool is not None
+
+    def shutdown(self):
+        """Shut down and forget the held pool, if there is one."""
+        if self._pool is None:
+            return
+        self._pool.shutdown()
+        self._pool = None
+
+    def __del__(self):
+        self.shutdown()
+
+
+# module-wide handler that owns the cached thread pool
+ThreadPoolHandler = CachedThreadPoolExecutor()
+
+
+def _get_thread_pool_cf(n_workers=None):
+    """Return the thread pool held by ``ThreadPoolHandler``."""
+    handler = ThreadPoolHandler
+    return handler(n_workers)
+
+
+# ---------------------------------- DASK ----------------------------------- #
+
+
+def _get_pool_dask(n_workers=None, maybe_create=False):
+    """Look up the current dask.distributed client, optionally creating one.
+
+    Parameters
+    ----------
+    n_workers : None or int, optional
+        The number of workers to request if creating a new client. When
+        given, it is also compared with the client's worker count and a
+        warning is issued if they differ.
+    maybe_create : bool, optional
+        If true, a missing ``dask.distributed`` raises ``ImportError`` and a
+        missing client leads to a new local cluster and client. If false,
+        ``None`` is returned in both situations.
+
+    Returns
+    -------
+    None or dask.distributed.Client
+    """
+    try:
+        from dask.distributed import get_client
+    except ImportError:
+        if maybe_create:
+            raise
+        return None
+
+    try:
+        client = get_client()
+    except ValueError:
+        if not maybe_create:
+            return None
+
+        import shutil
+        import tempfile
+
+        from dask.distributed import Client, LocalCluster
+
+        scratch_dir = tempfile.mkdtemp()
+        cluster = LocalCluster(
+            n_workers=n_workers,
+            threads_per_worker=1,
+            local_directory=scratch_dir,
+            memory_limit=0,
+        )
+        client = Client(cluster)
+
+        warnings.warn(
+            "Parallel specified but no existing global dask client found... "
+            "created one (with {} workers).".format(get_n_workers(client))
+        )
+
+        # remove the scratch directory again when the interpreter exits
+        def delete_local_dask_directory():
+            shutil.rmtree(scratch_dir, ignore_errors=True)
+
+        atexit.register(delete_local_dask_directory)
+
+    if n_workers is None:
+        return client
+
+    current_n_workers = get_n_workers(client)
+    if n_workers != current_n_workers:
+        warnings.warn(
+            "Found existing client (with {} workers which) doesn't match "
+            "the requested {}... using it instead.".format(
+                current_n_workers, n_workers
+            )
+        )
+
+    return client
+
+
+def _maybe_leave_pool_dask():
+    """Try to ``secede`` from the dask worker pool (for nested parallelism).
+
+    Returns ``True`` if ``secede()`` succeeded and ``False`` if importing it
+    or calling it raised ``ImportError`` or ``ValueError``.
+    """
+    try:
+        from dask.distributed import secede
+
+        secede()  # for nested parallelism
+    except (ImportError, ValueError):
+        return False
+    return True
+
+
+def _rejoin_pool_dask():
+    """Call ``dask.distributed.rejoin()``."""
+    from dask.distributed import rejoin as rejoin_pool
+
+    rejoin_pool()
+
+
+# ----------------------------------- RAY ----------------------------------- #
+
+
+@functools.lru_cache(None)
+def get_ray():
+    """Import ``ray`` on first use and return the (cached) module."""
+    import ray as ray_module
+
+    return ray_module
+
+
+class RayFuture:
+    """Minimal ``concurrent.futures`` style future around a ray ``ObjectRef``."""
+
+    __slots__ = ("_obj", "_cancelled")
+
+    def __init__(self, obj):
+        self._obj = obj
+        self._cancelled = False
+
+    def result(self, timeout=None):
+        """Fetch the value with ``ray.get``, forwarding ``timeout``."""
+        ray = get_ray()
+        return ray.get(self._obj, timeout=timeout)
+
+    def done(self):
+        """True once cancelled, or when ``ray.wait`` reports the object ready."""
+        if self._cancelled:
+            return True
+        ready = get_ray().wait([self._obj], timeout=0)[0]
+        return bool(ready)
+
+    def cancel(self):
+        """Cancel the task with ``ray.cancel`` and mark this future cancelled."""
+        get_ray().cancel(self._obj)
+        self._cancelled = True
+
+
+def _unpack_futures_tuple(x):
+    """Unpack every item of a tuple, returning a new tuple."""
+    return tuple(_unpack_futures(item) for item in x)
+
+
+def _unpack_futures_list(x):
+    """Unpack every item of a list, returning a new list."""
+    return [_unpack_futures(item) for item in x]
+
+
+def _unpack_futures_dict(x):
+    """Unpack every value of a dict, returning a new dict with the same keys."""
+    return {key: _unpack_futures(value) for key, value in x.items()}
+
+
+def _unpack_futures_identity(x):
+    """Return ``x`` untouched."""
+    return x
+
+
+# handler per exact class; classes not listed fall back to the identity
+_unpack_dispatch = collections.defaultdict(lambda: _unpack_futures_identity)
+_unpack_dispatch.update(
+    {
+        RayFuture: operator.attrgetter("_obj"),
+        tuple: _unpack_futures_tuple,
+        list: _unpack_futures_list,
+        dict: _unpack_futures_dict,
+    }
+)
+
+
+def _unpack_futures(x):
+    """Allows passing futures by reference - takes e.g. args and kwargs and
+    replaces all ``RayFuture`` objects with their underlying ``ObjectRef``
+    within all nested tuples, lists and dicts.
+
+    [Subclassing ``ObjectRef`` might avoid needing this.]
+    """
+    handler = _unpack_dispatch[x.__class__]
+    return handler(x)
+
+
+@functools.lru_cache(2**14)
+def get_remote_fn(fn, **remote_opts):
+    """Return ``fn`` wrapped by ``ray.remote``, cached per ``fn`` and options.
+
+    ``remote_opts``, when given, are passed to ``ray.remote`` as options.
+    """
+    ray = get_ray()
+    if not remote_opts:
+        return ray.remote(fn)
+    return ray.remote(**remote_opts)(fn)
+
+
+@functools.lru_cache(2**14)
+def get_fn_as_remote_object(fn):
+    """Store ``fn`` with ``ray.put`` and return the result, cached per ``fn``."""
+    return get_ray().put(fn)
+
+
+@functools.lru_cache(None)
+def get_deploy(**remote_opts):
+    """Alternative for 'non-function' callables - e.g. partial
+    functions - pass the callable object too.
+
+    Returns a ray remote version of ``deploy(fn, *args, **kwargs)``, which
+    simply calls ``fn(*args, **kwargs)``. Cached per set of ``remote_opts``.
+    """
+    ray = get_ray()
+
+    def deploy(fn, *args, **kwargs):
+        return fn(*args, **kwargs)
+
+    if not remote_opts:
+        return ray.remote(deploy)
+    return ray.remote(**remote_opts)(deploy)
+
+
+class RayExecutor:
+    """Small ``concurrent.futures`` style executor backed by ``ray``."""
+
+    def __init__(self, *args, default_remote_opts=None, **kwargs):
+        # positional and remaining keyword arguments are given to
+        # ``ray.init``, which is only called if ray is not initialized yet
+        ray = get_ray()
+        if not ray.is_initialized():
+            ray.init(*args, **kwargs)
+
+        if default_remote_opts is None:
+            self.default_remote_opts = {}
+        else:
+            self.default_remote_opts = dict(default_remote_opts)
+
+    def _maybe_inject_remote_opts(self, remote_opts=None):
+        """Return the default remote options, with any options supplied by a
+        ``submit`` call taking precedence over them.
+        """
+        if remote_opts is None:
+            return self.default_remote_opts
+        return {**self.default_remote_opts, **remote_opts}
+
+    def submit(self, fn, *args, pure=False, remote_opts=None, **kwargs):
+        """Remotely run ``fn(*args, **kwargs)``, returning a ``RayFuture``.
+
+        ``pure`` is accepted but not used by this method.
+        """
+        # futures are passed by reference: swap them for their ObjectRef
+        call_args = _unpack_futures_tuple(args)
+        call_kwargs = _unpack_futures_dict(kwargs)
+
+        ropts = self._maybe_inject_remote_opts(remote_opts)
+
+        # this is the same test ray uses to accept functions
+        if not inspect.isfunction(fn):
+            # other callables are stored remotely and run through ``deploy``
+            fn_obj = get_fn_as_remote_object(fn)
+            deploy = get_deploy(**ropts)
+            return RayFuture(deploy.remote(fn_obj, *call_args, **call_kwargs))
+
+        # plain functions can use the faster cached remote function
+        remote_fn = get_remote_fn(fn, **ropts)
+        return RayFuture(remote_fn.remote(*call_args, **call_kwargs))
+
+    def map(self, func, *iterables, remote_opts=None):
+        """Remote map ``func`` over arguments ``iterables``.
+
+        All calls are launched up front; the returned iterator fetches the
+        results one by one with ``ray.get``.
+        """
+        ropts = self._maybe_inject_remote_opts(remote_opts)
+        launch = get_remote_fn(func, **ropts).remote
+        refs = tuple(map(launch, *iterables))
+        return map(get_ray().get, refs)
+
+    def scatter(self, data):
+        """Push ``data`` into the distributed store, returning an ``ObjectRef``
+        that can be supplied to ``submit`` calls for example.
+        """
+        return get_ray().put(data)
+
+    def shutdown(self):
+        """Shutdown the parent ray cluster, this ``RayExecutor`` instance
+        itself does not need any cleanup.
+        """
+        ray = get_ray()
+        ray.shutdown()
+
+
+# the single RayExecutor handed out by ``_get_pool_ray`` (None until created)
+_RAY_EXECUTOR = None
+
+
+def _get_pool_ray(n_workers=None, maybe_create=False):
+    """Return the module's ``RayExecutor``, optionally creating it (which
+    initializes ray).
+
+    Parameters
+    ----------
+    n_workers : None or int, optional
+        Passed as ``num_cpus`` when a new ``RayExecutor`` is created. When
+        given, it is also compared with the executor's worker count and a
+        warning is issued if they differ.
+    maybe_create : bool, optional
+        If true, a missing ``ray`` raises ``ImportError`` and a new
+        ``RayExecutor`` is created when none exists or ray is not
+        initialized. If false, ``None`` is returned in those situations.
+
+    Returns
+    -------
+    None or RayExecutor
+    """
+    global _RAY_EXECUTOR
+
+    try:
+        import ray
+    except ImportError:
+        if maybe_create:
+            raise
+        return None
+
+    needs_new_executor = (_RAY_EXECUTOR is None) or (not ray.is_initialized())
+    if needs_new_executor:
+        if not maybe_create:
+            return None
+        _RAY_EXECUTOR = RayExecutor(num_cpus=n_workers)
+
+    if n_workers is None:
+        return _RAY_EXECUTOR
+
+    current_n_workers = get_n_workers(_RAY_EXECUTOR)
+    if n_workers != current_n_workers:
+        warnings.warn(
+            "Found initialized ray (with {} workers which) doesn't match "
+            "the requested {}... sticking with old number.".format(
+                current_n_workers, n_workers
+            )
+        )
+
+    return _RAY_EXECUTOR
--- a/.venv/lib/python3.12/site-packages/qmatchatea/py_emulator.py
+++ b/.venv/lib/python3.12/site-packages/qmatchatea/py_emulator.py
--- a/.venv/lib/python3.12/site-packages/qredtea/torchapi/qteatorchtensor.py
+++ b/.venv/lib/python3.12/site-packages/qredtea/torchapi/qteatorchtensor.py
--- a/.venv/lib/python3.12/site-packages/qtealeaves/emulator/mpi_mps_simulator.py
+++ b/.venv/lib/python3.12/site-packages/qtealeaves/emulator/mpi_mps_simulator.py
@@ -0,0 +1,691 @@
+# This code is part of qtealeaves.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+
+"""
+This module contains the MPI version of the MPS simulator.
+
+Code for the MPI simulations should be run as:
+
+.. code-block::
+   mpiexec -n 4 python my_mpi_script.py
+
+where we used 4 processes as an example.
+"""
+import os
+
+import numpy as np
+
+from qtealeaves.convergence_parameters import TNConvergenceParameters
+from qtealeaves.tensors import TensorBackend
+from qtealeaves.tooling.mpisupport import MPI, TN_MPI_TYPES
+
+from .mps_simulator import MPS
+
+__all__ = ["MPIMPS"]
+
+
+def _mpi_array_dtype(array):
+    """Return the MPI dtype for numpy arrays and CPU tensor buffers.
+
+    A dtype with a ``str`` attribute (as numpy dtypes have) is looked up in
+    ``TN_MPI_TYPES``; any other dtype is looked up in a table of torch
+    dtypes. Unknown dtypes raise ``KeyError`` in both cases.
+    """
+    dtype = array.dtype
+    if hasattr(dtype, "str"):
+        return TN_MPI_TYPES[dtype.str]
+
+    # qredtea torch singular values are raw torch.Tensor objects, not
+    # QteaTorchTensor instances, so they do not expose dtype_mpi().
+    import torch
+
+    # NOTE(review): torch.int64 is paired with MPI.INT here - confirm this
+    # is the intended MPI type for 64-bit integers.
+    torch_to_mpi = {
+        torch.complex128: MPI.DOUBLE_COMPLEX,
+        torch.complex64: MPI.COMPLEX,
+        torch.float64: MPI.DOUBLE_PRECISION,
+        torch.float32: MPI.REAL,
+        torch.int64: MPI.INT,
+    }
+    return torch_to_mpi[dtype]
+
+
+def _mpi_send_array(comm, array, to_):
+    """Send ``array`` to rank ``to_`` with ``comm.Send``.
+
+    Objects that have a ``resolve_conj`` method are first replaced by
+    ``array.resolve_conj().contiguous()``.
+    """
+    if hasattr(array, "resolve_conj"):
+        array = array.resolve_conj().contiguous()
+    buffer_spec = [array, _mpi_array_dtype(array)]
+    comm.Send(buffer_spec, to_)
+
+
+def _mpi_empty_like(array, shape):
+    """Allocate an uninitialized buffer of ``shape`` with the dtype of ``array``.
+
+    Objects without a ``resolve_conj`` method get a numpy array; all others
+    get a torch tensor on the CPU.
+    """
+    if not hasattr(array, "resolve_conj"):
+        return np.empty(shape, array.dtype)
+
+    import torch
+
+    return torch.empty(shape, dtype=array.dtype, device="cpu")
+
+
+def _mpi_recv_array(comm, template, shape, from_):
+    """Receive an array of ``shape`` from rank ``from_`` with ``comm.Recv``.
+
+    The receive buffer is allocated like ``template``. If ``template`` has a
+    ``device`` attribute and the buffer has a ``to`` method, the received
+    data is moved to ``template.device`` before being returned.
+    """
+    received = _mpi_empty_like(template, shape)
+    comm.Recv([received, _mpi_array_dtype(received)], from_)
+
+    movable = hasattr(template, "device") and hasattr(received, "to")
+    if movable:
+        received = received.to(device=template.device)
+    return received
+
+
+# pylint: disable-next=too-many-instance-attributes
+class MPIMPS(MPS):
+    """
+    MPI version of the MPS emulator that divides the MPS between the different nodes
+
+    Parameters
+    ----------
+    num_sites: int
+        Number of sites
+    convergence_parameters: :py:class:`TNConvergenceParameters`
+        Class for handling convergence parameters. In particular, in the MPS simulator we are
+        interested in:
+        - the *maximum bond dimension* :math:`\\chi`;
+        - the *cut ratio* :math:`\\epsilon` after which the singular
+            values are neglected, i.e. if :math:`\\lambda_1` is the
+            largest singular value, then after an SVD we neglect all the
+            singular values such that :math:`\\frac{\\lambda_i}{\\lambda_1}\\leq\\epsilon`
+    local_dim: int or list of ints, optional
+        Local dimension of the degrees of freedom. Default to 2.
+        If a list is given, then it must have length num_sites.
+    initialize: str, optional
+        The method for the initialization. Default to "vacuum"
+        Available:
+        - "vacuum", for the |000...0> state
+        - "random", for a random state at given bond dimension
+    tensor_backend : `None` or instance of :class:`TensorBackend`
+        Default for `None` is :class:`QteaTensor` with np.complex128 on CPU.
+
+    """
+
+    # pylint: disable-next=too-many-arguments
+    def __init__(
+        self,
+        num_sites,
+        convergence_parameters,
+        local_dim=2,
+        initialize="vacuum",
+        tensor_backend=None,
+    ):
+        # Split the `num_sites` sites of the full chain over the MPI ranks
+        # and build the local MPS (with one extra boundary site on every rank
+        # but the last one) through the parent constructor.
+        if MPI is None:
+            raise ImportError("No module mpi4py found in python environment")
+        # MPI variables
+        # pylint: disable-next=c-extension-no-member
+        self.comm = MPI.COMM_WORLD
+        self.size = self.comm.Get_size()
+        self.rank = self.comm.Get_rank()
+        # Number of sites of the full chain (over all ranks)
+        self.tot_sites = num_sites
+
+        # Number of sites in the local MPS
+        modulus = num_sites % self.size
+        local_num_size = int(np.floor(num_sites // self.size))
+        # Leading 0 followed by the number of sites owned by each rank: the
+        # first `modulus` ranks get one site more than the others
+        self.indexes = [0] + [
+            local_num_size + 1 if ii < modulus else local_num_size
+            for ii in range(self.size)
+        ]
+        local_num_size = self.indexes[self.rank + 1]
+
+        # indexes takes into account which indexes are in each core: after
+        # the cumulative sum, rank r owns the sites of the full chain from
+        # indexes[r] up to (excluding) indexes[r + 1]
+        self.indexes = np.cumsum(self.indexes)
+
+        # The par_map is a dictionary where the key is the position of the
+        # site in the full chain, while the value is its position on the
+        # subchain in this process
+        self.par_map = dict(
+            zip(
+                np.arange(
+                    self.indexes[self.rank], self.indexes[self.rank + 1], dtype=int
+                ),
+                np.arange(local_num_size, dtype=int),
+            )
+        )
+
+        # Auxiliary site for the boundaries (every rank except the last one)
+        if self.rank < self.size - 1:
+            local_num_size += 1
+
+        # A non-scalar local_dim is cut down to the entries of the local
+        # sites, plus one more entry on the ranks that have an auxiliary site
+        if not np.isscalar(local_dim):
+            local_dim = local_dim[
+                self.indexes[self.rank] : self.indexes[self.rank + 1]
+                + int(self.rank != (self.size - 1))
+            ]
+
+        super().__init__(
+            local_num_size,
+            convergence_parameters,
+            local_dim=local_dim,
+            initialize=initialize,
+            tensor_backend=tensor_backend,
+        )
+
+        # MPS initialization not aware of device
+        self.convert(self.tensor_backend.dtype, self.tensor_backend.memory_device)
+
+    @property
+    def mpi_dtype(self):
+        """MPI dtype of the MPS, looked up from the dtype of the first tensor."""
+        first_dtype = np.dtype(self[0].dtype)
+        return TN_MPI_TYPES[first_dtype.str]
+
+    def get_tensor_of_site(self, idx):
+        """
+        Retrieve the tensor of a specific site.
+
+        `idx` is the position in the full chain; it is translated to the
+        local position through `par_map`, so a site that is not in
+        `par_map` raises a `KeyError`.
+        """
+        local_idx = self.par_map[idx]
+        return self[local_idx]
+
+    def apply_one_site_operator(self, op, pos):
+        """
+        Applies a one-site operator `op` to the site `pos` of the MPIMPS.
+        Instead of communicating the changes on the boundaries we
+        perform an additional contraction.
+
+        Parameters
+        ----------
+        op: numpy array shape (local_dim, local_dim)
+            Matrix representation of the quantum gate
+        pos: int
+            Position of the qubit where to apply `op`, in the full chain.
+
+        Returns
+        -------
+        None
+        """
+        # The site belongs to this process: apply the gate at its local index
+        if pos in self.par_map:
+            super().apply_one_site_operator(op, self.par_map[pos])
+            return None
+
+        # The previous site belongs to this process: instead of communicating,
+        # apply the gate to the last local site as well (the auxiliary copy
+        # of the boundary)
+        if pos - 1 in self.par_map:
+            super().apply_one_site_operator(op, self.num_sites - 1)
+
+        return None
+
+    # pylint: disable-next=too-many-arguments
+    def apply_two_site_operator(self, op, pos, swap=False, svd=None, parallel=None):
+        """
+        Applies a two-site operator `op` to the sites `pos`, `pos+1` of the MPS.
+        Then, performs the necessary communication between the process that
+        applied the operator and the neighbouring process that holds a copy
+        of the boundary tensor.
+
+        Parameters
+        ----------
+        op: numpy array shape (local_dim, local_dim, local_dim, local_dim)
+            Matrix representation of the quantum gate
+        pos: int or list of ints
+            Position of the qubit where to apply `op`. If a list is passed,
+            it must have length 2 and the smaller of the two entries is used
+            as position.
+        swap: bool
+            Forwarded to the parent implementation. The original
+            documentation states that, if True, the operator is swapped.
+        svd : None
+            Required for compatibility. The value passed is ignored: the
+            parent implementation is always called with `svd=True`.
+        parallel: None or bool
+            Forwarded to the parent implementation. If None, it is read from
+            the environment variable `QTEALEAVES_MPIMPS_PARALLEL` (default
+            "1"); the values "0", "false", "no" and "off" (case insensitive)
+            disable it.
+
+        Returns
+        -------
+        The return value of the parent `apply_two_site_operator` if `pos`
+        belongs to this process (per the original documentation, the array of
+        singular values cut, normalized to the biggest singular value);
+        an empty list otherwise.
+
+        Raises
+        ------
+        ValueError
+            If `pos` is not a scalar and does not have length 2.
+        """
+        if not np.isscalar(pos) and len(pos) == 2:
+            pos = min(pos[0], pos[1])
+        elif not np.isscalar(pos):
+            raise ValueError(
+                f"pos should be only scalar or len 2 array-like, not len {len(pos)}"
+            )
+
+        # Hardcoded but necessary for compatibility
+        svd = True
+        if parallel is None:
+            parallel_env = os.environ.get("QTEALEAVES_MPIMPS_PARALLEL", "1").lower()
+            parallel = parallel_env not in ("0", "false", "no", "off")
+
+        if pos in self.par_map:
+            # The first site of the pair is local: apply the gate here
+            res = super().apply_two_site_operator(
+                op, self.par_map[pos], swap, svd=svd, parallel=parallel
+            )
+
+            # Send the information back to the auxiliary if it was the first site
+            if self.par_map[pos] == 0 and self.rank > 0:
+                self.mpi_send_tensor(self[0], to_=self.rank - 1)
+                _mpi_send_array(self.comm, self.singvals[1], self.rank - 1)
+
+            # Send the information towards the next if it was the last site
+            elif self.par_map[pos] == self.num_sites - 2 and self.rank < self.size - 1:
+                self.mpi_send_tensor(self[self.num_sites - 1], to_=self.rank + 1)
+                _mpi_send_array(
+                    self.comm, self.singvals[self.num_sites - 1], self.rank + 1
+                )
+
+        else:
+            # The gate is applied by another process; this process only
+            # receives an updated boundary tensor if it is a neighbour
+            res = []
+            # Receive the information from the MPS on the right
+            if pos == self.indexes[self.rank + 1] and self.rank < self.size - 1:
+                tens = self.mpi_receive_tensor(from_=self.rank + 1)
+
+                self[self.num_sites - 1] = tens
+
+                # NOTE(review): the existing singular values are used as
+                # template for the receive buffer - confirm they are always
+                # set at this point
+                singvals = _mpi_recv_array(
+                    self.comm,
+                    self.singvals[self.num_sites],
+                    tens.shape[2],
+                    self.rank + 1,
+                )
+                self._singvals[self.num_sites] = singvals
+
+            # Receive the information from the MPS from the left
+            if pos == self.indexes[self.rank] - 1 and self.rank > 0:
+                tens = self.mpi_receive_tensor(from_=self.rank - 1)
+                self[0] = tens
+
+                singvals = _mpi_recv_array(
+                    self.comm,
+                    self.singvals[0],
+                    tens.shape[0],
+                    self.rank - 1,
+                )
+                self._singvals[0] = singvals
+
+        return res
+
+    def apply_projective_operator(self, site, selected_output=None, remove=False):
+        """
+        Apply a projective operator to the site **site** of the full chain and
+        give the measurement as output. The measurement itself is only done by
+        the process that holds the site; every process takes part in the
+        isometry sweeps before and after it.
+
+        Parameters
+        ----------
+        site: int
+            Index of the site you want to measure
+        selected_output: int, optional
+            Forwarded to the parent implementation. If provided, the selected
+            state is measured. Throw an error if the probability of the
+            state is 0
+        remove: bool, optional
+            Forwarded to the parent implementation. If True, the measured
+            index is traced away after the measurement. Default to False.
+
+        Returns
+        -------
+        meas_state: int | None
+            Measured state or None if site not in this part of the MPI-MPS.
+        state_prob : float | None
+            Probability of measuring the output state or None if site not
+            in this part of the MPI-MPS.
+        """
+        # Isometry sweep with the default arguments before measuring
+        self.reinstall_isometry_serial()
+
+        res = (None, None)
+        if site in self.par_map:
+            res = super().apply_projective_operator(
+                self.par_map[site], selected_output, remove
+            )
+
+        # Sweep starting from the measured site with left=False ...
+        self.reinstall_isometry_serial(left=False, from_site=site)
+        # ... followed by another sweep with the default arguments
+        self.reinstall_isometry_serial()
+
+        return res
+
+    # pylint: disable-next=arguments-differ
+    def reinstall_isometry_serial(self, left=False, from_site=None):
+        """
+        Reinstall the isometry by sweeping over the processes one after the
+        other: each process in turn canonizes its local MPS and sends its
+        boundary tensor to the next process of the sweep, which stores it.
+
+        This step is serial because we have to serially pass the information
+        along the MPS. It cannot be parallelized.
+
+        Parameters
+        ----------
+        left: bool, optional
+            If True, sweep towards the left (decreasing rank).
+            If False, towards the right (increasing rank). Default to False
+        from_site: int, optional
+            The site from which the isometrization should start.
+            By default None, which selects `self.num_sites - 1` if `left`
+            else 0.
+
+        Returns
+        -------
+        None
+        """
+        # NOTE(review): the default uses self.num_sites, presumably the number
+        # of local sites rather than self.tot_sites - confirm this is intended
+        if from_site is None:
+            from_site = self.num_sites - 1 if left else 0
+        # Position of the first entry of self.indexes that is not smaller
+        # than from_site; used as the rank where the sweep starts
+        extrem = np.nonzero(from_site <= self.indexes)[0][0]
+
+        # boundaries: range of ranks visited by the sweep
+        # tidx: local index of the tensor sent to the neighbour
+        # to_ / from_: ranks to send to / receive from
+        if left:
+            boundaries = (extrem, -1, -1)
+            tidx = 0
+            to_ = self.rank - 1
+            from_ = self.rank + 1
+        else:
+            boundaries = (extrem, self.size, 1)
+            tidx = self.num_sites - 1
+            to_ = self.rank + 1
+            from_ = self.rank - 1
+
+        for ii in range(*boundaries):
+            if self.rank == ii:
+                # It is the turn of this process: canonize the local MPS,
+                # with the singular values requirement temporarily switched on
+                self._first_non_orthogonal_left = self.num_sites - 1
+                self._first_non_orthogonal_right = self.num_sites - 1
+                requires_singvals = self._requires_singvals
+                self._requires_singvals = True
+                if left:
+                    self.right_canonize(0, False, True)
+                else:
+                    self.left_canonize(self.num_sites - 1, False, True)
+                self._requires_singvals = requires_singvals
+
+                # Send tensor (unless there is no further process in the
+                # direction of the sweep)
+                if (self.rank > 0 and left) or (self.rank + 1 < self.size and not left):
+                    self.mpi_send_tensor(self[tidx], to_=to_)
+
+            elif (self.rank == ii - 1 and left) or (self.rank == ii + 1 and not left):
+                # Receive tensor from the process whose turn it is and store
+                # it on the opposite end of the local MPS
+                tens = self.mpi_receive_tensor(from_=from_)
+                self[self.num_sites - 1 - tidx] = tens
+
+    # pylint: disable-next=arguments-differ
+    def reinstall_isometry_parallel(self, num_cycles):
+        """
+        Reinstall the isometry by applying identity two-site operators, first
+        to all pairs starting on an even site and then to all pairs starting
+        on an odd site, and repeating for `num_cycles` cycles.
+        The reinstallation is exact for `num_cycles=num_sites/2`.
+        Method from https://arxiv.org/abs/2312.02667
+
+        Parameters
+        ----------
+        num_cycles: int
+            Number of cycles for reinstalling the isometry
+
+        Returns
+        -------
+        None
+        """
+        last_start = self.tot_sites - 1
+        for _ in range(num_cycles):
+            # offset 0: pairs starting on even sites, offset 1: on odd sites
+            for offset in (0, 1):
+                for ii in range(offset, last_start, 2):
+                    self.apply_two_site_operator(
+                        self[0].eye_like(4), ii, svd=True, parallel=True
+                    )
+
+    def mpi_gather_tn(self):
+        """
+        Gather the tensors on process 0.
+        We do not use MPI.comm.Gather because we would gather lists of np.arrays
+        without using the np.array advantages, making it slower than the single
+        communications.
+
+        Every rank different from 0 sends its tensors to rank 0 one at a time:
+        all `num_sites` local tensors on the last rank, `num_sites - 1` (the
+        last local tensor is left out) on the other ranks. Rank 0 keeps all of
+        its own tensors but the last one and then receives rank after rank, in
+        increasing rank order. The exchange is enclosed between two barriers.
+
+        NOTE(review): when `size == 1` the receive loop is empty, so the
+        entries after `num_sites - 1` stay `None` -- confirm a single-process
+        run is not expected here.
+
+        Returns
+        -------
+        list of np.ndarray or None
+            List of tensors on the rank 0 process, None on the others
+        """
+        self.comm.Barrier()
+        if self.rank != 0:
+            # The last rank sends every local tensor; the others skip their last
+            # one (presumably shared with the next rank -- TODO confirm)
+            num_tensors = (
+                self.num_sites if self.rank == self.size - 1 else self.num_sites - 1
+            )
+            for jj in range(num_tensors):
+                self.mpi_send_tensor(self[jj], to_=0)
+            tensor_list = None
+        else:
+            # Rank 0 contributes its own tensors except the last one
+            tensor_list = [None for _ in range(self.tot_sites)]
+            tensor_list[: self.num_sites - 1] = self.tensors[:-1]
+
+            # tidx is the position in the global list of the next tensor to receive
+            tidx = self.num_sites - 1
+            for ii in range(1, self.size):
+                # Number of tensors expected from rank ii
+                num_tensors = self.indexes[ii + 1] - self.indexes[ii]
+                for jj in range(num_tensors):
+                    tens = self.mpi_receive_tensor(from_=ii)
+                    tensor_list[tidx + jj] = tens
+                tidx += num_tensors
+
+        self.comm.Barrier()
+
+        return tensor_list
+
+    def mpi_scatter_tn(self, tensor_list):
+        """
+        Scatter the tensors from process 0 to all the other processes.
+        We do not use MPI.comm.Scatter because we would scatter lists of
+        np.arrays without using the np.array advantages, making it slower than
+        the single communications.
+
+        Rank 0 sends the entries of `tensor_list[i]` one at a time to rank `i`
+        for every `i >= 1`. The other ranks only read the length of
+        `tensor_list[self.rank]` to know how many tensors to receive, so on
+        those ranks the entries themselves are never used. The exchange is
+        enclosed between two barriers.
+
+        Parameters
+        ----------
+        tensor_list : list of lists of np.ndarrays
+            The index i of the list is sent to the rank i
+
+        Returns
+        -------
+        list of np.ndarray
+            The tensors assigned to the calling process: `tensor_list[0]` on
+            rank 0, the list of received tensors on every other rank.
+        """
+        self.comm.Barrier()
+        if self.rank == 0:
+            # tensor_list[1:] is enumerated from 0, hence the `ridx + 1` destination
+            for ridx, sub_tensorlist in enumerate(tensor_list[1:]):
+                for idx, tens in enumerate(sub_tensorlist):
+                    self.mpi_send_tensor(tens, to_=ridx + 1)
+
+            tensor_list = tensor_list[0]
+        else:
+            # Only the length of the local sub-list is used on non-root ranks
+            num_tensors = len(tensor_list[self.rank])
+            tensor_list = [None for _ in range(num_tensors)]
+            for idx in range(num_tensors):
+                tens = self.mpi_receive_tensor(from_=0)
+                tensor_list[idx] = tens
+
+        self.comm.Barrier()
+
+        return tensor_list
+
+    def to_tensor_list(self):
+        """
+        Collect the tensors of the full MPS on process 0.
+
+        This is a thin wrapper around `mpi_gather_tn`, hence it involves
+        communications between the processes.
+
+        Returns
+        -------
+        list of np.ndarray or None
+            List of tensors on the rank 0 process, None on the others
+        """
+        gathered = self.mpi_gather_tn()
+        return gathered
+
+    def to_statevector(self, qiskit_order=False, max_qubit_equivalent=20):
+        """
+        Serially compute the statevector
+
+        The tensors are first gathered through `to_tensor_list`; rank 0 then
+        builds a serial `MPS` out of them and converts it to a statevector.
+
+        Parameters
+        ----------
+        qiskit_order: bool, optional
+            Whether to use qiskit ordering or the theoretical one. For
+            example the state |011> has 0 in the first position for the
+            theoretical ordering, while for qiskit ordering it is on the
+            last position.
+        max_qubit_equivalent: int, optional
+            Maximum number of qubit sites the MPS can have and still be
+            transformed into a statevector.
+            If the number of sites is greater, it will throw an exception.
+            Default to 20.
+
+        Returns
+        -------
+        np.ndarray or None
+            Statevector on process 0, None on the others
+        """
+        # The gather has to be called by every rank, so it comes before the
+        # rank check
+        gathered = self.to_tensor_list()
+        if self.rank != 0:
+            return None
+
+        serial_mps = MPS.from_tensor_list(gathered)
+        return serial_mps.to_statevector(qiskit_order, max_qubit_equivalent)
+
+    @classmethod
+    def from_tensor_list(
+        cls,
+        tensor_list,
+        conv_params=None,
+        tensor_backend=None,
+        target_device=None,
+    ):
+        """
+        Initialize the MPS tensors using a list of correctly shaped tensors
+
+        The full list is validated and converted on every calling rank; rank 0
+        then splits it into one sub-list per rank and distributes the sub-lists
+        through `mpi_scatter_tn`.
+
+        Parameters
+        ----------
+        tensor_list : list of ndarrays or cupy arrays
+            List of tensor for initializing the MPS
+        conv_params : :py:class:`TNConvergenceParameters`, optional
+            Convergence parameters for the new MPS. If None, the maximum
+            bond dimension possible is assumed, and a cut_ratio=1e-9.
+            Default to None.
+        tensor_backend : `None` or instance of :class:`TensorBackend`
+            Default for `None` is :class:`QteaTensor` with np.complex128 on CPU.
+        target_device: None | str, optional
+            If `None`, take memory device of tensor backend.
+            If string is `any`, do not convert. Otherwise,
+            use string as device string.
+
+        Returns
+        -------
+        obj : :py:class:`MPIMPS`
+            The MPIMPS class
+
+        Raises
+        ------
+        ValueError
+            If the last dimension of a tensor differs from the first dimension
+            of the following tensor.
+        """
+        # True wherever two neighbouring tensors do not share the link dimension
+        mismatches = [
+            tensor_list[ii].shape[2] != tensor_list[ii + 1].shape[0]
+            for ii in range(len(tensor_list) - 1)
+        ]
+        if any(mismatches):
+            msg = f"Mismatches for tensors equals to True: {mismatches}."
+            raise ValueError(f"Dimension mismatch when constructing MPS:{msg}")
+
+        if conv_params is None:
+            # Use the largest last-leg dimension found in the list as bond dimension
+            max_bond_dim = max(elem.shape[2] for elem in tensor_list)
+            conv_params = TNConvergenceParameters(max_bond_dimension=int(max_bond_dim))
+        if tensor_backend is None:
+            # Have to resolve it here in case target device is not given
+            tensor_backend = TensorBackend()
+        if target_device is None:
+            target_device = tensor_backend.memory_device
+        elif target_device == "any":
+            # "any" is forwarded to `convert` as None
+            target_device = None
+
+        # The middle dimension of each tensor is taken as the local dimension
+        local_dim = [elem.shape[1] for elem in tensor_list]
+        obj = cls(
+            len(tensor_list), conv_params, local_dim, tensor_backend=tensor_backend
+        )
+
+        # Convert data type and device of every input tensor (the return value
+        # is discarded, so `convert` presumably works in place -- TODO confirm)
+        for elem in tensor_list:
+            elem.convert(obj.tensor_backend.dtype, target_device)
+
+        if obj.rank == 0:
+            # One slice per rank; every rank but the last gets one extra tensor
+            # past its upper index
+            tensorlist = [
+                tensor_list[
+                    obj.indexes[rank] : obj.indexes[rank + 1]
+                    + int(rank != obj.size - 1)
+                ]
+                for rank in range(obj.size)
+            ]
+        else:
+            # Placeholders with the same lengths as the slices built on rank 0:
+            # `mpi_scatter_tn` only reads their length on non-root ranks
+            list_sizes = obj.indexes[1:] - obj.indexes[:-1] + 1
+            list_sizes[-1] -= 1
+            tensorlist = [
+                [None for _ in range(list_sizes[rank])] for rank in range(obj.size)
+            ]
+
+        tensor_list = obj.mpi_scatter_tn(tensorlist)
+        obj._tensors = tensor_list
+
+        return obj
+
+    @classmethod
+    def from_statevector(
+        cls,
+        statevector,
+        local_dim=2,
+        conv_params=None,
+        tensor_backend=None,
+    ):
+        """
+        Serially decompose the statevector and then initialize the MPS
+
+        The decomposition is delegated to `MPS.from_statevector`; its tensors
+        are then handed to `from_tensor_list`. There is no rank check here, so
+        every calling process performs the serial decomposition.
+
+        Parameters
+        ----------
+        statevector
+            State forwarded to `MPS.from_statevector`
+        local_dim : int, optional
+            Local dimension forwarded to `MPS.from_statevector`. Default to 2.
+        conv_params : optional
+            Convergence parameters, used both for the decomposition and for
+            the new object. Default to None.
+        tensor_backend : optional
+            Tensor backend, used both for the decomposition and for the new
+            object. Default to None.
+
+        Returns
+        -------
+        obj
+            The object built by `cls.from_tensor_list`
+        """
+        serial_mps = MPS.from_statevector(
+            statevector, local_dim, conv_params, tensor_backend=tensor_backend
+        )
+        tensors = serial_mps.to_tensor_list()
+
+        return cls.from_tensor_list(tensors, conv_params, tensor_backend=tensor_backend)
+
+    # ---------------------------
+    # ----- MEASURE METHODS -----
+    # ---------------------------
+
+    def meas_local(self, op_list):
+        """
+        Measure a local observable along all sites of the MPS
+
+        Each process measures its own sites through the parent class
+        implementation; the partial results are then collected on rank 0 with
+        one `Send`/`Recv` pair per non-root rank, in increasing rank order.
+
+        Parameters
+        ----------
+        op_list : list of :class:`_AbstractQteaTensor`
+            local operator to measure on each site
+
+        Return
+        ------
+        measures : ndarray of shape (tot_sites,) or None
+            Measures of the local operator along each site on rank 0,
+            None on the other ranks
+        """
+        res = super().meas_local(op_list)
+
+        # Call back on the site 0 the results
+        if self.rank != 0:
+            # The whole local result array is sent
+            self.comm.Send([res, self.mpi_dtype[res.dtype.str]], 0)
+            tot_res = None
+        else:
+            # Rank 0 contributes its own results except the last one
+            tot_res = np.empty(self.tot_sites, dtype=res.dtype)
+            tot_res[: self.num_sites - 1] = res[:-1]
+
+            tidx = self.num_sites - 1
+            for ii in range(1, self.size):
+                # NOTE(review): `mpi_gather_tn` sizes the chunk of rank ii as
+                # `indexes[ii + 1] - indexes[ii]`, while here it is
+                # `indexes[ii] - indexes[ii - 1]`, and the sender transmits the
+                # whole `res` -- confirm the receive buffer matches what is sent
+                num_tensors = self.indexes[ii] - self.indexes[ii - 1]
+                self.comm.Recv(
+                    [tot_res[tidx : tidx + num_tensors], self.mpi_dtype[res.dtype.str]],
+                    ii,
+                )
+                tidx += num_tensors
+
+        return tot_res
+
+    def _get_eff_op_on_pos(self, pos):
+        """
+        Placeholder for retrieving the effective operators adjacent to the
+        position `pos`, together with the index where each of them should be
+        contracted. This implementation always raises.
+
+        Parameters
+        ----------
+        pos : int
+            Index of the tensor w.r.t. which the effective operators have to
+            be retrieved
+
+        Returns
+        -------
+        list of IndexedOperators
+            List of effective operators
+        list of ints
+            Indexes where the operators should be contracted
+
+        Raises
+        ------
+        NotImplementedError
+            Always.
+        """
+        raise NotImplementedError("This function has to be overwritten")
--- a/.venv/lib/python3.12/site-packages/quimb/tensor/tn1d/core.py
+++ b/.venv/lib/python3.12/site-packages/quimb/tensor/tn1d/core.py
--- a/src/qibotn/init.py
+++ b/src/qibotn/init.py
@@ -1,5 +1,29 @@
 import importlib.metadata as im

-from qibotn.backends import MetaBackend
-
 __version__ = im.version(__package__)
+
+# Public name -> (module path, attribute name). The module-level __getattr__
+# below imports the module on first access, so importing the package itself
+# does not import these submodules.
+_LAZY_EXPORTS = {
+    "MetaBackend": ("qibotn.backends", "MetaBackend"),
+    "cpu_backend": ("qibotn.expectation_runner", "cpu_backend"),
+    "cpu_expectation": ("qibotn.expectation_runner", "cpu_expectation"),
+    "mps_expectation": ("qibotn.expectation_runner", "mps_expectation"),
+    "cpu_runcard": ("qibotn.expectation_runner", "cpu_runcard"),
+    "pauli_pattern": ("qibotn.observables", "pauli_pattern"),
+    "pauli_sum": ("qibotn.observables", "pauli_sum"),
+}
+
+
+def __getattr__(name):
+    """Resolve a lazily exported name on first attribute access (PEP 562).
+
+    The target module listed in ``_LAZY_EXPORTS`` is imported, the requested
+    object is looked up on it and then stored in the module globals, so that
+    later accesses no longer go through this hook.
+
+    Raises:
+        AttributeError: if ``name`` is not listed in ``_LAZY_EXPORTS``.
+    """
+    target = _LAZY_EXPORTS.get(name)
+    if target is None:
+        raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None
+
+    from importlib import import_module
+
+    module_name, object_name = target
+    value = getattr(import_module(module_name), object_name)
+    globals()[name] = value
+    return value
+
+
+__all__ = sorted([*_LAZY_EXPORTS, "__version__"])
--- a/src/qibotn/backends/init.py
+++ b/src/qibotn/backends/init.py
@@ -1,10 +1,6 @@
-from typing import Union
-
 from qibo.config import raise_error

 from qibotn.backends.abstract import QibotnBackend
-from qibotn.backends.cpu import CpuTensorNet
-from qibotn.backends.cutensornet import CuTensorNet  # pylint: disable=E0401

 PLATFORMS = ("cutensornet", "cpu", "quimb", "qmatchatea", "vidal")

@@ -24,8 +20,12 @@ class MetaBackend:
        """

        if platform == "cutensornet":  # pragma: no cover
+            from qibotn.backends.cutensornet import CuTensorNet
+
            return CuTensorNet(runcard)
        elif platform == "cpu":
+            from qibotn.backends.cpu import CpuTensorNet
+
            return CpuTensorNet(runcard)
        elif platform == "quimb":  # pragma: no cover
            import qibotn.backends.quimb as qmb
@@ -55,8 +55,8 @@ class MetaBackend:
        for platform in PLATFORMS:
            try:
                MetaBackend.load(platform=platform)
-                available = True
-            except:
-                available = False
-            available_backends[platform] = available
+            except (ImportError, NotImplementedError, TypeError, ValueError):
+                available_backends[platform] = False
+            else:
+                available_backends[platform] = True
        return available_backends
--- a/src/qibotn/backends/cpu.py
+++ b/src/qibotn/backends/cpu.py
@@ -15,14 +15,9 @@ from qibo.config import raise_error
 from qibotn.backends.abstract import QibotnBackend
 from qibotn.backends.vidal import (
    _observable_mpo_tensors,
-    _operator_terms_to_mpo,
-    _symbolic_hamiltonian_to_operator_terms,
    _unsupported_reason,
 )
-from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
-from qibotn.backends.vidal_tebd import VidalTEBDExecutor
 from qibotn.observables import check_observable
-from qibotn.result import TensorNetworkResult


 def _as_bool_or_dict(value, name):
@@ -282,79 +277,35 @@ class CpuTensorNet(QibotnBackend, NumpyBackend):
    ):
        if compile_circuit is None:
            compile_circuit = self.compile_circuit
-        if preprocess:
-            if self.MPI_enabled:
-                from mpi4py import MPI
-
-                self.rank = MPI.COMM_WORLD.Get_rank()
-
-            from qibotn.backends.vidal import VidalBackend
-
-            backend = VidalBackend()
-            backend.configure_tn_simulation(
-                max_bond_dimension=self.max_bond_dimension,
-                cut_ratio=self.cut_ratio,
-                tensor_module=self.tensor_module,
-                compile_circuit=compile_circuit,
-                mpi_approach="CT" if self.MPI_enabled else "SR",
-                mpi_term_batch_size=self.mpi_term_batch_size,
-                fallback=False,
-            )
-            value = backend.expectation(
-                circuit,
-                observable,
-                preprocess=True,
-                compile_circuit=compile_circuit,
-            )
-            self.rank = getattr(backend, "rank", self.rank)
-            self.last_truncation_error = getattr(
-                backend, "last_truncation_error", np.nan
-            )
-            self.last_max_truncation_error = getattr(
-                backend, "last_max_truncation_error", np.nan
-            )
-            return value
-
-        mpo_tensors = _observable_mpo_tensors(observable, circuit.nqubits)
        if self.MPI_enabled:
            from mpi4py import MPI

-            comm = MPI.COMM_WORLD
-            self.rank = comm.Get_rank()
-            executor = SegmentVidalMPIExecutor(
-                nqubits=circuit.nqubits,
-                max_bond=self.max_bond_dimension,
-                cut_ratio=self.cut_ratio,
-                tensor_module=self.tensor_module,
-                comm=comm,
-            )
-            executor.run_circuit(circuit)
-            self.last_truncation_error = float(executor.global_truncation_error())
-            self.last_max_truncation_error = float(
-                executor.global_max_truncation_error()
-            )
-            if mpo_tensors is not None:
-                value = executor.expectation_mpo_root(mpo_tensors)
-            else:
-                terms = _symbolic_hamiltonian_to_operator_terms(observable)
-                value = executor.expectation_mpo_root(
-                    _operator_terms_to_mpo(terms, circuit.nqubits)
-                )
-            return np.nan if self.rank != 0 else value
+            self.rank = MPI.COMM_WORLD.Get_rank()

-        executor = VidalTEBDExecutor(
-            nqubits=circuit.nqubits,
-            max_bond=self.max_bond_dimension,
+        from qibotn.backends.vidal import VidalBackend
+
+        backend = VidalBackend()
+        backend.configure_tn_simulation(
+            max_bond_dimension=self.max_bond_dimension,
            cut_ratio=self.cut_ratio,
            tensor_module=self.tensor_module,
+            compile_circuit=compile_circuit,
+            mpi_approach="CT" if self.MPI_enabled else "SR",
+            mpi_term_batch_size=self.mpi_term_batch_size,
+            fallback=False,
        )
-        executor.run_circuit(circuit)
-        self.last_truncation_error = float(executor.truncation_error)
-        self.last_max_truncation_error = float(executor.max_truncation_error)
-        if mpo_tensors is not None:
-            return executor.expectation_mpo(mpo_tensors)
-        terms = _symbolic_hamiltonian_to_operator_terms(observable)
-        return executor.expectation_mpo(_operator_terms_to_mpo(terms, circuit.nqubits))
+        value = backend.expectation(
+            circuit,
+            observable,
+            preprocess=preprocess,
+            compile_circuit=compile_circuit,
+        )
+        self.rank = getattr(backend, "rank", self.rank)
+        self.last_truncation_error = getattr(backend, "last_truncation_error", np.nan)
+        self.last_max_truncation_error = getattr(
+            backend, "last_max_truncation_error", np.nan
+        )
+        return value

    def _quimb_backend(self):
        import qibotn.backends.quimb as qmb
--- a/src/qibotn/expectation_runner.py
+++ b/src/qibotn/expectation_runner.py
@@ -12,6 +12,50 @@ from qibotn.benchmark_cases import exact_pauli_sum
 from qibotn.observables import check_observable


+def cpu_runcard(
+    observable=None,
+    *,
+    ansatz: str = "tn",
+    mpi: bool = False,
+    bond: int | None = 1024,
+    cut_ratio: float | None = 1e-12,
+    tensor_module: str = "torch",
+    quimb_backend: str = "torch",
+    dtype: str = "complex128",
+    torch_threads: int | None = 8,
+    parallel_opts: dict | None = None,
+    compile_circuit: bool = False,
+    preprocess: bool = False,
+):
+    """Assemble the runcard dictionary for the qibotn CPU backend.
+
+    ``ansatz`` is compared case-insensitively with ``"mps"`` to set
+    ``MPS_enabled``; a missing ``observable`` is stored as ``False`` under
+    ``expectation_enabled``; a missing or empty ``parallel_opts`` becomes a
+    new empty dict. All the other options are copied as given.
+    """
+    use_mps = ansatz.lower() == "mps"
+    expectation = False if observable is None else observable
+    options = parallel_opts if parallel_opts else {}
+
+    # Parallelism / ansatz switches and the observable
+    runcard = {
+        "MPI_enabled": mpi,
+        "MPS_enabled": use_mps,
+        "NCCL_enabled": False,
+        "expectation_enabled": expectation,
+    }
+    # Numerical and execution settings
+    runcard.update(
+        {
+            "max_bond_dimension": bond,
+            "cut_ratio": cut_ratio,
+            "tensor_module": tensor_module,
+            "quimb_backend": quimb_backend,
+            "dtype": dtype,
+            "torch_threads": torch_threads,
+            "parallel_opts": options,
+            "compile_circuit": compile_circuit,
+            "preprocess": preprocess,
+        }
+    )
+    return runcard
+
+
+def cpu_backend(**kwargs):
+    """Build a qibotn CPU backend from ``cpu_runcard`` keyword options.
+
+    The backend class is imported inside the function, so importing this
+    module does not import ``qibotn.backends.cpu``.
+
+    Example:
+        ``backend = cpu_backend(ansatz="mps", bond=512, torch_threads=8)``
+    """
+    from qibotn.backends.cpu import CpuTensorNet
+
+    runcard = cpu_runcard(**kwargs)
+    return CpuTensorNet(runcard)
+
+
@dataclass
 class ExpectationConfig:
    ansatz: str = "tn"
@@ -33,6 +77,15 @@ class ExpectationResult:
    parallel_stats: list | None = None


+def _config_from_kwargs(**kwargs):
+    """Build an ``ExpectationConfig`` from keyword options.
+
+    Raises:
+        TypeError: if a keyword is not a field of ``ExpectationConfig``; the
+            message lists the offending names in sorted order.
+    """
+    known = ExpectationConfig.__dataclass_fields__
+    accepted = {}
+    rejected = []
+    for key, val in kwargs.items():
+        if key in known:
+            accepted[key] = val
+        else:
+            rejected.append(key)
+
+    if rejected:
+        unknown = ", ".join(sorted(rejected))
+        raise TypeError(f"Unknown expectation option(s): {unknown}")
+    return ExpectationConfig(**accepted)
+
+
 def exact_for_observable(circuit, observable, nqubits):
    if isinstance(observable, dict) and "terms" in observable:
        terms = [
@@ -49,19 +102,18 @@ def exact_for_observable(circuit, observable, nqubits):


 def run_cpu_expectation(circuit, observable, config):
-    runcard = {
-        "MPI_enabled": config.mpi,
-        "MPS_enabled": config.ansatz.lower() == "mps",
-        "NCCL_enabled": False,
-        "expectation_enabled": observable,
-        "max_bond_dimension": config.bond,
-        "cut_ratio": config.cut_ratio,
-        "tensor_module": config.tensor_module,
-        "quimb_backend": config.quimb_backend,
-        "dtype": config.dtype,
-        "torch_threads": config.torch_threads,
-        "parallel_opts": config.parallel_opts or {},
-    }
+    runcard = cpu_runcard(
+        observable,
+        ansatz=config.ansatz,
+        mpi=config.mpi,
+        bond=config.bond,
+        cut_ratio=config.cut_ratio,
+        tensor_module=config.tensor_module,
+        quimb_backend=config.quimb_backend,
+        dtype=config.dtype,
+        torch_threads=config.torch_threads,
+        parallel_opts=config.parallel_opts,
+    )
    backend = construct_backend(
        backend="qibotn",
        platform="cpu",
@@ -80,3 +132,26 @@ def run_cpu_expectation(circuit, observable, config):
        rank=rank,
        parallel_stats=list(stats) if stats is not None else None,
    )
+
+
+def cpu_expectation(circuit, observable=None, *, return_result=False, **kwargs):
+    """Compute a CPU TN/MPS expectation value from plain keyword options.
+
+    This is the preferred API for small scripts.  Common options are
+    ``ansatz="tn" | "mps"``, ``bond``, ``cut_ratio``, ``mpi``,
+    ``torch_threads``, ``quimb_backend`` and ``parallel_opts``.
+
+    With ``return_result=True`` the whole result object is returned instead
+    of only its ``value``.
+    """
+    config = _config_from_kwargs(**kwargs)
+    outcome = run_cpu_expectation(circuit, observable, config)
+    if return_result:
+        return outcome
+    return outcome.value
+
+
+def mps_expectation(circuit, observable=None, *, return_result=False, **kwargs):
+    """Compute expectation using the CPU Vidal/MPS path when possible.
+
+    Same as ``cpu_expectation`` with ``ansatz`` defaulting to ``"mps"``; an
+    explicit ``ansatz`` keyword still takes precedence.
+    """
+    # A caller-supplied "ansatz" overrides the default placed first
+    options = {"ansatz": "mps", **kwargs}
+    return cpu_expectation(
+        circuit, observable, return_result=return_result, **options
+    )
--- a/src/qibotn/observables.py
+++ b/src/qibotn/observables.py
@@ -4,6 +4,30 @@ from qibo import hamiltonians
 from qibo.symbols import I, X, Y, Z


+def pauli_pattern(pattern):
+    """Wrap ``pattern`` into the compact qibotn dict for a repeated Pauli string."""
+    compact = {"pauli_string_pattern": pattern}
+    return compact
+
+
+def pauli_sum(*terms):
+    """Build the compact qibotn dict describing a sum of Pauli terms.
+
+    Every positional argument is a ``(coefficient, operators)`` pair, where
+    ``operators`` is an iterable of pairs like ``("X", 0)``; the site of each
+    operator is converted with ``int``.  Example:
+
+    ``pauli_sum((0.5, [("X", 0), ("Z", 1)]), (-1.0, [("Z", 3)]))``
+    """
+    entries = []
+    for coeff, operators in terms:
+        normalized = []
+        for name, site in operators:
+            normalized.append((name, int(site)))
+        entries.append({"coefficient": coeff, "operators": normalized})
+    return {"terms": entries}
+
+
 def check_observable(observable, circuit_nqubit):
    """Checks the type of observable and returns the appropriate Hamiltonian."""
    if observable is None:
@@ -20,11 +44,10 @@ def check_observable(observable, circuit_nqubit):

 def build_observable(circuit_nqubit):
    """Construct the default benchmark observable used by qibotn."""
-    hamiltonian_form = 0
-    for i in range(circuit_nqubit):
-        hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit)
-
-    return hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
+    form = sum(
+        0.5 * X(i) * Z((i + 1) % circuit_nqubit) for i in range(circuit_nqubit)
+    )
+    return hamiltonians.SymbolicHamiltonian(form=form)


 def create_hamiltonian_from_dict(data, circuit_nqubit):
@@ -50,7 +73,6 @@ def create_hamiltonian_from_dict(data, circuit_nqubit):
        term_expr = full_term_expr[0]
        for op in full_term_expr[1:]:
            term_expr *= op
-
        terms.append(coeff * term_expr)

    if not terms:
@@ -84,23 +106,20 @@ def create_hamiltonian_from_pauli_pattern(pattern, circuit_nqubit):
            continue
        factor = pauli_gates[name](qubit)
        expr = factor if expr is None else expr * factor
-
-    if expr is None:
-        expr = I(0)
-
-    return hamiltonians.SymbolicHamiltonian(form=expr)
+    return hamiltonians.SymbolicHamiltonian(form=expr or I(0))


 def build_random_circuit(nqubits, nlayers, seed=42):
    """Build a random circuit with RY+RZ+CNOT layers for benchmarks."""
    import numpy as np
    from qibo import Circuit, gates
-    np.random.seed(seed)
+
+    rng = np.random.default_rng(seed)
    c = Circuit(nqubits)
    for _ in range(nlayers):
        for q in range(nqubits):
-            c.add(gates.RY(q, theta=np.random.uniform(0, 2*np.pi)))
-            c.add(gates.RZ(q, theta=np.random.uniform(0, 2*np.pi)))
+            c.add(gates.RY(q, theta=rng.uniform(0, 2 * np.pi)))
+            c.add(gates.RZ(q, theta=rng.uniform(0, 2 * np.pi)))
        for q in range(nqubits):
            c.add(gates.CNOT(q % nqubits, (q + 1) % nqubits))
    return c
--- a/src/qibotn/result.py
+++ b/src/qibotn/result.py
@@ -32,20 +32,19 @@ class TensorNetworkResult:
    statevector: ndarray

    def __post_init__(self):
-        # TODO: define the general convention when using backends different from qmatchatea
        if self.measured_probabilities is None:
-            self.measured_probabilities = {"default": self.measured_probabilities}
+            self.measured_probabilities = {}

    def probabilities(self):
        """Return calculated probabilities according to the given method."""
-        if self.prob_type == "U":
-            measured_probabilities = deepcopy(self.measured_probabilities)
-            for bitstring, prob in self.measured_probabilities[self.prob_type].items():
-                measured_probabilities[self.prob_type][bitstring] = prob[1] - prob[0]
-            probabilities = measured_probabilities[self.prob_type]
-        else:
-            probabilities = self.measured_probabilities
-        return probabilities
+        if self.prob_type != "U":
+            return self.measured_probabilities
+
+        measured_probabilities = deepcopy(self.measured_probabilities)
+        values = measured_probabilities.get(self.prob_type, {})
+        for bitstring, prob in values.items():
+            values[bitstring] = prob[1] - prob[0]
+        return values

    def frequencies(self):
        """Return frequencies if a certain number of shots has been set."""
--- a/tests/test_cpu_backend.py
+++ b/tests/test_cpu_backend.py
@@ -9,6 +9,7 @@ from qibotn.benchmark_cases import (
    build_circuit as build_benchmark_circuit,
    exact_pauli_sum,
 )
+from qibotn import cpu_expectation, mps_expectation, pauli_pattern, pauli_sum


 def build_circuit(nqubits=6):
@@ -46,6 +47,37 @@ def test_cpu_generic_tn_expectation_matches_statevector():
    assert math.isclose(value, exact, abs_tol=1e-12)


+def test_public_cpu_expectation_api_matches_statevector():
+    """``cpu_expectation`` with a ``pauli_sum`` observable matches the exact value."""
+    circuit = build_circuit()
+    observable = pauli_sum((0.5, [("X", 0), ("Z", 1)]), (-0.25, [("Z", 5)]))
+    # Same two terms, written in the tuple form taken by exact_pauli_sum
+    reference_terms = [(0.5, (("X", 0), ("Z", 1))), (-0.25, (("Z", 5),))]
+    expected = exact_pauli_sum(circuit, reference_terms, circuit.nqubits)
+
+    measured = cpu_expectation(circuit, observable, torch_threads=1)
+
+    assert math.isclose(measured, expected, abs_tol=1e-12)
+
+
+def test_public_mps_expectation_api_accepts_pauli_pattern():
+    """``mps_expectation`` with a ``pauli_pattern`` observable matches the exact value."""
+    circuit = build_circuit()
+    # Reference operator the "IXZ" pattern is compared against
+    reference = hamiltonians.SymbolicHamiltonian(form=X(1) * Z(2) * X(4) * Z(5))
+    state = circuit().state(numpy=True)
+    expected = reference.expectation_from_state(state)
+
+    observable = pauli_pattern("IXZ")
+    measured = mps_expectation(circuit, observable, bond=64, torch_threads=1)
+
+    assert math.isclose(measured, expected, abs_tol=1e-12)
+
+
 def test_cpu_mps_expectation_matches_statevector():
    circuit = build_circuit()
    observable = build_observable(circuit.nqubits)