Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4c7a10d026 | |||
| 915c24dc7b | |||
| 72f95599bb | |||
| aa122964b4 | |||
| fea8e5abc0 | |||
| ff96e36cfc | |||
| 7cebbb0820 | |||
| 57d5fbcbb0 | |||
| 5479574502 | |||
| cec0ba272a | |||
| 8b71ff96c8 | |||
| 49b27a5840 | |||
| c818ac7a6e | |||
| 0a96553bd8 | |||
| 2f5c863952 | |||
| fbae48eb3d | |||
| f776fbb04f | |||
| 5a692033a6 | |||
| a3f39a1d67 | |||
| dd222587b7 | |||
| 740828872e | |||
| 80d9c1de5a | |||
| 2c54840e7b |
13
.gitignore
vendored
13
.gitignore
vendored
@@ -2,10 +2,9 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
data/
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
@@ -160,3 +159,13 @@ cython_debug/
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
.devenv
|
||||
|
||||
|
||||
# yx
|
||||
bak/
|
||||
path/
|
||||
profiles/
|
||||
vtune_expval/
|
||||
perf*
|
||||
experiments/
|
||||
references/
|
||||
14
README.md
14
README.md
@@ -12,7 +12,7 @@ Tensor Network Types:
|
||||
Tensor Network contractions to:
|
||||
|
||||
- dense vectors
|
||||
- expecation values of given Pauli string
|
||||
- expectation values of given Pauli strings or Pauli-sum observables
|
||||
|
||||
The supported HPC configurations are:
|
||||
|
||||
@@ -26,6 +26,18 @@ Currently, the supported tensor network libraries are:
|
||||
- [cuQuantum](https://github.com/NVIDIA/cuQuantum), an NVIDIA SDK of optimized libraries and tools for accelerating quantum computing workflows.
|
||||
- [quimb](https://quimb.readthedocs.io/en/latest/), an easy but fast python library for ‘quantum information many-body’ calculations, focusing primarily on tensor networks.
|
||||
|
||||
## CPU expectation benchmarks
|
||||
|
||||
The current CPU expectation entrypoint is:
|
||||
|
||||
```sh
|
||||
python -u benchmark_cpu_expectation.py --ansatz mps --nqubits 40 --nlayers 10 --bond 2048 --circuits brickwall_cnot --observables ring_xz
|
||||
```
|
||||
|
||||
Use `--ansatz tn` for the generic TN path and `--mpi` under `mpiexec` for MPI runs.
|
||||
Reusable circuit and observable builders live in `src/qibotn/benchmark_cases.py`; execution logic lives in `src/qibotn/expectation_runner.py`.
|
||||
For Vidal/MPS 1D-chain scale tests, use `run_vidal_mps_cases.sh`.
|
||||
|
||||
## Installation
|
||||
|
||||
To get started:
|
||||
|
||||
285
benchmark_cpu_expectation.py
Normal file
285
benchmark_cpu_expectation.py
Normal file
@@ -0,0 +1,285 @@
|
||||
"""CLI for CPU TN/MPS expectation benchmarks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from qibotn.benchmark_cases import (
|
||||
CIRCUITS,
|
||||
OBSERVABLES,
|
||||
build_circuit,
|
||||
observable_terms,
|
||||
parse_names,
|
||||
terms_to_dict,
|
||||
)
|
||||
from qibotn.expectation_runner import (
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
def optional_int(text):
    """Parse a CLI value as an ``int``, treating "no limit" keywords as ``None``.

    String inputs that match ``none``, ``null``, ``inf`` or ``unlimited``
    (case-insensitively) yield ``None``; every other input is handed to
    ``int`` unchanged, so conversion errors propagate to the caller.
    """
    unlimited_keywords = {"none", "null", "inf", "unlimited"}
    is_keyword = isinstance(text, str) and text.lower() in unlimited_keywords
    return None if is_keyword else int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Parse a CLI value as a ``float``, treating "no limit" keywords as ``None``.

    String inputs that match ``none``, ``null``, ``inf`` or ``unlimited``
    (case-insensitively) yield ``None``; every other input is handed to
    ``float`` unchanged, so conversion errors propagate to the caller.
    """
    unlimited_keywords = {"none", "null", "inf", "unlimited"}
    is_keyword = isinstance(text, str) and text.lower() in unlimited_keywords
    return None if is_keyword else float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
    """Format *value* with the format spec *fmt*, or return ``"None"`` for ``None``."""
    if value is None:
        return "None"
    return format(value, fmt)
|
||||
|
||||
|
||||
def should_stop_dask(args):
    """Tell whether the external dask cluster should be stopped after the search.

    The answer is ``True`` only when all of the following hold: ``--keep-dask``
    was not given, the search backend is ``"dask"``, a scheduler address was
    supplied, and no contraction tree is being loaded from disk.
    """
    if args.keep_dask:
        return False
    if args.tn_search_backend != "dask":
        return False
    if args.dask_address is None:
        return False
    return args.tn_load_tree is None
|
||||
|
||||
|
||||
def stop_dask_cluster(args, rank):
    """Stop the external dask cluster via ``tools/manage_tn_dask_cluster.sh stop``.

    Nothing happens unless *rank* is 0 and ``should_stop_dask(args)`` is true.
    The scheduler host and port parsed from ``args.dask_address`` are exported
    to the script as ``SCHEDULER_HOST`` / ``SCHEDULER_PORT`` unless those
    variables are already present in the environment.

    This function is invoked from the ``finally`` clause of ``main``, so it is
    strictly best-effort: every failure is reported on stdout and never
    raised, otherwise it would replace the exception that is actually
    propagating out of the benchmark.
    """
    if rank != 0 or not should_stop_dask(args):
        return

    script = Path(__file__).resolve().parent / "tools" / "manage_tn_dask_cluster.sh"
    if not script.exists():
        print(f"dask_stop_skipped reason=missing_script path={script}", flush=True)
        return

    env = os.environ.copy()
    try:
        parsed = urlparse(args.dask_address)
        host = parsed.hostname
        # ``.port`` raises ValueError for a non-numeric or out-of-range port.
        port = parsed.port
    except ValueError:
        host = None
        port = None
    if host:
        env.setdefault("SCHEDULER_HOST", host)
    if port:
        env.setdefault("SCHEDULER_PORT", str(port))

    print("dask_stop_after_search start", flush=True)
    try:
        completed = subprocess.run(
            [str(script), "stop"],
            cwd=str(script.parent.parent),
            env=env,
            check=False,
        )
    except OSError as exc:
        # E.g. the script exists but is not executable.
        print(f"dask_stop_after_search failed error={exc}", flush=True)
        return
    if completed.returncode != 0:
        print(
            f"dask_stop_after_search failed returncode={completed.returncode}",
            flush=True,
        )
        return
    print("dask_stop_after_search done", flush=True)
|
||||
|
||||
|
||||
def build_parallel_opts(args):
    """Translate parsed CLI arguments into the ``parallel_opts`` dictionary.

    The five core keys (``slicing_opts``, ``search_workers``, ``max_repeats``,
    ``max_time``, ``print_stats``) are always present. The remaining keys are
    added only when the matching argument is set (not ``None`` for valued
    options, truthy for flags).
    """
    slicing_opts = {
        key: value
        for key, value in (
            ("target_slices", args.tn_target_slices),
            ("target_size", args.tn_target_size),
        )
        if value is not None
    }

    opts = {
        # An empty slicing dict is reported as None.
        "slicing_opts": slicing_opts or None,
        # Fall back to the torch thread count when no worker count is given.
        "search_workers": args.tn_search_workers or args.torch_threads,
        "max_repeats": args.tn_search_repeats,
        "max_time": args.tn_search_time,
        "print_stats": not args.no_tn_stats,
    }

    valued_options = (
        ("search_backend", args.tn_search_backend),
        ("dask_address", args.dask_address),
        ("save_tree_path", args.tn_save_tree),
        ("load_tree_path", args.tn_load_tree),
    )
    for key, value in valued_options:
        if value is not None:
            opts[key] = value

    if args.tn_search_only:
        opts["search_only"] = True
    if args.tn_debug_trials:
        opts["debug_trials"] = True
    if args.tn_contract_implementation is not None:
        opts["contract_implementation"] = args.tn_contract_implementation
    if args.dask_close_workers:
        opts["dask_close_workers"] = True
    return opts
|
||||
|
||||
|
||||
def _build_arg_parser():
    """Create the argument parser for the CPU expectation benchmark CLI."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=40)
    parser.add_argument("--nlayers", type=int, default=30)
    parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
    parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--torch-threads", type=int, default=8)
    parser.add_argument("--quimb-backend", choices=("numpy", "torch"), default="torch")
    parser.add_argument(
        "--dtype",
        choices=("complex128", "complex64"),
        default="complex128",
    )
    parser.add_argument("--ansatz", choices=("tn", "mps"), default=None)
    parser.add_argument("--mps", action="store_true")
    parser.add_argument("--mpi", action="store_true")
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    parser.add_argument("--circuits", nargs="+", default=["brickwall_cnot"])
    parser.add_argument("--observables", nargs="+", default=["ring_xz"])
    parser.add_argument("--pauli-pattern")
    parser.add_argument("--tn-target-slices", type=int)
    parser.add_argument("--tn-target-size", type=int, default=2**32)
    parser.add_argument("--tn-search-workers", type=int)
    parser.add_argument("--tn-search-repeats", type=int, default=128)
    parser.add_argument("--tn-search-time", type=float, default=60.0)
    parser.add_argument(
        "--no-tn-stats",
        action="store_true",
        help="Do not print per-term TN search/contraction diagnostics.",
    )
    parser.add_argument(
        "--tn-search-backend",
        choices=("processpool", "dask"),
        default="dask",
        help="Path-search backend. In MPI mode, dask search runs only on rank 0 and broadcasts the tree.",
    )
    parser.add_argument(
        "--dask-address",
        help="Dask scheduler address, for example tcp://host:8786. If omitted with dask search, a local cluster is created.",
    )
    parser.add_argument(
        "--dask-close-workers",
        action="store_true",
        help="After dask path search, ask the scheduler to close all currently connected workers.",
    )
    parser.add_argument(
        "--keep-dask",
        action="store_true",
        help=(
            "Keep an external dask cluster running after search. By default, "
            "tools/manage_tn_dask_cluster.sh stop is called after search when "
            "--dask-address is used."
        ),
    )
    parser.add_argument(
        "--tn-save-tree",
        help="Save searched cotengra contraction tree(s) to this pickle file.",
    )
    parser.add_argument(
        "--tn-load-tree",
        help="Load cotengra contraction tree(s) from this pickle file and skip path search.",
    )
    parser.add_argument(
        "--tn-search-only",
        action="store_true",
        help="Only run path search and optional --tn-save-tree; skip contraction.",
    )
    parser.add_argument(
        "--tn-debug-trials",
        action="store_true",
        help="Print dask worker summary and per-trial worker start/done logs.",
    )
    parser.add_argument(
        "--tn-contract-implementation",
        choices=("auto", "cotengra", "autoray", "cpp"),
        help="cotengra contraction implementation for TN contraction.",
    )
    return parser


def _print_term_summary(stat):
    """Print one ``tn_term_summary`` line for a per-term statistics mapping."""
    cost = stat["path_cost"]
    search_stats = stat.get("search_stats", {})
    print(
        "tn_term_summary "
        f"term={stat.get('term_index', 0)} "
        f"search_seconds={stat.get('search_seconds', float('nan')):.3f} "
        f"contract_seconds={stat.get('contract_seconds', float('nan')):.3f} "
        f"completed_trials={search_stats.get('completed_trials', 'na')} "
        f"finite_trials={search_stats.get('finite_trials', 'na')} "
        f"failed_trials={search_stats.get('failed_trials', 'na')} "
        f"requested_trials={search_stats.get('requested_trials', 'na')} "
        f"best_score={search_stats.get('best_score', float('nan')):.6g} "
        f"slices={cost['nslices']} "
        f"log10_flops={cost['log10_flops']:.3f} "
        f"log10_write={cost['log10_write']:.3f} "
        f"log2_size={cost['log2_size']:.3f} "
        f"log10_combo={cost['log10_combo']:.3f} "
        f"peak_memory_gib={cost['peak_memory_gib']:.6g} "
        f"slicing_overhead={cost['slicing_overhead']:.6g} "
        f"rank_slices={stat.get('rank_slices', 'na')}"
    )


def main():
    """Run the CPU TN/MPS expectation benchmark described by the command line.

    For every requested circuit and observable (or the single
    ``--pauli-pattern`` observable) the expectation value is computed with
    ``run_cpu_expectation``. Rank 0 prints one result row per pair followed by
    one ``tn_term_summary`` line per entry in ``result.parallel_stats``. With
    ``--exact`` rank 0 also computes a reference value and reports the
    absolute and relative error.

    Raises:
        ValueError: if ``--exact`` is requested for more qubits than
            ``--exact-max-qubits`` allows.
    """
    args = _build_arg_parser().parse_args()

    # ``--mps`` takes precedence over ``--ansatz``; the generic TN path is the default.
    ansatz = "mps" if args.mps else (args.ansatz or "tn")
    circuits = parse_names(args.circuits, CIRCUITS, "circuits")
    observables = [] if args.pauli_pattern else parse_names(
        args.observables, OBSERVABLES, "observables"
    )

    rank = 0
    if args.mpi:
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()

    config = ExpectationConfig(
        ansatz=ansatz,
        mpi=args.mpi,
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        quimb_backend=args.quimb_backend,
        dtype=args.dtype,
        torch_threads=args.torch_threads,
        parallel_opts=build_parallel_opts(args),
    )

    if rank == 0:
        mode = "MPI" if args.mpi else "serial"
        print(
            f"backend=cpu ansatz={ansatz.upper()} mode={mode} "
            f"nqubits={args.nqubits} nlayers={args.nlayers} "
            f"bond={format_optional(args.bond)} "
            f"cut_ratio={format_optional(args.cut_ratio)} seed={args.seed} "
            f"quimb_backend={args.quimb_backend} dtype={args.dtype} "
            f"torch_threads={args.torch_threads} "
            f"tn_search_backend={args.tn_search_backend}"
        )
        print("circuit observable exact value abs_error rel_error seconds")

    try:
        # Validate on every rank before any work starts. Raising on rank 0
        # alone would let the remaining MPI ranks continue into
        # run_cpu_expectation without it.
        if args.exact and args.nqubits > args.exact_max_qubits:
            raise ValueError(
                f"--exact is limited to {args.exact_max_qubits} qubits by default."
            )

        for circuit_kind in circuits:
            circuit = build_circuit(circuit_kind, args.nqubits, args.nlayers, args.seed)
            if args.pauli_pattern:
                named_observables = [
                    (
                        f"pattern:{args.pauli_pattern}",
                        {"pauli_string_pattern": args.pauli_pattern},
                    )
                ]
            else:
                named_observables = [
                    (obs_kind, terms_to_dict(observable_terms(obs_kind, args.nqubits)))
                    for obs_kind in observables
                ]

            for obs_name, observable in named_observables:
                exact = None
                if args.exact and rank == 0:
                    exact = exact_for_observable(circuit, observable, args.nqubits)

                result = run_cpu_expectation(circuit, observable, config)
                if args.mpi and result.rank != 0:
                    # Only rank 0 reports results.
                    continue

                if exact is None:
                    abs_error = float("nan")
                    rel_error = float("nan")
                    exact_text = "nan"
                else:
                    abs_error = abs(result.value - exact)
                    # Guard the division for reference values at or near zero.
                    rel_error = abs_error / max(abs(exact), 1e-15)
                    exact_text = f"{exact:.16e}"
                print(
                    f"{circuit_kind} {obs_name} {exact_text} {result.value:.16e} "
                    f"{abs_error:.6e} {rel_error:.6e} {result.seconds:.3f}"
                )
                for stat in result.parallel_stats or ():
                    _print_term_summary(stat)
    finally:
        stop_dask_cluster(args, rank)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
BIN
data/tree_q25_l10.pkl
Normal file
BIN
data/tree_q25_l10.pkl
Normal file
Binary file not shown.
BIN
data/tree_q25_l10_sliced.pkl
Normal file
BIN
data/tree_q25_l10_sliced.pkl
Normal file
Binary file not shown.
BIN
data/tree_q30_l10.pkl
Normal file
BIN
data/tree_q30_l10.pkl
Normal file
Binary file not shown.
BIN
data/tree_q30_l10_sliced.pkl
Normal file
BIN
data/tree_q30_l10_sliced.pkl
Normal file
Binary file not shown.
70
doc/make.bat
70
doc/make.bat
@@ -1,35 +1,35 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
||||
|
||||
53
docs/contest_runners.md
Normal file
53
docs/contest_runners.md
Normal file
@@ -0,0 +1,53 @@
|
||||
# TN
|
||||
```bash
|
||||
# Run from the qibotn directory
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
CASE=main1 \
|
||||
OBSERVABLES=long_z_string \
|
||||
NQUBITS=34 \
|
||||
NLAYERS=20 \
|
||||
TORCH_THREADS=48 \
|
||||
SEARCH_REPEATS=2048 \
|
||||
SEARCH_TIME=300 \
|
||||
SCHEDULER_HOST=10.20.1.103 \
|
||||
WORKER_HOSTS="10.20.1.103 10.20.6.101" \
|
||||
DASK_ADDRESS="tcp://10.20.1.103:8786" \
|
||||
NWORKERS=84 \
|
||||
NTHREADS=1 \
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2" \
|
||||
tools/run_tn_dask_mpi_all.sh
|
||||
|
||||
# Run only the contraction (contract) step
|
||||
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2 \
|
||||
.venv/bin/python -u tools/tn_contest_runner.py contract \
|
||||
--mpi \
|
||||
--case main1 \
|
||||
--nqubits 34 \
|
||||
--nlayers 20 \
|
||||
--observables long_z_string \
|
||||
--tree-dir trees/contest_tn \
|
||||
--torch-threads 48 \
|
||||
--dtype complex64
|
||||
```
|
||||
|
||||
# MPS
|
||||
```
|
||||
cd /home/yx/qibotn
|
||||
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2" \
|
||||
TORCH_THREADS=48 \
|
||||
OBS_FILTER=ring_xz \
|
||||
MAIN1_NQ=128 \
|
||||
MAIN1_LAYERS=24 \
|
||||
MAIN1_BOND=1024 \
|
||||
tools/run_vidal_mpi_contest_cases.sh main1
|
||||
```
|
||||
6
poetry.lock
generated
6
poetry.lock
generated
@@ -1733,14 +1733,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "mako"
|
||||
version = "1.3.10"
|
||||
version = "1.3.11"
|
||||
description = "A super-fast templating language that borrows the best ideas from the existing templating languages."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59"},
|
||||
{file = "mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28"},
|
||||
{file = "mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77"},
|
||||
{file = "mako-1.3.11.tar.gz", hash = "sha256:071eb4ab4c5010443152255d77db7faa6ce5916f35226eb02dc34479b6858069"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
||||
@@ -31,11 +31,13 @@ cuquantum-python-cu12 = { version = "^25.9.1", optional = true }
|
||||
qmatchatea = { version = "^1.4.3", optional = true }
|
||||
qiskit = { version = "^1.4.0", optional = true }
|
||||
qtealeaves = { version = "^1.5.20", optional = true }
|
||||
distributed = { version = ">=2024", optional = true }
|
||||
|
||||
|
||||
[tool.poetry.extras]
|
||||
cuda = ["cupy-cuda12x", "cuda-toolkit", "nvidia-nccl-cu12", "cuquantum-python-cu12", "mpi4py"]
|
||||
qmatchatea = ["qmatchatea"]
|
||||
dask = ["distributed"]
|
||||
|
||||
[tool.poetry.group.docs]
|
||||
optional = true
|
||||
|
||||
134
run_vidal_mps_cases.sh
Executable file
134
run_vidal_mps_cases.sh
Executable file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Focused Vidal/MPS expectation test cases for 1D chain circuits.
#
# These cases intentionally avoid qmatchatea and generic TN paths. They target
# the current supported scope: one-qubit gates, adjacent two-qubit gates, and
# Pauli-sum expectation values on a 1D chain.
#
# Usage: ./run_vidal_mps_cases.sh [smoke|convergence|single-long|suite-long|mpi-long|stress]
# Every tunable below can be overridden through the environment.

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$ROOT_DIR"

PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
MPIEXEC="${MPIEXEC:-mpiexec}"
HOSTFILE="${HOSTFILE:-hostfile}"

# The stress case has its own, larger thread default. It must be resolved
# before THREADS is given its general default, otherwise the 48 fallback can
# never apply.
STRESS_THREADS="${THREADS:-48}"
THREADS="${THREADS:-32}"
MPI_RANKS="${MPI_RANKS:-16}"
MPI_THREADS="${MPI_THREADS:-12}"

export OMP_NUM_THREADS="${OMP_NUM_THREADS:-1}"
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-1}"

# Echo the command between separator lines, then execute it.
run() {
  echo
  echo "--------------------------------------------------------------------------------"
  echo "$*"
  echo "--------------------------------------------------------------------------------"
  "$@"
}

case "${1:-help}" in
  smoke)
    # Short correctness-oriented run. Useful before starting long jobs.
    run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mps \
      --nqubits 40 \
      --nlayers 10 \
      --bond 2048 \
      --torch-threads "$THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz range2_xx long_z_string
    ;;

  convergence)
    # Same circuit/observable, increasing bond. Check value convergence.
    # BONDS is deliberately left unquoted so it splits into one bond per word.
    for bond in ${BONDS:-4096 16384 65536}; do
      run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
        --mps \
        --nqubits "${NQ:-80}" \
        --nlayers "${LAYERS:-16}" \
        --bond "$bond" \
        --torch-threads "$THREADS" \
        --circuits "${CIRCUIT:-brickwall_cnot}" \
        --observables "${OBSERVABLE:-ring_xz}"
    done
    ;;

  single-long)
    # Single long Vidal run. On node-3, a similar n=40,l=30,bond=2048 case
    # took about 9 minutes for one expectation. This one is meant to be longer.
    run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mps \
      --nqubits "${NQ:-80}" \
      --nlayers "${LAYERS:-16}" \
      --bond "${BOND:-65536}" \
      --torch-threads "$THREADS" \
      --circuits "${CIRCUIT:-brickwall_cnot}" \
      --observables "${OBSERVABLE:-ring_xz}"
    ;;

  suite-long)
    # Application-style multi-circuit, multi-observable MPS run.
    # This is intentionally multi-term and should run much longer than single-long.
    run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mps \
      --nqubits "${NQ:-80}" \
      --nlayers "${LAYERS:-16}" \
      --bond "${BOND:-65536}" \
      --torch-threads "$THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz mixed_local range2_xx long_z_string
    ;;

  mpi-long)
    # Multi-node Vidal segmented MPS run. Uses HOSTFILE.
    run "$MPIEXEC" -hostfile "$HOSTFILE" -n "$MPI_RANKS" "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mpi --mps \
      --nqubits "${NQ:-80}" \
      --nlayers "${LAYERS:-16}" \
      --bond "${BOND:-65536}" \
      --torch-threads "$MPI_THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz mixed_local range2_xx long_z_string
    ;;

  stress)
    # Heavier entanglement. Start only after single-long is stable.
    run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
      --mps \
      --nqubits "${NQ:-80}" \
      --nlayers "${LAYERS:-18}" \
      --bond "${BOND:-262144}" \
      --torch-threads "$STRESS_THREADS" \
      --circuits "${CIRCUIT:-rxx_rzz}" \
      --observables ring_xz open_zz range2_xx
    ;;

  help|*)
    cat <<'EOF'
Usage: ./run_vidal_mps_cases.sh [smoke|convergence|single-long|suite-long|mpi-long|stress]

Common overrides:
  PYTHON_BIN=.venv/bin/python
  THREADS=32                      # stress mode defaults to 48
  OMP_NUM_THREADS=1 MKL_NUM_THREADS=1

Single-node scale overrides:
  NQ=80 LAYERS=16 BOND=65536
  CIRCUIT=brickwall_cnot
  OBSERVABLE=ring_xz
  BONDS="4096 16384 65536"        # for convergence mode

Multi-node overrides:
  HOSTFILE=hostfile
  MPI_RANKS=16 MPI_THREADS=12

Recommended first runs:
  ./run_vidal_mps_cases.sh smoke
  ./run_vidal_mps_cases.sh convergence
  ./run_vidal_mps_cases.sh single-long
EOF
    ;;
esac
|
||||
@@ -3,9 +3,10 @@ from typing import Union
|
||||
from qibo.config import raise_error
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.backends.cpu import CpuTensorNet
|
||||
from qibotn.backends.cutensornet import CuTensorNet # pylint: disable=E0401
|
||||
|
||||
PLATFORMS = ("cutensornet", "quimb", "qmatchatea")
|
||||
PLATFORMS = ("cutensornet", "cpu", "quimb", "qmatchatea", "vidal")
|
||||
|
||||
|
||||
class MetaBackend:
|
||||
@@ -24,10 +25,12 @@ class MetaBackend:
|
||||
|
||||
if platform == "cutensornet": # pragma: no cover
|
||||
return CuTensorNet(runcard)
|
||||
elif platform == "cpu":
|
||||
return CpuTensorNet(runcard)
|
||||
elif platform == "quimb": # pragma: no cover
|
||||
import qibotn.backends.quimb as qmb
|
||||
|
||||
quimb_backend = kwargs.get("quimb_backend", "numpy")
|
||||
quimb_backend = kwargs.get("quimb_backend", "torch")
|
||||
contraction_optimizer = kwargs.get("contraction_optimizer", "auto-hq")
|
||||
return qmb.BACKENDS[quimb_backend](
|
||||
quimb_backend=quimb_backend, contraction_optimizer=contraction_optimizer
|
||||
@@ -36,6 +39,10 @@ class MetaBackend:
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
|
||||
return QMatchaTeaBackend()
|
||||
elif platform == "vidal":
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
return VidalBackend()
|
||||
else:
|
||||
raise_error(
|
||||
NotImplementedError,
|
||||
|
||||
752
src/qibotn/backends/cpu.py
Normal file
752
src/qibotn/backends/cpu.py
Normal file
@@ -0,0 +1,752 @@
|
||||
"""CPU tensor-network backend with cutensornet-style runcard support."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from qibo import hamiltonians
|
||||
from qibo.backends import NumpyBackend
|
||||
from qibo.config import raise_error
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.backends.vidal import (
|
||||
_observable_mpo_tensors,
|
||||
_operator_terms_to_mpo,
|
||||
_symbolic_hamiltonian_to_operator_terms,
|
||||
_unsupported_reason,
|
||||
)
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor
|
||||
from qibotn.observables import check_observable
|
||||
from qibotn.result import TensorNetworkResult
|
||||
|
||||
|
||||
def _as_bool_or_dict(value, name):
    """Return *value* unchanged when it is a ``bool`` or ``dict``.

    Raises:
        TypeError: for any other type; the message names the offending
            option through *name*.
    """
    if not isinstance(value, (bool, dict)):
        raise TypeError(f"{name} has an unexpected type")
    return value
|
||||
|
||||
|
||||
def _bind_numa_node(rank):
    """Bind the calling process to the NUMA domain(s) selected for *rank*.

    The local (per-node) rank is read from the first launcher environment
    variable that holds an integer (``OMPI_COMM_WORLD_LOCAL_RANK``,
    ``MV2_COMM_WORLD_LOCAL_RANK``, ``MPI_LOCALRANKID``, ``I_MPI_LOCAL_RANK``,
    ``SLURM_LOCALID``); *rank* itself is used when none is set. The NUMA
    domains found under sysfs are dealt out round-robin across the local
    ranks. The CPU affinity is set to the CPUs of all assigned domains, and,
    when ``libnuma`` can be loaded, execution and preferred memory allocation
    are pointed at the first assigned domain.

    Every step is best-effort: missing sysfs files, a missing ``libnuma`` or
    a rejected affinity call are ignored.

    Returns:
        The first assigned NUMA domain, or ``None`` when no binding was
        attempted (affinity API unavailable, affinity already restricted by
        the launcher, or no NUMA domains found).
    """
    try:
        current_affinity = os.sched_getaffinity(0)
    except (AttributeError, OSError):
        # The affinity API is only available on some platforms.
        return None

    online_cpus = set(range(os.cpu_count() or 1))
    if current_affinity and current_affinity != online_cpus:
        # MPI launchers such as Intel MPI often pin local ranks correctly
        # before Python starts. Do not narrow that placement further.
        return None

    local_rank = rank
    for name in (
        "OMPI_COMM_WORLD_LOCAL_RANK",
        "MV2_COMM_WORLD_LOCAL_RANK",
        "MPI_LOCALRANKID",
        "I_MPI_LOCAL_RANK",
        "SLURM_LOCALID",
    ):
        try:
            local_rank = int(os.environ[name])
            break
        except (KeyError, ValueError):
            pass

    domains = _available_numa_domains()
    if not domains:
        return None

    local_size = _local_world_size()
    # Round-robin: rank r of n gets domains r, r + n, r + 2n, ...
    assigned_domains = domains[local_rank::local_size]
    if not assigned_domains:
        # More local ranks than domains: wrap around and share a domain.
        assigned_domains = [domains[local_rank % len(domains)]]

    domain = assigned_domains[0]
    cpus = set()
    for selected in assigned_domains:
        cpulist = f"/sys/devices/system/node/node{selected}/cpulist"
        try:
            # Context manager so the sysfs handle is closed deterministically.
            with open(cpulist, encoding="utf-8") as handle:
                cpus.update(_parse_cpu_list(handle.read().strip()))
        except (OSError, ValueError):
            # Unreadable or malformed cpulist: skip this domain's CPUs.
            pass

    try:
        if cpus:
            os.sched_setaffinity(0, cpus)
    except OSError:
        pass

    try:
        import ctypes

        libnuma = ctypes.CDLL("libnuma.so.1")
        if libnuma.numa_available() >= 0:
            libnuma.numa_run_on_node(ctypes.c_int(domain))
            libnuma.numa_set_preferred(ctypes.c_int(domain))
    except (ImportError, OSError, AttributeError):
        # ctypes or libnuma missing, or a symbol not exported: the memory
        # policy is optional, so carry on with CPU affinity only.
        pass

    return domain
|
||||
|
||||
|
||||
def _available_numa_domains():
    """Return the sorted NUMA node ids listed under ``/sys/devices/system/node``.

    Entries whose name does not end in an integer after the ``node`` prefix
    are ignored. An ``OSError`` raised while scanning the directory yields an
    empty list.
    """
    sysfs_root = Path("/sys/devices/system/node")
    found = []
    try:
        for entry in sysfs_root.glob("node[0-9]*"):
            suffix = entry.name[len("node"):]
            try:
                found.append(int(suffix))
            except ValueError:
                continue
    except OSError:
        return []
    found.sort()
    return found
|
||||
|
||||
|
||||
def _local_world_size():
    """Return the number of MPI ranks on this node, as reported by the launcher.

    The launcher environment variables are checked in a fixed order and the
    first non-empty one that parses as an integer wins. Only the text before
    the first ``(`` is parsed (so ``"2(x3)"`` reads as 2). The result is
    clamped to at least 1, and 1 is returned when no variable is usable.
    """
    candidates = (
        "OMPI_COMM_WORLD_LOCAL_SIZE",
        "MV2_COMM_WORLD_LOCAL_SIZE",
        "MPI_LOCALNRANKS",
        "I_MPI_LOCAL_SIZE",
        "SLURM_NTASKS_PER_NODE",
    )
    for variable in candidates:
        raw = os.environ.get(variable)
        if raw:
            try:
                count = int(str(raw).partition("(")[0])
            except ValueError:
                continue
            return max(1, count)
    return 1
|
||||
|
||||
|
||||
def _parse_cpu_list(text):
    """Expand a comma-separated CPU list such as ``"0-3,8"`` into a set of ints.

    Each item is either a single CPU number or an inclusive ``start-stop``
    range. Empty items are skipped; items that are not integers raise
    ``ValueError``.
    """
    cpus = set()
    for chunk in (piece.strip() for piece in text.split(",")):
        if not chunk:
            continue
        first, dash, last = chunk.partition("-")
        if dash:
            # Ranges are inclusive on both ends.
            cpus.update(range(int(first), int(last) + 1))
        else:
            cpus.add(int(chunk))
    return cpus
|
||||
|
||||
|
||||
class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
"""CPU replacement for the cutensornet runcard execution surface.
|
||||
|
||||
The backend preserves the high-level runcard knobs used by the GPU backend:
|
||||
``MPI_enabled``, ``MPS_enabled`` and ``expectation_enabled``. Generic TN
|
||||
work is delegated to quimb on CPU; MPS expectation uses the Vidal fast path
|
||||
when the circuit is nearest-neighbor and falls back to quimb otherwise.
|
||||
"""
|
||||
|
||||
def __init__(self, runcard=None):
|
||||
super().__init__()
|
||||
self.name = "qibotn"
|
||||
self.platform = "cpu"
|
||||
self.precision = "double"
|
||||
self.configure_tn_simulation(runcard)
|
||||
|
||||
def configure_tn_simulation(self, runcard=None):
|
||||
runcard = {} if runcard is None else runcard
|
||||
self.rank = 0
|
||||
self.MPI_enabled = bool(runcard.get("MPI_enabled", False))
|
||||
self.NCCL_enabled = bool(runcard.get("NCCL_enabled", False))
|
||||
if self.NCCL_enabled:
|
||||
raise_error(NotImplementedError, "NCCL is only available for GPU backends.")
|
||||
|
||||
expectation = runcard.get("expectation_enabled", False)
|
||||
if expectation is True:
|
||||
self.expectation_enabled = True
|
||||
self.observable = None
|
||||
elif expectation is False:
|
||||
self.expectation_enabled = False
|
||||
self.observable = None
|
||||
elif isinstance(expectation, (dict, hamiltonians.SymbolicHamiltonian)):
|
||||
self.expectation_enabled = True
|
||||
self.observable = expectation
|
||||
else:
|
||||
raise TypeError("expectation_enabled has an unexpected type")
|
||||
|
||||
mps = _as_bool_or_dict(runcard.get("MPS_enabled", False), "MPS_enabled")
|
||||
self.MPS_enabled = bool(mps)
|
||||
self.mps_options = mps if isinstance(mps, dict) else {}
|
||||
|
||||
self.max_bond_dimension = runcard.get(
|
||||
"max_bond_dimension",
|
||||
self.mps_options.get("max_bond_dimension", 512),
|
||||
)
|
||||
self.cut_ratio = runcard.get(
|
||||
"cut_ratio",
|
||||
self.mps_options.get(
|
||||
"cut_ratio",
|
||||
self.mps_options.get("svd_method", {}).get("abs_cutoff", 1e-12),
|
||||
),
|
||||
)
|
||||
self.tensor_module = runcard.get("tensor_module", "torch")
|
||||
self.dtype = runcard.get("dtype", "complex128")
|
||||
self.compile_circuit = bool(runcard.get("compile_circuit", False))
|
||||
self.preprocess = bool(runcard.get("preprocess", False))
|
||||
self.mpi_term_batch_size = runcard.get(
|
||||
"mpi_term_batch_size",
|
||||
runcard.get("term_batch_size", None),
|
||||
)
|
||||
self.torch_threads = runcard.get("torch_threads", None)
|
||||
self.quimb_backend = runcard.get("quimb_backend", "torch")
|
||||
self.contraction_optimizer = runcard.get("contraction_optimizer", "auto-hq")
|
||||
self.parallel_opts = runcard.get("parallel_opts", {})
|
||||
self.parallel_stats = []
|
||||
|
||||
def execute_circuit(
    self,
    circuit,
    initial_state=None,
    nshots=None,
    prob_type=None,
    return_array=False,
    **prob_kwargs,
):
    """Run ``circuit`` and return either an expectation value or a quimb result.

    Args:
        circuit: circuit to simulate.
        initial_state: must be ``None``; any other value is rejected.
        nshots: forwarded to the quimb backend when no expectation is requested.
        prob_type: accepted for interface compatibility; not read here.
        return_array: forwarded to the quimb backend.
        **prob_kwargs: accepted for interface compatibility; not read here.

    Returns:
        With ``expectation_enabled`` set, a one-element NumPy array: ``[0]``
        (int64) on MPI ranks greater than zero, otherwise the expectation value
        as complex128 or float64. Without it, whatever the quimb backend's
        ``execute_circuit`` returns.
    """
    if initial_state is not None:
        raise_error(NotImplementedError, "QiboTN CPU backend does not support initial state.")

    pin_threads = self.tensor_module == "torch" and self.torch_threads is not None
    if pin_threads:
        import torch

        torch.set_num_threads(self.torch_threads)

    if not self.expectation_enabled:
        # Plain simulation: delegate everything to the quimb backend.
        use_mps = self.MPS_enabled
        quimb = self._quimb_backend()
        quimb.configure_tn_simulation(
            ansatz="mps" if use_mps else None,
            max_bond_dimension=self.max_bond_dimension if use_mps else None,
            svd_cutoff=self.cut_ratio,
        )
        return quimb.execute_circuit(
            circuit=circuit,
            nshots=nshots,
            return_array=return_array,
        )

    result = self.expectation(circuit, self.observable)
    if self.MPI_enabled and self.rank > 0:
        # Non-root ranks hand back a placeholder instead of the value.
        return np.asarray([0], dtype=np.int64)
    if np.iscomplexobj(result):
        return np.asarray([result], dtype=np.complex128)
    return np.asarray([result], dtype=np.float64)
|
||||
|
||||
def expectation(self, circuit, observable=None, preprocess=None, compile_circuit=None):
    """Compute the expectation value of ``observable`` on ``circuit``.

    The observable is first probed with ``_observable_mpo_tensors``; when that
    yields ``None`` it is normalised through ``check_observable`` instead.

    Args:
        circuit: circuit to simulate; ``circuit.nqubits`` is read.
        observable: observable description accepted by the helpers above.
        preprocess: per-call override of ``self.preprocess`` (``None`` keeps
            the configured value).
        compile_circuit: per-call override of ``self.compile_circuit``
            (``None`` keeps the configured value).

    Returns:
        The value produced by the Vidal path or by the quimb MPI /
        process-pool search path.

    Raises:
        NotImplementedError: for an MPO observable when MPS is disabled.
    """
    mpo_tensors = _observable_mpo_tensors(observable, circuit.nqubits)
    if mpo_tensors is None:
        observable = check_observable(observable, circuit.nqubits)

    use_preprocess = self.preprocess if preprocess is None else preprocess
    # Resolve the per-call override the same way ``_vidal_expectation`` does,
    # so the routing decision below agrees with what that method will run.
    use_compile = self.compile_circuit if compile_circuit is None else compile_circuit

    if mpo_tensors is not None and not self.MPS_enabled:
        raise_error(
            NotImplementedError,
            "MPO expectation is currently supported only by the Vidal MPS path.",
        )

    if self.MPS_enabled:
        # NOTE(review): presumably ``None`` means the circuit is directly
        # supported by the Vidal path -- confirm against _unsupported_reason.
        reason = _unsupported_reason(circuit)
        if reason is None or use_compile or use_preprocess:
            return self._vidal_expectation(
                circuit,
                observable,
                preprocess=use_preprocess,
                compile_circuit=compile_circuit,
            )

    backend = self._quimb_backend()
    backend.configure_tn_simulation(
        ansatz="mps" if self.MPS_enabled else None,
        max_bond_dimension=self.max_bond_dimension if self.MPS_enabled else None,
        svd_cutoff=self.cut_ratio,
    )
    if self.MPI_enabled:
        return self._quimb_expectation_mpi(backend, circuit, observable)
    return self._quimb_expectation_processpool(backend, circuit, observable)
|
||||
|
||||
def _vidal_expectation(
    self, circuit, observable, preprocess=False, compile_circuit=None
):
    """Evaluate the expectation value through the Vidal MPS executors.

    With ``preprocess`` set, the work is delegated to ``VidalBackend``;
    otherwise the circuit is run on ``SegmentVidalMPIExecutor`` (MPI) or
    ``VidalTEBDExecutor`` (serial) and contracted against an MPO.

    Args:
        circuit: circuit to run.
        observable: MPO-style or symbolic observable.
        preprocess: selects the ``VidalBackend`` route.
        compile_circuit: ``None`` falls back to ``self.compile_circuit``.

    Returns:
        The expectation value. On the MPI executor route, ranks other than
        zero return ``np.nan``.

    Side effects:
        Updates ``self.rank`` (MPI routes), ``self.last_truncation_error`` and
        ``self.last_max_truncation_error``.
    """
    effective_compile = (
        self.compile_circuit if compile_circuit is None else compile_circuit
    )

    if preprocess:
        if self.MPI_enabled:
            from mpi4py import MPI

            self.rank = MPI.COMM_WORLD.Get_rank()

        from qibotn.backends.vidal import VidalBackend

        vidal = VidalBackend()
        vidal.configure_tn_simulation(
            max_bond_dimension=self.max_bond_dimension,
            cut_ratio=self.cut_ratio,
            tensor_module=self.tensor_module,
            compile_circuit=effective_compile,
            mpi_approach="CT" if self.MPI_enabled else "SR",
            mpi_term_batch_size=self.mpi_term_batch_size,
            fallback=False,
        )
        result = vidal.expectation(
            circuit,
            observable,
            preprocess=True,
            compile_circuit=effective_compile,
        )
        # Mirror whatever bookkeeping the delegate exposes.
        self.rank = getattr(vidal, "rank", self.rank)
        self.last_truncation_error = getattr(vidal, "last_truncation_error", np.nan)
        self.last_max_truncation_error = getattr(
            vidal, "last_max_truncation_error", np.nan
        )
        return result

    mpo = _observable_mpo_tensors(observable, circuit.nqubits)
    executor_kwargs = {
        "nqubits": circuit.nqubits,
        "max_bond": self.max_bond_dimension,
        "cut_ratio": self.cut_ratio,
        "tensor_module": self.tensor_module,
    }

    if not self.MPI_enabled:
        runner = VidalTEBDExecutor(**executor_kwargs)
        runner.run_circuit(circuit)
        self.last_truncation_error = float(runner.truncation_error)
        self.last_max_truncation_error = float(runner.max_truncation_error)
        if mpo is None:
            # Symbolic observable: convert to an MPO only after the run.
            op_terms = _symbolic_hamiltonian_to_operator_terms(observable)
            mpo = _operator_terms_to_mpo(op_terms, circuit.nqubits)
        return runner.expectation_mpo(mpo)

    from mpi4py import MPI

    world = MPI.COMM_WORLD
    self.rank = world.Get_rank()
    runner = SegmentVidalMPIExecutor(comm=world, **executor_kwargs)
    runner.run_circuit(circuit)
    self.last_truncation_error = float(runner.global_truncation_error())
    self.last_max_truncation_error = float(runner.global_max_truncation_error())
    if mpo is None:
        op_terms = _symbolic_hamiltonian_to_operator_terms(observable)
        mpo = _operator_terms_to_mpo(op_terms, circuit.nqubits)
    root_value = runner.expectation_mpo_root(mpo)
    if self.rank != 0:
        return np.nan
    return root_value
|
||||
|
||||
def _quimb_backend(self):
    """Instantiate the quimb backend class registered under ``self.quimb_backend``."""
    import qibotn.backends.quimb as qmb

    engine_name = self.quimb_backend
    backend_cls = qmb.BACKENDS[engine_name]
    return backend_cls(
        quimb_backend=engine_name,
        contraction_optimizer=self.contraction_optimizer,
    )
|
||||
|
||||
def _bind_rank_to_numa_domain(self, rank):
    """Call ``_bind_numa_node(rank)`` and store its result in ``self.numa_domain``."""
    domain = _bind_numa_node(rank)
    self.numa_domain = domain
|
||||
|
||||
def _default_search_workers(self, nranks=1):
    """Pick a default worker count for the contraction-path search.

    A truthy ``self.torch_threads`` wins; otherwise the CPU count is split
    evenly across ``nranks``. The result is never below one.
    """
    threads = self.torch_threads
    if not threads:
        cores = os.cpu_count() or 1
        return max(1, cores // max(1, nranks))
    return max(1, int(threads))
|
||||
|
||||
def _quimb_expectation_processpool(self, backend, circuit, observable):
    """Run the quimb expectation search without MPI (``method="processpool"``)."""
    search = self._quimb_expectation_search
    return search(backend, circuit, observable, comm=None, method="processpool")
|
||||
|
||||
def _quimb_expectation_mpi(self, backend, circuit, observable):
    """Run the quimb expectation search over ``MPI.COMM_WORLD``.

    Records this process's rank in ``self.rank`` and binds it to a NUMA
    domain before delegating with ``method="mpi"``.
    """
    from mpi4py import MPI

    world = MPI.COMM_WORLD
    my_rank = world.Get_rank()
    self.rank = my_rank
    self._bind_rank_to_numa_domain(my_rank)

    return self._quimb_expectation_search(
        backend, circuit, observable, method="mpi", comm=world
    )
|
||||
|
||||
def _quimb_expectation_search(self, backend, circuit, observable, method, comm=None):
    """Evaluate the observable term by term with quimb contraction trees.

    For every term of ``observable`` a scalar tensor network is built, a
    contraction tree is either searched for or loaded from disk, and the
    network is contracted locally (``comm is None``) or via
    ``parallel_contract`` over ``comm``. Constant terms (no factors) add their
    coefficient directly and use no tree.

    Args:
        backend: quimb backend used to build the circuit and contract.
        circuit: circuit to simulate.
        observable: observable accepted by ``extract_gates_and_qubits``.
        method: search method passed to ``parallel_path_search``; replaced by
            ``"dask"`` when not ``"mpi"`` and ``search_backend == "dask"``.
        comm: MPI communicator, or ``None`` for single-process execution.

    Returns:
        ``np.nan`` in ``search_only`` mode or on ranks other than zero,
        otherwise the real part of the accumulated expectation value.

    Raises:
        ValueError: when a loaded tree file holds fewer trees than needed.
        RuntimeError: when no contraction tree is available for a term.

    Side effects:
        Appends one dict per contracted/searched term to
        ``self.parallel_stats``; optionally prints progress and pickles the
        trees to ``save_tree_path`` on rank zero.
    """
    rank = comm.Get_rank() if comm is not None else 0
    size = comm.Get_size() if comm is not None else 1
    self.rank = rank

    from qibotn.observables import extract_gates_and_qubits
    from qibotn.parallel import (
        contraction_tree_costs,
        parallel_contract,
        parallel_path_search,
    )
    from qibotn.backends.quimb import (
        PAULI_DENSE_MAX_QUBITS,
        _pauli_term_to_dense_operator,
        pauli_product_expectation_tn,
    )

    opts = dict(self.parallel_opts)
    user_slicing_opts = opts.get("slicing_opts")
    search_workers = opts.get("search_workers", self._default_search_workers(size))
    search_repeats = opts.get("max_repeats", 128)
    search_time = opts.get("max_time", 60)
    search_backend = opts.get("search_backend")
    dask_address = opts.get("dask_address")
    dask_close_workers = bool(opts.get("dask_close_workers", False))
    print_stats = bool(opts.get("print_stats", False))
    debug_trials = bool(opts.get("debug_trials", False))
    search_only = bool(opts.get("search_only", False))
    save_tree_path = opts.get("save_tree_path")
    load_tree_path = opts.get("load_tree_path")
    contract_implementation = opts.get("contract_implementation")
    loaded_trees = None
    saved_trees = []
    saved_costs = []

    if load_tree_path:
        # NOTE(security): unpickling executes arbitrary code from the file;
        # only point ``load_tree_path`` at files you produced yourself.
        with Path(load_tree_path).open("rb") as f:
            payload = pickle.load(f)
        loaded_trees = payload["trees"] if isinstance(payload, dict) else payload
        if not isinstance(loaded_trees, (list, tuple)):
            loaded_trees = [loaded_trees]

    def record_stats(
        term_index,
        factors,
        path_cost,
        search_stats,
        search_seconds,
        contract_seconds,
        tree_slices,
        slice_assignment,
        rank_slices,
    ):
        """Append one per-term statistics entry to ``self.parallel_stats``."""
        self.parallel_stats.append(
            {
                "term_index": term_index,
                "term_factors": tuple(factors),
                "path_cost": path_cost,
                "search_stats": search_stats,
                "tree_slices": tree_slices,
                "slice_assignment": slice_assignment,
                "rank_slices": rank_slices,
                "search_seconds": search_seconds,
                "contract_seconds": contract_seconds,
                "search_workers": search_workers,
                "search_repeats": search_repeats,
                "search_time": search_time,
                "search_backend": search_backend or method,
                "dask_address": dask_address,
                "numa_domain": getattr(self, "numa_domain", None),
            }
        )

    def report_contract(term_index, contract_seconds):
        """Print the per-term contraction timing line."""
        print(
            "tn_contract_done "
            f"term={term_index} "
            f"contract_seconds={contract_seconds:.3f}",
            flush=True,
        )

    qc = backend._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=backend.circuit_ansatz,
        gate_opts={
            "max_bond": self.max_bond_dimension,
            "cutoff": self.cut_ratio,
        },
    )

    total_value = 0.0 + 0.0j
    terms = extract_gates_and_qubits(observable)
    # Trees are saved only for non-constant terms, so the position inside a
    # saved/loaded tree list is counted separately from ``term_index``.
    tree_index = 0
    for term_index, (coeff, factors) in enumerate(terms):
        if not factors:
            if self.rank == 0:
                total_value += coeff
            continue

        if len(factors) > PAULI_DENSE_MAX_QUBITS:
            tn = pauli_product_expectation_tn(
                qc,
                factors,
                simplify_sequence="ADCRS",
                simplify_atol=1e-12,
            )
        else:
            op, where = _pauli_term_to_dense_operator(factors)
            tn = qc.local_expectation(
                op,
                where,
                rehearse="tn",
                simplify_sequence="ADCRS",
                simplify_atol=1e-12,
            )
        slicing_opts = self._mpi_slicing_opts(user_slicing_opts)

        if loaded_trees is not None:
            if tree_index >= len(loaded_trees):
                raise ValueError(
                    f"Loaded tree file has {len(loaded_trees)} tree(s), "
                    f"but term {term_index} was requested."
                )
            tree = loaded_trees[tree_index]
            search_seconds = 0.0
            if self.rank == 0 and print_stats:
                print(
                    f"tn_tree_loaded term={term_index} path={load_tree_path}",
                    flush=True,
                )
        else:
            search_start = time.perf_counter()
            tree = parallel_path_search(
                tn,
                tn.outer_inds(),
                method="dask" if method != "mpi" and search_backend == "dask" else method,
                total_repeats=search_repeats,
                max_time=search_time,
                n_workers=search_workers,
                slicing_opts=slicing_opts,
                trial_timeout=opts.get("trial_timeout"),
                search_backend=search_backend,
                dask_address=dask_address,
                debug_trials=debug_trials,
                dask_close_workers=dask_close_workers,
            )
            search_seconds = time.perf_counter() - search_start
        tree_index += 1

        if tree is None:
            raise RuntimeError("Failed to find a contraction tree for CPU TN MPI.")
        if contract_implementation == "cpp":
            from qibotn.torch_contractor import prepare_torch_cpp_contractor

            prepare_torch_cpp_contractor(tree)

        path_cost = contraction_tree_costs(tree)
        search_stats = getattr(tree, "qibotn_search_stats", {})
        multiplicity = int(getattr(tree, "multiplicity", 1))
        if save_tree_path and loaded_trees is None:
            saved_trees.append(tree)
            saved_costs.append(path_cost)
        if self.rank == 0 and print_stats:
            print(
                "tn_search_done "
                f"term={term_index} "
                f"search_seconds={search_seconds:.3f} "
                f"completed_trials={search_stats.get('completed_trials', 'na')} "
                f"finite_trials={search_stats.get('finite_trials', 'na')} "
                f"failed_trials={search_stats.get('failed_trials', 'na')} "
                f"requested_trials={search_stats.get('requested_trials', search_repeats)} "
                f"best_score={search_stats.get('best_score', float('nan')):.6g} "
                f"slices={path_cost['nslices']} "
                f"log10_flops={path_cost['log10_flops']:.3f} "
                f"log10_write={path_cost['log10_write']:.3f} "
                f"log2_size={path_cost['log2_size']:.3f} "
                f"log10_combo={path_cost['log10_combo']:.3f} "
                f"peak_memory_gib={path_cost['peak_memory_gib']:.6g}",
                flush=True,
            )

        if search_only:
            record_stats(
                term_index,
                factors,
                path_cost,
                search_stats,
                search_seconds,
                contract_seconds=0.0,
                tree_slices=multiplicity,
                slice_assignment="search_only",
                rank_slices=[],
            )
            continue

        if comm is None:
            # Single process: contract locally whether or not the tree is sliced.
            unsliced = multiplicity <= 1
            contract_start = time.perf_counter()
            value = self._contract_term_unsliced(tn, tree, backend)
            contract_seconds = time.perf_counter() - contract_start
            if print_stats:
                report_contract(term_index, contract_seconds)
            record_stats(
                term_index,
                factors,
                path_cost,
                search_stats,
                search_seconds,
                contract_seconds=contract_seconds,
                tree_slices=1 if unsliced else multiplicity,
                slice_assignment="root" if unsliced else "local",
                rank_slices=[1] + [0] * (size - 1) if unsliced else [multiplicity],
            )
            scalar = value if unsliced else np.asarray(value).reshape(-1)[0]
            total_value += coeff * complex(scalar)
            continue

        contract_start = time.perf_counter()
        arrays = self._term_arrays(tn, backend)
        value, stats = parallel_contract(
            tree,
            arrays,
            method="mpi",
            comm=comm,
            return_stats=True,
            implementation=contract_implementation,
        )
        contract_seconds = time.perf_counter() - contract_start
        gathered_stats = comm.gather(stats, root=0)
        if rank == 0:
            # ``gather`` only delivers the list on the root, and only the
            # root's total is returned, so all bookkeeping stays here.
            if print_stats:
                report_contract(term_index, contract_seconds)
            record_stats(
                term_index,
                factors,
                path_cost,
                search_stats,
                search_seconds,
                contract_seconds=contract_seconds,
                tree_slices=stats.nslices,
                slice_assignment=stats.assignment,
                rank_slices=[item.local_slices for item in gathered_stats],
            )
            total_value += coeff * complex(np.asarray(value).reshape(-1)[0])

    if self.rank == 0 and save_tree_path and loaded_trees is None:
        path = Path(save_tree_path)
        path.parent.mkdir(parents=True, exist_ok=True)
        with path.open("wb") as f:
            pickle.dump(
                {
                    "trees": saved_trees,
                    "costs": saved_costs,
                    "nterms": len(saved_trees),
                },
                f,
                protocol=pickle.HIGHEST_PROTOCOL,
            )
        if print_stats:
            print(
                f"tn_tree_saved path={save_tree_path} nterms={len(saved_trees)}",
                flush=True,
            )

    if search_only:
        return np.nan

    return np.nan if rank != 0 else float(np.real(total_value))
|
||||
|
||||
def _contract_term_unsliced(self, tn, tree, backend):
    """Contract one term's tensor network ``tn`` along ``tree`` in this process.

    With ``contract_implementation == "cpp"`` the arrays are converted to CPU
    torch tensors and contracted through ``contract_tree_cpp``, summing over
    slices when ``tree.multiplicity`` exceeds one. Otherwise ``tn.contract``
    is used; for the torch backend the tensors' data are first converted in
    place to CPU torch tensors of the configured dtype.

    Raises:
        ValueError: if the cpp implementation is requested for a non-torch backend.
    """
    implementation = self.parallel_opts.get("contract_implementation")

    if implementation == "cpp":
        if backend.backend != "torch":
            raise ValueError("contract_implementation='cpp' requires torch backend.")
        from qibotn.backends.quimb import _torch_cpu_array, _torch_dtype
        from qibotn.torch_contractor import contract_tree_cpp

        target_dtype = _torch_dtype(self.dtype)
        arrays = [_torch_cpu_array(item, dtype=target_dtype) for item in tn.arrays]
        nslices = int(getattr(tree, "multiplicity", 1))
        if nslices <= 1:
            return contract_tree_cpp(tree, arrays)
        accumulated = None
        for slice_id in range(nslices):
            partial = contract_tree_cpp(tree, tree.slice_arrays(arrays, slice_id))
            accumulated = partial if accumulated is None else accumulated + partial
        return accumulated

    engine_name = backend.backend
    if engine_name == "torch":
        from qibotn.backends.quimb import _torch_cpu_array, _torch_dtype

        target_dtype = _torch_dtype(self.dtype)
        for tensor in tn.tensors:
            tensor._data = _torch_cpu_array(tensor._data, dtype=target_dtype)

    return tn.contract(
        all,
        output_inds=(),
        optimize=tree,
        backend=engine_name,
        implementation=implementation,
    )
|
||||
|
||||
def _mpi_slicing_opts(self, user_slicing_opts):
    """Return a shallow ``dict`` copy of the user slicing options, or ``None``."""
    if user_slicing_opts is None:
        return None
    return dict(user_slicing_opts)
|
||||
|
||||
def _term_arrays(self, tn, backend):
    """Return ``tn.arrays`` converted to the backend's array type and ``self.dtype``.

    The torch backend gets CPU torch tensors; any other backend gets arrays
    built with ``backend.engine.asarray``.
    """
    if backend.backend != "torch":
        from qibotn.backends.quimb import _numpy_dtype

        target_dtype = _numpy_dtype(self.dtype)
        as_array = backend.engine.asarray
        return [as_array(item, dtype=target_dtype) for item in tn.arrays]

    from qibotn.backends.quimb import _torch_cpu_array, _torch_dtype

    target_dtype = _torch_dtype(self.dtype)
    return [_torch_cpu_array(item, dtype=target_dtype) for item in tn.arrays]
|
||||
@@ -9,8 +9,10 @@ import qmatchatea
|
||||
import qtealeaves
|
||||
from qibo.backends import NumpyBackend
|
||||
from qibo.config import raise_error
|
||||
from qmatchatea.utils import MPISettings
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.observables import check_observable
|
||||
from qibotn.result import TensorNetworkResult
|
||||
|
||||
|
||||
@@ -38,6 +40,14 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
trunc_tracking_mode: str = "C",
|
||||
svd_control: str = "A",
|
||||
ini_bond_dimension: int = 1,
|
||||
tensor_module: str = "numpy",
|
||||
compile_circuit: bool = False,
|
||||
cache_gate_tensors: bool = True,
|
||||
track_memory: bool = False,
|
||||
mpi_approach: str = "SR",
|
||||
mpi_num_procs: int = 1,
|
||||
mpi_where_barriers: int = -1,
|
||||
mpi_isometrization: int = -1,
|
||||
):
|
||||
"""Configure TN simulation given Quantum Matcha Tea interface.
|
||||
|
||||
@@ -75,6 +85,18 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
ini_bond_dimension=ini_bond_dimension,
|
||||
)
|
||||
self.ansatz = ansatz
|
||||
self.tensor_module = tensor_module
|
||||
self.compile_circuit = compile_circuit
|
||||
self.cache_gate_tensors = cache_gate_tensors
|
||||
self.track_memory = track_memory
|
||||
self.mpi_settings = MPISettings(
|
||||
mpi_approach=mpi_approach,
|
||||
num_procs=mpi_num_procs,
|
||||
where_barriers=mpi_where_barriers,
|
||||
isometrization=mpi_isometrization,
|
||||
)
|
||||
if hasattr(self, "qmatchatea_backend"):
|
||||
self._setup_backend_specifics()
|
||||
|
||||
def _setup_backend_specifics(self):
|
||||
"""Configure qmatchatea QCBackend object."""
|
||||
@@ -88,12 +110,15 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
else "Z" if self.precision == "double" else "A"
|
||||
)
|
||||
|
||||
# TODO: once MPI is available for Python, integrate it here
|
||||
self.qmatchatea_backend = qmatchatea.QCBackend(
|
||||
precision=qmatchatea_precision,
|
||||
device=qmatchatea_device,
|
||||
ansatz=self.ansatz,
|
||||
tensor_module=self.tensor_module,
|
||||
mpi_settings=self.mpi_settings,
|
||||
)
|
||||
self.qmatchatea_backend.cache_gate_tensors = self.cache_gate_tensors
|
||||
self.qmatchatea_backend.track_memory = self.track_memory
|
||||
|
||||
def execute_circuit(
|
||||
self,
|
||||
@@ -193,7 +218,7 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
statevector=statevector,
|
||||
)
|
||||
|
||||
def expectation(self, circuit, observable):
|
||||
def expectation(self, circuit, observable, preprocess=True, compile_circuit=None):
|
||||
"""Compute the expectation value of a Qibo-friendly ``observable`` on
|
||||
the Tensor Network constructed from a Qibo ``circuit``.
|
||||
|
||||
@@ -216,8 +241,14 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
simulation setup.
|
||||
"""
|
||||
|
||||
observable = check_observable(observable, circuit.nqubits)
|
||||
|
||||
# From Qibo to Qiskit
|
||||
circuit = self._qibocirc_to_qiskitcirc(circuit)
|
||||
circuit = self._qibocirc_to_qiskitcirc(
|
||||
circuit,
|
||||
preprocess=preprocess,
|
||||
compile_circuit=compile_circuit,
|
||||
)
|
||||
run_qk_params = qmatchatea.preprocessing.qk_transpilation_params(False)
|
||||
|
||||
operators = qmatchatea.QCOperators()
|
||||
@@ -234,19 +265,37 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
operators=operators,
|
||||
)
|
||||
|
||||
if self.qmatchatea_backend.mpi_approach != "SR":
|
||||
from qtealeaves.tooling.mpisupport import MPI
|
||||
|
||||
if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
|
||||
return np.nan
|
||||
|
||||
return np.real(results.observables["custom_hamiltonian"])
|
||||
|
||||
def _qibocirc_to_qiskitcirc(
    self, qibo_circuit, preprocess=True, compile_circuit=None
) -> qiskit.QuantumCircuit:
    """Convert a Qibo Circuit into a Qiskit Circuit.

    Args:
        qibo_circuit: circuit exposing ``to_qasm()``.
        preprocess: when true, run ``qmatchatea.preprocessing.preprocess``
            on the converted circuit; when false, skip it.
        compile_circuit: whether to apply the qmatchatea tensor compiler;
            ``None`` falls back to ``self.compile_circuit``.

    Returns:
        The resulting Qiskit circuit.
    """
    # Convert the circuit to QASM 2.0 to qiskit
    qasm_circuit = qibo_circuit.to_qasm()
    qiskit_circuit = qiskit.QuantumCircuit.from_qasm_str(qasm_circuit)

    if compile_circuit is None:
        compile_circuit = self.compile_circuit

    if not preprocess:
        # No transpilation requested: optionally compile, then return as is.
        if compile_circuit:
            qiskit_circuit = qmatchatea.tensor_compiler(qiskit_circuit)
        return qiskit_circuit

    # Transpile the circuit to adapt it to the linear structure of the MPS,
    # with the constraint of having only the gates basis_gates
    qiskit_circuit = qmatchatea.preprocessing.preprocess(
        qiskit_circuit,
        qk_params=qmatchatea.preprocessing.qk_transpilation_params(
            tensor_compiler=compile_circuit
        ),
    )
    return qiskit_circuit
|
||||
|
||||
|
||||
@@ -37,8 +37,129 @@ GATE_MAP = {
|
||||
"measure": "measure",
|
||||
}
|
||||
|
||||
PAULI_DENSE_MAX_QUBITS = 8
|
||||
|
||||
def __init__(self, quimb_backend="numpy", contraction_optimizer="auto-hq"):
|
||||
|
||||
def _torch_cpu_array(data, dtype=None):
    """Return ``data`` as a contiguous torch tensor on the CPU.

    Tensors are passed through; anything else goes through ``np.asarray`` and
    ``torch.from_numpy`` (negatively strided arrays are copied first). The
    tensor is then moved to the CPU, cast to ``dtype`` when one is given and
    differs, and made contiguous, each step only if needed.
    """
    import numpy as np
    import torch

    if isinstance(data, torch.Tensor):
        tensor = data
    else:
        host = np.asarray(data)
        if min(host.strides, default=0) < 0:
            host = np.ascontiguousarray(host)
        tensor = torch.from_numpy(host)

    on_cpu = tensor if tensor.device.type == "cpu" else tensor.cpu()
    needs_cast = dtype is not None and on_cpu.dtype != dtype
    typed = on_cpu.to(dtype) if needs_cast else on_cpu
    return typed if typed.is_contiguous() else typed.contiguous()
|
||||
|
||||
|
||||
def _torch_dtype(dtype):
    """Map a precision name to a torch complex dtype.

    ``"complex64"`` and ``"single"`` select ``torch.complex64``; every other
    value selects ``torch.complex128``.
    """
    import torch

    single_precision = dtype in ("complex64", "single")
    return torch.complex64 if single_precision else torch.complex128
|
||||
|
||||
|
||||
def _numpy_dtype(dtype):
    """Map a precision name to a NumPy complex dtype.

    ``"complex64"`` and ``"single"`` select ``np.complex64``; every other
    value selects ``np.complex128``.
    """
    import numpy as np

    single_precision = dtype in ("complex64", "single")
    return np.complex64 if single_precision else np.complex128
|
||||
|
||||
|
||||
def _arrays_to_backend(arrays, backend, engine, dtype="complex128"):
    """Convert every item of ``arrays`` for the named ``backend``.

    ``"torch"`` yields CPU torch tensors; any other backend name yields
    ``engine.asarray`` results. Both use the complex dtype selected by ``dtype``.
    """
    converted = []
    if backend == "torch":
        for item in arrays:
            converted.append(_torch_cpu_array(item, dtype=_torch_dtype(dtype)))
    else:
        for item in arrays:
            converted.append(engine.asarray(item, dtype=_numpy_dtype(dtype)))
    return converted
|
||||
|
||||
|
||||
def _pauli_term_to_dense_operator(factors):
    """Combine the Pauli factors of one term into a single operator.

    Args:
        factors: iterable of ``(qubit, gate_name)`` pairs.

    Returns:
        ``(op, where)``: the factors' ``qu.pauli`` operators combined with
        ``&`` in order (``None`` for no factors), and the tuple of qubits.
    """
    qubits = []
    combined = None
    for qubit, label in factors:
        matrix = qu.pauli(label.lower())
        if combined is None:
            combined = matrix
        else:
            combined = combined & matrix
        qubits.append(qubit)
    return combined, tuple(qubits)
|
||||
|
||||
|
||||
def pauli_product_expectation_tn(
    quimb_circuit,
    factors,
    simplify_sequence="ADCRS",
    simplify_atol=1e-12,
    simplify_equalize_norms=True,
):
    """Build the scalar TN for ``<psi|P|psi>`` without dense Pauli strings.

    The simplified ket of ``quimb_circuit`` is paired with its conjugate, one
    2x2 tensor per site is inserted between them (the site's Pauli, or the
    identity where ``factors`` names none or names ``"I"``), and the network
    is simplified in place before being returned uncontracted.
    """
    import numpy as np

    # Per-site operators; identity factors are dropped and filled in below.
    site_ops = {}
    for qubit, gate_name in factors:
        label = str(gate_name)
        if label.upper() != "I":
            site_ops[int(qubit)] = qu.pauli(label.lower())

    simplify_kwargs = {
        "seq": simplify_sequence,
        "atol": simplify_atol,
        "equalize_norms": simplify_equalize_norms,
    }
    ket = quimb_circuit.get_psi_simplified(**simplify_kwargs)
    ket_to_bra = {
        quimb_circuit.ket_site_ind(site): quimb_circuit.bra_site_ind(site)
        for site in range(quimb_circuit.N)
    }
    bra = ket.conj().reindex(ket_to_bra)

    tn = bra | ket
    identity = np.eye(2, dtype=complex)
    for site in range(quimb_circuit.N):
        site_inds = (
            quimb_circuit.bra_site_ind(site),
            quimb_circuit.ket_site_ind(site),
        )
        tn |= qtn.Tensor(data=site_ops.get(site, identity), inds=site_inds)

    tn.full_simplify_(output_inds=(), **simplify_kwargs)
    return tn
|
||||
|
||||
|
||||
def pauli_product_expectation(
    quimb_circuit,
    factors,
    backend,
    optimize,
    simplify_sequence="ADCRS",
    simplify_atol=1e-12,
):
    """Build the ``<psi|P|psi>`` network for ``factors`` and contract it to a scalar."""
    network = pauli_product_expectation_tn(
        quimb_circuit,
        factors,
        simplify_atol=simplify_atol,
        simplify_sequence=simplify_sequence,
    )
    contract_kwargs = {"output_inds": (), "optimize": optimize, "backend": backend}
    return network.contract(all, **contract_kwargs)
|
||||
|
||||
|
||||
def __init__(self, quimb_backend="torch", contraction_optimizer="auto-hq"):
|
||||
super(self.__class__, self).__init__()
|
||||
|
||||
self.name = "qibotn"
|
||||
@@ -91,7 +212,7 @@ def circuit_ansatz(self):
|
||||
|
||||
|
||||
def setup_backend_specifics(
|
||||
self, quimb_backend="numpy", contractions_optimizer="auto-hq"
|
||||
self, quimb_backend="torch", contractions_optimizer="auto-hq"
|
||||
):
|
||||
"""Setup backend specifics.
|
||||
Args:
|
||||
@@ -167,7 +288,7 @@ def execute_circuit(
|
||||
raise_error(ValueError, "Initial state not None supported only for MPS ansatz.")
|
||||
|
||||
circ_quimb = self.circuit_ansatz.from_openqasm2_str(
|
||||
circuit.to_qasm(), psi0=initial_state
|
||||
circuit.to_qasm(), psi0=initial_state, gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff}
|
||||
)
|
||||
|
||||
if nshots:
|
||||
@@ -186,7 +307,16 @@ def execute_circuit(
|
||||
else:
|
||||
frequencies = None
|
||||
measured_probabilities = None
|
||||
|
||||
'''
|
||||
if return_array:
|
||||
if self.ansatz == "mps":
|
||||
psi = circ_quimb.psi
|
||||
statevector = psi.to_dense().reshape(-1)
|
||||
else:
|
||||
statevector = circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer)
|
||||
else:
|
||||
statevector = None
|
||||
'''
|
||||
statevector = (
|
||||
circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer)
|
||||
if return_array
|
||||
@@ -291,7 +421,19 @@ def _qibo_circuit_to_quimb(
|
||||
quimb_gate_name = GATE_MAP.get(gate_name, None)
|
||||
if quimb_gate_name == "measure":
|
||||
continue
|
||||
if gate_name == "cu1":
|
||||
theta = gate.parameters[0]
|
||||
c, t = gate.qubits
|
||||
circ.apply_gate("RZ", theta / 2, c)
|
||||
circ.apply_gate("RZ", theta / 2, t)
|
||||
circ.apply_gate("CNOT", c, t)
|
||||
circ.apply_gate("RZ", -theta / 2, t)
|
||||
circ.apply_gate("CNOT", c, t)
|
||||
continue
|
||||
if quimb_gate_name is None:
|
||||
if hasattr(gate, "matrix"):
|
||||
circ.apply_gate_raw(gate.matrix(), getattr(gate, "qubits", ()))
|
||||
continue
|
||||
raise_error(ValueError, f"Gate {gate_name} not supported in Quimb backend.")
|
||||
|
||||
params = getattr(gate, "parameters", ())
|
||||
@@ -334,6 +476,173 @@ def _string_to_quimb_operator(self, op_str):
|
||||
return op
|
||||
|
||||
|
||||
def expectation(self, circuit, observable, parallel=None, parallel_opts=None):
    """
    Compute expectation value with optional parallel acceleration.

    Parameters
    ----------
    circuit : qibo.models.Circuit
        The quantum circuit.
    observable : qibo.hamiltonians.SymbolicHamiltonian or compatible form
        The observable to measure; it is normalised with ``check_observable``.
    parallel : str, optional
        Parallelization method: 'mpi', 'processpool', or None (default).
    parallel_opts : dict, optional
        Options for parallel execution:
        - max_repeats: int (default 1024)
        - max_time: int (default 300)
        - search_workers: int (default 48, processpool only)
        - mpi_contract: bool (default False, use MPI for contraction)

    Returns
    -------
    float
        The expectation value.
    """
    from qibotn.observables import check_observable, extract_gates_and_qubits

    if parallel_opts is None:
        parallel_opts = {}

    observable = check_observable(observable, circuit.nqubits)

    if parallel is not None:
        # Use parallel implementation
        return self._expectation_parallel(circuit, observable, parallel, parallel_opts)

    # Serial implementation: contract every term with the configured optimizer.
    all_terms = extract_gates_and_qubits(observable)

    qc = self._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=self.circuit_ansatz,
        gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff},
    )

    exp_val = 0.0
    for coeff, factors in all_terms:
        if not factors:
            # Constant term: there is no operator to contract, so add the
            # coefficient directly instead of building an empty operator.
            exp_val += coeff
            continue

        if len(factors) > PAULI_DENSE_MAX_QUBITS:
            # Long Pauli strings are contracted site by site rather than
            # through one dense operator.
            val = pauli_product_expectation(
                qc,
                factors,
                backend=self.backend,
                optimize=self.contractions_optimizer,
                simplify_sequence="ADCRS",
                simplify_atol=1e-12,
            )
        else:
            op, where = _pauli_term_to_dense_operator(factors)
            val = qc.local_expectation(
                op,
                where,
                backend=self.backend,
                optimize=self.contractions_optimizer,
                simplify_sequence="ADCRS",
                simplify_atol=1e-12,
            )
        exp_val += coeff * val

    return self.real(exp_val)
|
||||
|
||||
|
||||
def _expectation_parallel(self, circuit, observable, method, opts):
    """Compute an expectation value by distributing the observable terms.

    The terms returned by ``extract_gates_and_qubits`` are dealt round-robin
    over the MPI ranks (``all_terms[rank::size]``); without MPI a single
    process handles every term.  For each term a contraction tree is searched
    with ``parallel_path_search`` and the term network is contracted.

    Parameters
    ----------
    circuit : circuit accepted by ``self._qibo_circuit_to_quimb``.
    observable : object accepted by ``extract_gates_and_qubits``.
    method : str
        Forwarded to ``parallel_path_search``.  ``'mpi'`` additionally makes
        the function use ``MPI.COMM_WORLD`` (when ``mpi4py`` is importable).
    opts : dict
        Recognised keys (all optional):

        - ``max_repeats`` (default 1024), ``max_time`` (default 300),
          ``search_workers`` (default 48), ``slicing_opts``,
          ``trial_timeout``: forwarded to ``parallel_path_search``.
        - ``mpi_contract`` (default False): contract with
          ``parallel_contract`` over the communicator.
        - ``torch_threads``: torch thread count outside MPI mode.
        - ``total_threads`` (default 96): thread budget that is divided by
          the number of ranks in MPI mode.

    Returns
    -------
    ``self.real`` of the accumulated value.  In MPI mode only rank 0 returns
    the gathered sum; every other rank returns ``0.0``.
    """
    from qibotn.observables import extract_gates_and_qubits
    from qibotn.parallel import parallel_path_search, parallel_contract
    import torch

    try:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD if method == 'mpi' else None
        rank = comm.Get_rank() if comm else 0
        size = comm.Get_size() if comm else 1
    except ImportError:
        # Without mpi4py everything runs in this single process.
        comm, rank, size = None, 0, 1

    max_repeats = opts.get('max_repeats', 1024)
    max_time = opts.get('max_time', 300)
    search_workers = opts.get('search_workers', 48)
    mpi_contract = opts.get('mpi_contract', False)
    torch_threads = opts.get('torch_threads', None)
    slicing_opts = opts.get('slicing_opts', None)
    trial_timeout = opts.get('trial_timeout', None)
    # Previously a literal 96; kept as the default so behaviour is unchanged
    # unless the caller describes a different machine.
    total_threads = opts.get('total_threads', 96)

    qc = self._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=self.circuit_ansatz,
        gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff},
    )

    all_terms = extract_gates_and_qubits(observable)
    my_terms = all_terms[rank::size]

    if method == 'mpi' and comm:
        torch.set_num_threads(max(1, total_threads // size))
    elif torch_threads:
        torch.set_num_threads(torch_threads)

    my_exp = 0.0
    for coeff, factors in my_terms:
        if len(factors) > PAULI_DENSE_MAX_QUBITS:
            tn = pauli_product_expectation_tn(qc, factors)
        else:
            op, where = _pauli_term_to_dense_operator(factors)
            tn = qc.local_expectation(op, where, rehearse='tn')

        tree = parallel_path_search(
            tn, tn.outer_inds(),
            method=method,
            total_repeats=max_repeats,
            max_time=max_time,
            n_workers=search_workers,
            slicing_opts=slicing_opts,
            trial_timeout=trial_timeout,
        )

        if tree is None:
            # NOTE(review): the term is dropped from the sum without any
            # diagnostic when no tree is returned -- confirm this is intended.
            continue

        if mpi_contract and comm and size > 1:
            arrays = _arrays_to_backend(tn.arrays, self.backend, self.engine)
            val = parallel_contract(tree, arrays, method='mpi', comm=comm)
        else:
            if self.backend == "torch":
                # Contract on CPU complex128 torch tensors.
                for tensor in tn.tensors:
                    tensor._data = _torch_cpu_array(
                        tensor._data, dtype=torch.complex128
                    )
            val = tn.contract(
                all,
                output_inds=(),
                optimize=tree,
                backend=self.backend,
            )

        my_exp += coeff * complex(val)

    if comm:
        all_exp = comm.gather(my_exp, root=0)
        if rank == 0:
            return self.real(sum(all_exp))
        return 0.0

    return self.real(my_exp)
|
||||
|
||||
|
||||
CLASSES_ROOTS = {"numpy": "Numpy", "torch": "PyTorch", "jax": "Jax"}
|
||||
|
||||
METHODS = {
|
||||
@@ -344,11 +653,13 @@ METHODS = {
|
||||
"exp_value_observable_symbolic": exp_value_observable_symbolic,
|
||||
"_qibo_circuit_to_quimb": _qibo_circuit_to_quimb,
|
||||
"_string_to_quimb_operator": _string_to_quimb_operator,
|
||||
"expectation": expectation,
|
||||
"_expectation_parallel": _expectation_parallel,
|
||||
"circuit_ansatz": circuit_ansatz,
|
||||
}
|
||||
|
||||
|
||||
def _generate_backend(quimb_backend: str = "numpy"):
|
||||
def _generate_backend(quimb_backend: str = "torch"):
|
||||
bases = (QibotnBackend,)
|
||||
|
||||
if quimb_backend == "numpy":
|
||||
@@ -356,9 +667,14 @@ def _generate_backend(quimb_backend: str = "numpy"):
|
||||
|
||||
bases += (NumpyBackend,)
|
||||
elif quimb_backend == "torch":
|
||||
from qiboml.backends import PyTorchBackend
|
||||
try:
|
||||
from qiboml.backends import PyTorchBackend
|
||||
except ImportError:
|
||||
from qibo.backends import NumpyBackend
|
||||
|
||||
bases += (PyTorchBackend,)
|
||||
bases += (NumpyBackend,)
|
||||
else:
|
||||
bases += (PyTorchBackend,)
|
||||
elif quimb_backend == "jax":
|
||||
from qiboml.backends import JaxBackend
|
||||
|
||||
|
||||
477
src/qibotn/backends/vidal.py
Normal file
477
src/qibotn/backends/vidal.py
Normal file
@@ -0,0 +1,477 @@
|
||||
"""Vidal/TEBD fast-path backend with qmatchatea fallback.
|
||||
|
||||
This backend targets MPS-friendly one-dimensional circuits: one-qubit gates and
|
||||
adjacent two-qubit gates, measured with Pauli-sum expectation values. Unsupported
|
||||
features fall back to the qmatchatea backend so the public behavior remains
|
||||
usable while the fast path is expanded.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
from qibo.backends import NumpyBackend
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor, _gate_sites
|
||||
from qibotn.observables import check_observable
|
||||
|
||||
|
||||
def _symbolic_hamiltonian_to_pauli_terms(hamiltonian):
    """Convert a symbolic Hamiltonian into named Pauli-product terms.

    Each factor of every term is parsed from ``str(factor)``, which must be
    an operator name followed by a qubit index (for example ``"X3"``).

    Parameters
    ----------
    hamiltonian : object exposing ``terms``; each term exposes ``factors``
        and ``coefficient``.

    Returns
    -------
    list of ``(complex coefficient, ((name, qubit), ...))`` tuples.  Identity
    factors are omitted from the operator tuple.

    Raises
    ------
    ValueError
        If a factor does not have the ``<name><index>`` form or names an
        operator other than I, X, Y, Z.
    """
    factor_pattern = re.compile(r"([^\d]+)(\d+)")
    terms = []
    for term in hamiltonian.terms:
        ops = []
        for factor in term.factors:
            # fullmatch: a prefix match would silently accept e.g. "X1Y2"
            # as X on qubit 1 and drop the remainder.
            match = factor_pattern.fullmatch(str(factor))
            if match is None:
                raise ValueError(f"Unsupported observable factor {factor!r}.")
            name = match.group(1).upper()
            if name not in ("I", "X", "Y", "Z"):
                raise ValueError(f"Unsupported observable operator {name!r}.")
            if name != "I":
                ops.append((name, int(match.group(2))))
        terms.append((complex(term.coefficient), tuple(ops)))
    return terms
|
||||
|
||||
|
||||
def _symbolic_hamiltonian_to_operator_terms(hamiltonian):
    """Convert a symbolic Hamiltonian into product-of-matrices terms.

    A factor contributes through its ``target_qubit`` and ``matrix``
    attributes when both are present; otherwise ``str(factor)`` is parsed as
    a Pauli name followed by a qubit index.  Several factors acting on the
    same site within one term are multiplied together in factor order.

    Parameters
    ----------
    hamiltonian : object exposing ``terms``; each term exposes ``factors``
        and ``coefficient``.

    Returns
    -------
    list of ``(complex coefficient, ((site, 2x2 matrix), ...))`` tuples.

    Raises
    ------
    ValueError
        If a factor without usable attributes cannot be parsed as one of
        I, X, Y, Z followed by an index.
    """
    factor_pattern = re.compile(r"([^\d]+)(\d+)")
    paulis = {
        "I": np.eye(2, dtype=np.complex128),
        "X": np.array([[0, 1], [1, 0]], dtype=np.complex128),
        "Y": np.array([[0, -1j], [1j, 0]], dtype=np.complex128),
        "Z": np.array([[1, 0], [0, -1]], dtype=np.complex128),
    }
    terms = []
    for term in hamiltonian.terms:
        ops_by_site = {}
        for factor in term.factors:
            site = getattr(factor, "target_qubit", None)
            matrix = getattr(factor, "matrix", None)
            if site is None or matrix is None:
                # fullmatch: a prefix match would silently accept trailing
                # text such as "X1Y2" and drop part of the factor.
                match = factor_pattern.fullmatch(str(factor))
                if match is None:
                    raise ValueError(f"Unsupported observable factor {factor!r}.")
                name = match.group(1).upper()
                if name not in paulis:
                    raise ValueError(f"Unsupported observable operator {name!r}.")
                site = int(match.group(2))
                matrix = paulis[name]
            matrix = np.asarray(matrix, dtype=np.complex128)
            site = int(site)
            if site in ops_by_site:
                ops_by_site[site] = ops_by_site[site] @ matrix
            else:
                ops_by_site[site] = matrix
        terms.append((complex(term.coefficient), tuple(ops_by_site.items())))
    return terms
|
||||
|
||||
|
||||
def _dense_operator_to_product_terms(coeff, qubits, matrix):
    """Rewrite a dense k-qubit operator as a sum of single-site products.

    Every non-zero entry ``matrix[row, col]`` yields one term whose factors
    are the 2x2 matrix units ``|out><in|`` selected by the bits of ``row``
    and ``col``.  The most significant bit belongs to the first entry of
    ``qubits``, so ``qubits=[2, 5]`` means the basis is ordered ``|q2 q5>``.

    Returns a list of ``(value, ((site, unit), ...))`` in row-major order,
    where ``value`` is ``coeff * matrix[row, col]``.

    Raises ``ValueError`` for repeated qubits or a matrix whose shape is not
    ``(2**k, 2**k)``.
    """
    sites = tuple(int(qubit) for qubit in qubits)
    nsites = len(sites)
    if len(set(sites)) != nsites:
        raise ValueError("Dense observable qubits must be unique.")
    dense = np.asarray(matrix, dtype=np.complex128)
    dim = 2 ** nsites
    if dense.shape != (dim, dim):
        raise ValueError(
            "Dense observable matrix shape must be "
            f"({dim}, {dim}) for {nsites} qubits."
        )

    # Matrix units keyed by (output bit, input bit).
    units = {}
    for out_bit in (0, 1):
        for in_bit in (0, 1):
            unit = np.zeros((2, 2), dtype=np.complex128)
            unit[out_bit, in_bit] = 1
            units[out_bit, in_bit] = unit

    scale = complex(coeff)
    shifts = range(nsites - 1, -1, -1)
    product_terms = []
    for row, col in np.ndindex(dim, dim):
        weight = scale * complex(dense[row, col])
        if weight == 0:
            continue
        factors = tuple(
            (site, units[(row >> shift) & 1, (col >> shift) & 1])
            for site, shift in zip(sites, shifts)
        )
        product_terms.append((weight, factors))
    return product_terms
|
||||
|
||||
|
||||
def _dense_observable_to_operator_terms(observable):
    """Expand a dict-described dense observable into product terms.

    Accepted layouts: a single term dict containing ``"matrix"``, a dict
    with a ``"dense_terms"`` list, or a dict whose ``"terms"`` list has at
    least one entry with a ``"matrix"``.  Each term needs ``"matrix"`` plus
    ``"qubits"`` (or ``"sites"``) and may carry a ``"coefficient"``
    (default ``1.0``).

    Returns ``None`` when ``observable`` is not a dict or holds no dense
    terms, otherwise the concatenated output of
    ``_dense_operator_to_product_terms``.

    Raises ``ValueError`` when a selected term lacks a matrix or qubits.
    """
    if not isinstance(observable, dict):
        return None

    if "matrix" in observable:
        dense_terms = [observable]
    else:
        dense_terms = observable.get("dense_terms")
    if dense_terms is None:
        candidates = observable.get("terms")
        if not candidates:
            return None
        if all("matrix" not in candidate for candidate in candidates):
            return None
        dense_terms = candidates

    product_terms = []
    for entry in dense_terms:
        if "matrix" not in entry:
            raise ValueError("Dense observable terms must include a matrix.")
        qubits = entry.get("qubits", entry.get("sites"))
        if qubits is None:
            raise ValueError("Dense observable terms must include qubits or sites.")
        product_terms += _dense_operator_to_product_terms(
            entry.get("coefficient", 1.0), qubits, entry["matrix"]
        )
    return product_terms
|
||||
|
||||
|
||||
def _operator_terms_to_mpo(terms, nqubits):
    """Build an exact direct-sum MPO for product-operator terms.

    This intentionally favors correctness and generality over compression: an
    ``m``-term sum becomes an MPO with bond dimension ``m``.  Local
    Hamiltonians can be compressed later without changing the public
    expectation path.

    Parameters
    ----------
    terms : iterable of ``(coefficient, ((site, 2x2 matrix), ...))``.
        Sites not mentioned in a term carry the identity.  If a site occurs
        more than once in a term the matrices are multiplied in the order
        given.
    nqubits : int
        Number of sites of the MPO.

    Returns
    -------
    list of ``nqubits`` arrays of shape ``(left, 2, 2, right)``; the outer
    bonds have dimension 1 and the coefficient is absorbed into site 0.

    Raises
    ------
    ValueError
        For an out-of-range site, a non-``(2, 2)`` matrix or an empty
        ``terms``.
    """
    identity = np.eye(2, dtype=np.complex128)
    expanded_terms = []
    for coeff, ops in terms:
        local_ops = [identity] * nqubits
        touched = set()
        for site, matrix in ops:
            site = int(site)
            if not 0 <= site < nqubits:
                raise ValueError(f"Observable site {site} is outside the circuit.")
            matrix = np.asarray(matrix, dtype=np.complex128)
            if matrix.shape != (2, 2):
                raise ValueError("Only qubit local operators with shape (2, 2) are supported.")
            if site in touched:
                # A repeated site means an operator product; overwriting
                # would silently discard the earlier factor.
                local_ops[site] = local_ops[site] @ matrix
            else:
                local_ops[site] = matrix
                touched.add(site)
        expanded_terms.append((complex(coeff), local_ops))

    if not expanded_terms:
        raise ValueError("Cannot build an MPO from an empty observable.")

    bond_dim = len(expanded_terms)
    last_site = nqubits - 1
    mpo = []
    for site in range(nqubits):
        left_dim = 1 if site == 0 else bond_dim
        right_dim = 1 if site == last_site else bond_dim
        tensor = np.zeros((left_dim, 2, 2, right_dim), dtype=np.complex128)
        for term_index, (coeff, local_ops) in enumerate(expanded_terms):
            # Each term travels along its own bond index (direct sum).
            left = 0 if site == 0 else term_index
            right = 0 if site == last_site else term_index
            op = coeff * local_ops[site] if site == 0 else local_ops[site]
            tensor[left, :, :, right] += op
        mpo.append(tensor)
    return mpo
|
||||
|
||||
|
||||
def _observable_mpo_tensors(observable, nqubits=None):
    """Return MPO tensors for ``observable`` when they can be obtained.

    A dict observable may carry ready-made tensors under ``"mpo_tensors"``
    or ``"mpo"`` (checked in that order).  Otherwise, if ``nqubits`` is
    given, dense terms are expanded and turned into a direct-sum MPO.
    ``None`` is returned when none of these applies.
    """
    if isinstance(observable, dict):
        for key in ("mpo_tensors", "mpo"):
            if key in observable:
                return observable[key]
    if nqubits is None:
        return None
    operator_terms = _dense_observable_to_operator_terms(observable)
    if operator_terms is None:
        return None
    return _operator_terms_to_mpo(operator_terms, nqubits)
|
||||
|
||||
|
||||
def _unsupported_reason(circuit):
    """Describe the first gate in ``circuit.queue`` the fast path rejects.

    A gate is rejected when it has no sites, acts on more than two sites,
    acts on two sites that are not neighbours, or has no ``matrix``
    attribute.  Returns the description string, or ``None`` when every gate
    passes.
    """

    def _diagnose(gate):
        label = getattr(gate, "name", gate.__class__.__name__)
        sites = _gate_sites(gate)
        if not sites:
            return f"gate {label} has no target qubits"
        nsites = len(sites)
        if nsites > 2:
            return f"gate {label} acts on {nsites} qubits"
        if nsites == 2 and abs(sites[0] - sites[1]) != 1:
            return f"gate {label} is non-adjacent on qubits {sites}"
        if not hasattr(gate, "matrix"):
            return f"gate {label} does not expose a matrix"
        return None

    problems = (_diagnose(gate) for gate in circuit.queue)
    return next((problem for problem in problems if problem is not None), None)
|
||||
|
||||
|
||||
def _can_route_non_adjacent(circuit):
    """Tell whether non-adjacency is the only possible obstacle.

    Returns ``True`` when every gate in ``circuit.queue`` acts on one or two
    sites and has a ``matrix`` attribute, so that SWAP routing at compile
    time is enough to make the circuit runnable.  Gates on more than two
    sites, without sites, or without a matrix make the result ``False``.
    """

    def _routable(gate):
        sites = _gate_sites(gate)
        return bool(sites) and len(sites) <= 2 and hasattr(gate, "matrix")

    return all(_routable(gate) for gate in circuit.queue)
|
||||
|
||||
|
||||
@dataclass
class _PreparedCircuit:
    """Lightweight circuit stand-in returned by ``_prepare_circuit_for_mps``.

    It only carries the two attributes the rest of this module reads from a
    circuit: ``nqubits`` and ``queue``.
    """

    # Number of qubits, copied from the source circuit.
    nqubits: int
    # Gate list after decomposition of gates acting on more than two sites.
    queue: list
|
||||
|
||||
|
||||
def _decompose_gate_for_mps(gate, nqubits, stack=()):
    """Recursively break ``gate`` into gates acting on at most two sites.

    Gates already on one or two sites are returned unchanged in a
    one-element list.  Larger gates are expanded through ``gate.decompose``,
    which receives the qubits not touched by the gate; each resulting gate
    is processed again.  ``stack`` holds the gates currently being expanded
    and guards against endless recursion.

    Raises ``ValueError`` when a large gate has no ``decompose`` method,
    reappears in its own expansion, or its decomposition is empty or just
    the gate itself.
    """
    sites = _gate_sites(gate)
    if len(sites) <= 2:
        return [gate]

    def _label():
        return getattr(gate, "name", gate.__class__.__name__)

    if gate in stack or not hasattr(gate, "decompose"):
        raise ValueError(f"gate {_label()} acts on {len(sites)} qubits")

    spare_qubits = [q for q in range(nqubits) if q not in sites]
    try:
        pieces = gate.decompose(*spare_qubits, use_toffolis=False, method="standard")
    except TypeError:
        # Signature without the keyword options.
        pieces = gate.decompose(*spare_qubits)
    if not pieces or pieces == [gate]:
        raise ValueError(f"gate {_label()} could not be decomposed for Vidal MPS")

    ancestry = stack + (gate,)
    flattened = []
    for piece in pieces:
        flattened += _decompose_gate_for_mps(piece, nqubits, ancestry)
    return flattened
|
||||
|
||||
|
||||
def _prepare_circuit_for_mps(circuit, decompose=True):
    """Return a circuit whose gates all act on at most two sites.

    With ``decompose`` false the input circuit is handed back untouched.
    Otherwise every gate of ``circuit.queue`` goes through
    ``_decompose_gate_for_mps`` and the flattened gate list is wrapped in a
    ``_PreparedCircuit`` with the same ``nqubits``.
    """
    if decompose:
        flattened = [
            piece
            for gate in circuit.queue
            for piece in _decompose_gate_for_mps(gate, circuit.nqubits)
        ]
        return _PreparedCircuit(nqubits=circuit.nqubits, queue=flattened)
    return circuit
|
||||
|
||||
|
||||
@dataclass
class VidalBackend(QibotnBackend, NumpyBackend):
    """QiboTN backend using Vidal/TEBD when possible.

    The fast path supports:
    - one-qubit gates with ``gate.matrix()``;
    - adjacent two-qubit gates with ``gate.matrix()``;
    - Qibo ``SymbolicHamiltonian`` / qibotn dict Pauli-sum expectation values;
    - MPI chain segmentation through ``mpi_approach="CT"``.

    Unsupported operations are delegated to qmatchatea.
    """

    def __init__(self):
        super().__init__()
        self.name = "qibotn"
        self.platform = "vidal"
        self.precision = "double"
        self.rank = 0
        # Truncation statistics of the most recent fast-path run.
        self.last_truncation_error = 0.0
        self.last_max_truncation_error = 0.0
        self.configure_tn_simulation()

    def configure_tn_simulation(
        self,
        ansatz: str = "MPS",
        max_bond_dimension: int | None = 10,
        cut_ratio: float | None = 1e-9,
        trunc_tracking_mode: str = "C",
        svd_control: str = "E!",
        ini_bond_dimension: int = 1,
        tensor_module: str = "torch",
        compile_circuit: bool = False,
        cache_gate_tensors: bool = True,
        track_memory: bool = False,
        mpi_approach: str = "SR",
        mpi_num_procs: int = 1,
        mpi_where_barriers: int = -1,
        mpi_isometrization: int = -1,
        mpi_term_batch_size: int | None = None,
        fallback: bool = True,
    ):
        """Store the simulation settings on the backend.

        ``mpi_approach`` is upper-cased before being stored.  ``fallback``
        selects whether unsupported requests are delegated to qmatchatea
        (``True``) or raise ``NotImplementedError``.  Any previously built
        fallback backend is discarded so that it is rebuilt with the new
        settings.
        """
        self.ansatz = ansatz
        self.max_bond_dimension = max_bond_dimension
        self.cut_ratio = cut_ratio
        self.trunc_tracking_mode = trunc_tracking_mode
        self.svd_control = svd_control
        self.ini_bond_dimension = ini_bond_dimension
        self.tensor_module = tensor_module
        self.compile_circuit = compile_circuit
        self.cache_gate_tensors = cache_gate_tensors
        self.track_memory = track_memory
        self.mpi_approach = mpi_approach.upper()
        self.mpi_num_procs = mpi_num_procs
        self.mpi_where_barriers = mpi_where_barriers
        self.mpi_isometrization = mpi_isometrization
        self.mpi_term_batch_size = mpi_term_batch_size
        self.fallback = fallback
        self._fallback_backend = None

    def _setup_backend_specifics(self):
        """No backend-specific setup is required."""
        return None

    def _qmatchatea_fallback(self):
        """Return the lazily created, cached qmatchatea backend."""
        if self._fallback_backend is None:
            backend = QMatchaTeaBackend()
            backend.configure_tn_simulation(
                ansatz=self.ansatz,
                max_bond_dimension=self.max_bond_dimension,
                cut_ratio=self.cut_ratio,
                trunc_tracking_mode=self.trunc_tracking_mode,
                svd_control=self.svd_control,
                ini_bond_dimension=self.ini_bond_dimension,
                tensor_module=self.tensor_module,
                compile_circuit=self.compile_circuit,
                cache_gate_tensors=self.cache_gate_tensors,
                track_memory=self.track_memory,
                mpi_approach=self.mpi_approach,
                mpi_num_procs=self.mpi_num_procs,
                mpi_where_barriers=self.mpi_where_barriers,
                mpi_isometrization=self.mpi_isometrization,
            )
            self._fallback_backend = backend
        return self._fallback_backend

    def _fallback_or_raise(self, reason):
        """Return the fallback backend, or raise if fallback is disabled.

        Raises ``NotImplementedError(reason)`` when ``self.fallback`` is
        false.
        """
        if not self.fallback:
            raise NotImplementedError(reason)
        return self._qmatchatea_fallback()

    def _fallback_expectation(
        self, reason, circuit, observable, preprocess, compile_circuit
    ):
        """Delegate an expectation request to the fallback backend."""
        backend = self._fallback_or_raise(reason)
        return backend.expectation(circuit, observable, preprocess, compile_circuit)

    def _preprocess_circuit(self, circuit, compile_circuit):
        """Decompose unsupported multi-qubit gates for the local Vidal path."""
        return _prepare_circuit_for_mps(circuit, decompose=True)

    def _run_fast_executor(self, circuit, compile_circuit=True):
        """Run ``circuit`` on the Vidal executor and return the executor.

        With ``mpi_approach == "CT"`` the segmented MPI executor on
        ``MPI.COMM_WORLD`` is used and ``self.rank`` is set to the world
        rank; otherwise the single-process executor is used and
        ``self.rank`` is 0.
        """
        if self.mpi_approach == "CT":
            from mpi4py import MPI

            self.rank = MPI.COMM_WORLD.Get_rank()
            executor = SegmentVidalMPIExecutor(
                nqubits=circuit.nqubits,
                max_bond=self.max_bond_dimension,
                cut_ratio=self.cut_ratio,
                tensor_module=self.tensor_module,
                comm=MPI.COMM_WORLD,
            )
        else:
            self.rank = 0
            executor = VidalTEBDExecutor(
                nqubits=circuit.nqubits,
                max_bond=self.max_bond_dimension,
                cut_ratio=self.cut_ratio,
                tensor_module=self.tensor_module,
            )
        executor.run_circuit(circuit, compile_circuit=compile_circuit)
        return executor

    def _mpo_expectation(self, executor, mpo_tensors):
        """Evaluate an MPO expectation on ``executor``.

        In ``"CT"`` mode every rank takes part in the evaluation, rank 0
        returns the value and the other ranks return ``numpy.nan``.
        """
        if self.mpi_approach != "CT":
            return executor.expectation_mpo(mpo_tensors)
        value = executor.expectation_mpo_root(mpo_tensors)
        # self.rank was set from MPI.COMM_WORLD by _run_fast_executor.
        if self.rank != 0:
            return np.nan
        return value

    def expectation(self, circuit, observable, preprocess=True, compile_circuit=None):
        """Return the expectation value of ``observable`` after ``circuit``.

        The request is delegated to the fallback backend (or raises
        ``NotImplementedError`` when fallback is disabled) if the ansatz is
        not MPS, preprocessing fails, the circuit contains gates the fast
        path cannot handle, or the observable cannot be converted to
        operator terms.  ``compile_circuit=None`` means "use the configured
        default".  After a fast-path run ``last_truncation_error`` and
        ``last_max_truncation_error`` are updated.
        """
        if self.ansatz.upper() != "MPS":
            return self._fallback_expectation(
                "VidalBackend supports only MPS.",
                circuit,
                observable,
                preprocess,
                compile_circuit,
            )

        original_circuit = circuit
        if compile_circuit is None:
            compile_circuit = self.compile_circuit

        if preprocess:
            try:
                circuit = self._preprocess_circuit(circuit, compile_circuit)
            except Exception as exc:
                return self._fallback_expectation(
                    f"VidalBackend preprocessing failed: {exc}",
                    original_circuit,
                    observable,
                    preprocess,
                    compile_circuit,
                )

        reason = _unsupported_reason(circuit)
        # Non-adjacent gates can be routed with SWAPs at compile time; any
        # other problem requires the fallback.
        if reason is not None and not (
            compile_circuit and _can_route_non_adjacent(circuit)
        ):
            return self._fallback_expectation(
                reason, original_circuit, observable, preprocess, compile_circuit
            )

        executor = self._run_fast_executor(circuit, compile_circuit=compile_circuit)
        self.last_truncation_error = float(
            executor.global_truncation_error()
            if hasattr(executor, "global_truncation_error")
            else executor.truncation_error
        )
        self.last_max_truncation_error = float(
            executor.global_max_truncation_error()
            if hasattr(executor, "global_max_truncation_error")
            else executor.max_truncation_error
        )

        mpo_tensors = _observable_mpo_tensors(observable, circuit.nqubits)
        if mpo_tensors is not None:
            return self._mpo_expectation(executor, mpo_tensors)

        hamiltonian = check_observable(observable, circuit.nqubits)
        try:
            terms = _symbolic_hamiltonian_to_operator_terms(hamiltonian)
        except ValueError as exc:
            return self._fallback_expectation(
                str(exc), original_circuit, observable, preprocess, compile_circuit
            )

        mpo_tensors = _operator_terms_to_mpo(terms, circuit.nqubits)
        return self._mpo_expectation(executor, mpo_tensors)

    def execute_circuit(
        self,
        circuit,
        initial_state=None,
        nshots=None,
        prob_type=None,
        return_array=False,
        **prob_kwargs,
    ):
        """Execute ``circuit`` on the fallback backend.

        Raises ``NotImplementedError`` when fallback is disabled.
        """
        backend = self._fallback_or_raise(
            "VidalBackend.execute_circuit is delegated to qmatchatea."
        )
        return backend.execute_circuit(
            circuit,
            initial_state=initial_state,
            nshots=nshots,
            prob_type=prob_type,
            return_array=return_array,
            **prob_kwargs,
        )
|
||||
524
src/qibotn/backends/vidal_mpi_segment.py
Normal file
524
src/qibotn/backends/vidal_mpi_segment.py
Normal file
@@ -0,0 +1,524 @@
|
||||
"""Segmented MPI Vidal/TEBD executor.
|
||||
|
||||
Each rank owns a contiguous interval of sites. Gates fully inside an interval
|
||||
are applied locally. Only two-site gates crossing a rank boundary communicate
|
||||
the neighboring edge tensor and the resulting boundary update.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
|
||||
from qibotn.backends.vidal_tebd import (
|
||||
_asarray,
|
||||
_backend_module,
|
||||
_build_theta_svd_matrix,
|
||||
_disjoint_batches,
|
||||
_fuse_one_site_blocks,
|
||||
_gate_sites,
|
||||
_is_two_qubit_batch,
|
||||
_make_two_site_update,
|
||||
_ones,
|
||||
_real_if_close,
|
||||
_route_non_adjacent_gates,
|
||||
_svd,
|
||||
_tensor_update_from_numpy,
|
||||
_tensor_update_to_numpy,
|
||||
_to_float,
|
||||
_to_numpy,
|
||||
_transpose,
|
||||
VidalTEBDExecutor,
|
||||
)
|
||||
|
||||
_EDGE_TAG = 1701
|
||||
_UPDATE_TAG = 1702
|
||||
_EXPECT_ENV_TAG = 1703
|
||||
_EXPECT_RESULT_TAG = 1704
|
||||
|
||||
|
||||
def _partition_sites(nsites, nranks):
    """Split ``nsites`` sites into ``nranks`` contiguous intervals.

    Returns ``nranks + 1`` offsets; rank ``r`` covers
    ``range(starts[r], starts[r + 1])``.  The first ``nsites % nranks``
    ranks receive one site more than the others.
    """
    base, extra = divmod(nsites, nranks)
    starts = [0]
    for rank in range(nranks):
        width = base + 1 if rank < extra else base
        starts.append(starts[-1] + width)
    return starts
|
||||
|
||||
|
||||
@dataclass
|
||||
class SegmentVidalMPIExecutor:
|
||||
nqubits: int
|
||||
max_bond: int | None
|
||||
comm: object
|
||||
cut_ratio: float | None = 1e-12
|
||||
tensor_module: str = "torch"
|
||||
|
||||
def __post_init__(self):
    """Set up the rank-local part of the state.

    Determines this rank's site interval, binds the process via
    ``_bind_numa_node``, selects the array module and initialises every
    owned site tensor to ``[[[1], [0]]]`` with all bond vectors of length
    one.

    Raises
    ------
    ValueError
        If there are fewer sites than ranks.
    """
    self.rank = self.comm.Get_rank()
    self.size = self.comm.Get_size()
    # Checked on the global sizes so that every rank raises together.
    # _partition_sites gives some rank an empty interval exactly when
    # nqubits < size; testing only the local interval would make just the
    # empty ranks fail while the others carry on.
    if self.nqubits < self.size:
        raise ValueError("SegmentVidalMPIExecutor requires at least one site per rank.")
    self.starts = _partition_sites(self.nqubits, self.size)
    self.start = self.starts[self.rank]
    self.end = self.starts[self.rank + 1]

    from qibotn.backends.cpu import _bind_numa_node

    self.numa_domain = _bind_numa_node(self.rank)

    self.xp = _backend_module(self.tensor_module)
    if self.xp is np:
        self.dtype = np.complex128
        self.device = None
    else:
        self.dtype = self.xp.complex128
        self.device = self.xp.device("cpu")

    self.gammas = {}
    for site in range(self.start, self.end):
        self.gammas[site] = _asarray(
            self.xp, [[[1.0 + 0.0j], [0.0 + 0.0j]]], self.dtype
        )

    # Bonds start .. end inclusive: the bond at each interval edge is also
    # kept by the neighbouring rank.
    self.lambdas = {
        bond: _ones(self.xp, 1, self.dtype, self.device)
        for bond in range(self.start, self.end + 1)
    }
    self._accumulated_truncation_error = 0.0
    self._max_truncation_error = 0.0
|
||||
|
||||
@property
|
||||
def truncation_error(self):
|
||||
return self._accumulated_truncation_error
|
||||
|
||||
def global_truncation_error(self):
    """Sum the per-rank accumulated truncation errors over ``self.comm``.

    This is a collective call (``allreduce``); every rank gets the sum.
    """
    local_error = self._accumulated_truncation_error
    return self.comm.allreduce(local_error, op=MPI.SUM)
|
||||
|
||||
@property
|
||||
def max_truncation_error(self):
|
||||
return self._max_truncation_error
|
||||
|
||||
def global_max_truncation_error(self):
    """Maximum of the per-rank largest truncation errors over ``self.comm``.

    This is a collective call (``allreduce``); every rank gets the maximum.
    """
    local_max = self._max_truncation_error
    return self.comm.allreduce(local_max, op=MPI.MAX)
|
||||
|
||||
def owns_site(self, site):
|
||||
return self.start <= site < self.end
|
||||
|
||||
def owner_of(self, site):
|
||||
return int(np.searchsorted(self.starts, site, side="right") - 1)
|
||||
|
||||
def run_circuit(self, circuit, compile_circuit=True):
    """Apply every gate of ``circuit.queue`` to the distributed state.

    Gates are grouped with ``_disjoint_batches``.  Batches recognised by
    ``_is_two_qubit_batch`` are applied together; in any other batch the
    gates are applied one after another, one-site gates only on the rank
    owning the site.

    Returns a dict of accumulated wall-clock times per phase.  The
    ``"gather"`` entry is initialised here but not modified by this method.

    Raises ``NotImplementedError`` for gates on more than two sites.
    """
    timings = {
        "local_compute": 0.0,
        "edge_exchange": 0.0,
        "boundary_compute": 0.0,
        "boundary_update": 0.0,
        "one_site": 0.0,
        "gather": 0.0,
    }

    gates = circuit.queue
    if compile_circuit:
        gates = _route_non_adjacent_gates(gates, circuit.nqubits)
        # NOTE(review): fusion is assumed to belong to the compile step
        # together with routing -- confirm against the original nesting.
        gates = _fuse_one_site_blocks(gates)
    for batch in _disjoint_batches(gates):
        if _is_two_qubit_batch(batch):
            self._apply_two_site_batch(batch, timings)
        else:
            tic = time.perf_counter()
            for gate in batch:
                sites = _gate_sites(gate)
                if len(sites) == 1 and self.owns_site(sites[0]):
                    op = _asarray(self.xp, gate.matrix(), self.dtype)
                    self.apply_one_site(op, sites[0])
                elif len(sites) == 2:
                    # Every rank must enter this call, since it may
                    # communicate with a neighbour.
                    self._apply_two_site_batch([gate], timings)
                elif len(sites) > 2:
                    raise NotImplementedError("Only one- and two-qubit gates are supported.")
            # NOTE(review): this window also covers the two-site calls made
            # above, which add to their own timing entries as well.
            timings["one_site"] += time.perf_counter() - tic

    return timings
|
||||
|
||||
def apply_one_site(self, op, pos):
|
||||
self.gammas[pos] = self.xp.einsum("st,atb->asb", op, self.gammas[pos])
|
||||
|
||||
def _apply_two_site_batch(self, batch, timings):
    """Apply a batch of adjacent two-site gates.

    Gates with both sites on this rank are computed locally.  For a gate
    crossing the boundary to the next rank, this rank (owner of the left
    site) receives the neighbour's edge tensors, computes the update and
    sends it back; the owner of the right site sends its edge data and
    receives the update.  All computed updates are installed at the end.

    ``timings`` is updated in place.  Raises ``NotImplementedError`` for a
    gate whose two sites are not neighbours.
    """
    local_gates = []
    boundary_specs = []
    recv_left_update = False
    # Classify each gate by which rank(s) own its two sites.
    for gate in batch:
        sites = _gate_sites(gate)
        if abs(sites[0] - sites[1]) != 1:
            raise NotImplementedError("Segment Vidal supports adjacent two-qubit gates only.")
        left, right = sorted(sites)
        left_owner = self.owner_of(left)
        right_owner = self.owner_of(right)
        if left_owner == self.rank and right_owner == self.rank:
            local_gates.append(gate)
        elif left_owner == self.rank:
            boundary_specs.append((gate, left, right))
        elif right_owner == self.rank:
            recv_left_update = True

    tic = time.perf_counter()
    edge_send_req = None
    if recv_left_update:
        # Non-blocking send of our first-site data to the left neighbour.
        edge_send_req = self.comm.isend(
            self._edge_payload(), dest=self.rank - 1, tag=_EDGE_TAG
        )
    right_edge = (
        self.comm.recv(source=self.rank + 1, tag=_EDGE_TAG)
        if boundary_specs
        else None
    )
    timings["edge_exchange"] += time.perf_counter() - tic

    boundary_update = None
    tic = time.perf_counter()
    # Only the last computed update is kept; presumably a batch contains at
    # most one gate across this rank's right boundary -- verify with
    # _disjoint_batches.
    for gate, left, right in boundary_specs:
        boundary_update = self._compute_boundary_update(
            gate, left, right, right_edge
        )
    timings["boundary_compute"] += time.perf_counter() - tic

    tic = time.perf_counter()
    update_send_req = None
    if boundary_update is not None:
        update_send_req = self.comm.isend(
            boundary_update, dest=self.rank + 1, tag=_UPDATE_TAG
        )
    timings["boundary_update"] += time.perf_counter() - tic

    # Local work is done after the boundary send has been posted and
    # before waiting for the update coming from the left.
    tic = time.perf_counter()
    local_items = [
        self._compute_owned_two_site_update(gate)
        for gate in local_gates
    ]
    timings["local_compute"] += time.perf_counter() - tic

    tic = time.perf_counter()
    left_boundary_update = (
        self.comm.recv(source=self.rank - 1, tag=_UPDATE_TAG)
        if recv_left_update
        else None
    )
    if update_send_req is not None:
        update_send_req.wait()
    if edge_send_req is not None:
        edge_send_req.wait()
    timings["boundary_update"] += time.perf_counter() - tic

    # Updates are installed only after every one was computed from the
    # pre-batch state.
    for update in local_items:
        self._install_update(update)
    if boundary_update is not None:
        self._install_update(boundary_update)
    if left_boundary_update is not None:
        self._install_update(left_boundary_update)
|
||||
|
||||
def _edge_payload(self):
    """Collect the data the left neighbour needs for a boundary gate.

    Contains this rank's interval bounds, the tensor of its first site and
    the bond vector following that site, both converted with ``_to_numpy``.
    """
    first = self.start
    payload = {"start": first, "end": self.end}
    payload["gamma_start"] = _to_numpy(self.gammas[first])
    payload["lambda_after_start"] = _to_numpy(self.lambdas[first + 1])
    return payload
|
||||
|
||||
def _compute_owned_two_site_update(self, gate):
    """Compute the update for a two-site gate fully owned by this rank.

    When the gate lists its sites in descending order the operator is
    re-indexed so that it refers to (left, right).  The state is not
    modified; the returned update must be installed by the caller.
    """
    first, second = _gate_sites(gate)
    op = _asarray(self.xp, gate.matrix(), self.dtype)
    if first > second:
        # Swap the roles of the two qubits in the 4x4 operator.
        as_tensor = op.reshape(2, 2, 2, 2)
        op = _transpose(self.xp, as_tensor, (1, 0, 3, 2)).reshape(4, 4)
        left, right = second, first
    else:
        left, right = first, second

    item = self._build_item(
        left,
        op,
        self.lambdas[left],
        self.lambdas[left + 1],
        self.lambdas[left + 2],
        self.gammas[left],
        self.gammas[right],
    )
    factors = _svd(self.xp, item["matrix"])
    return _make_two_site_update(
        item, *factors, self.max_bond, self.cut_ratio, self.xp
    )
|
||||
|
||||
def _compute_boundary_update(self, gate, left, right, remote):
    """Compute the update for a gate crossing into the next rank.

    ``remote`` is the neighbour's edge payload with its first-site tensor
    (``"gamma_start"``) and the bond vector after it
    (``"lambda_after_start"``).  The result is converted with
    ``_tensor_update_to_numpy`` so it can be sent to the neighbour.
    """
    op = _asarray(self.xp, gate.matrix(), self.dtype)
    gate_sites = _gate_sites(gate)
    if gate_sites[0] > gate_sites[1]:
        # Gate lists its sites right-to-left: swap the qubit roles.
        as_tensor = op.reshape(2, 2, 2, 2)
        op = _transpose(self.xp, as_tensor, (1, 0, 3, 2)).reshape(4, 4)

    remote_gamma = _asarray(self.xp, remote["gamma_start"], self.dtype)
    # The bond vector is converted with a real dtype.
    remote_lambda = _asarray(
        self.xp, remote["lambda_after_start"], self.xp.float64
    )
    item = self._build_item(
        left,
        op,
        self.lambdas[left],
        self.lambdas[left + 1],
        remote_lambda,
        self.gammas[left],
        remote_gamma,
    )
    factors = _svd(self.xp, item["matrix"])
    update = _make_two_site_update(
        item, *factors, self.max_bond, self.cut_ratio, self.xp
    )
    return _tensor_update_to_numpy(update)
|
||||
|
||||
def _build_item(self, site, op, lam_left, lam_mid, lam_right, gamma_left, gamma_right):
    """Build the SVD work item for a two-site gate starting at ``site``.

    The dict returned by ``_build_theta_svd_matrix`` is extended with the
    left site index and the two outer bond vectors.
    """
    item = _build_theta_svd_matrix(
        op, self.xp, lam_left, lam_mid, lam_right, gamma_left, gamma_right
    )
    for key, value in (
        ("site", site),
        ("lam_left", lam_left),
        ("lam_right", lam_right),
    ):
        item[key] = value
    return item
|
||||
|
||||
def _install_update(self, update):
|
||||
if isinstance(update["left"], np.ndarray):
|
||||
update = _tensor_update_from_numpy(self.xp, update, self.dtype)
|
||||
truncation_error = update.get("truncation_error", 0.0)
|
||||
self._accumulated_truncation_error += truncation_error
|
||||
self._max_truncation_error = max(
|
||||
self._max_truncation_error,
|
||||
truncation_error,
|
||||
)
|
||||
site = update["site"]
|
||||
if self.owns_site(site):
|
||||
self.gammas[site] = update["left"]
|
||||
if self.owns_site(site + 1):
|
||||
self.gammas[site + 1] = update["right"]
|
||||
if self.start <= site + 1 <= self.end:
|
||||
self.lambdas[site + 1] = update["lambda"]
|
||||
|
||||
def gather_full_state(self):
    """Gather every segment's tensors, converted to NumPy, on rank 0.

    Returns whatever ``self.comm.gather(..., root=0)`` returns.
    """
    local_gammas = {}
    for site, tensor in self.gammas.items():
        local_gammas[site] = _to_numpy(tensor)

    local_lambdas = {}
    for bond, tensor in self.lambdas.items():
        local_lambdas[bond] = _to_numpy(tensor)

    payload = {
        "start": self.start,
        "end": self.end,
        "gammas": local_gammas,
        "lambdas": local_lambdas,
    }
    return self.comm.gather(payload, root=0)
|
||||
|
||||
def expectation_pauli_sum_root(self, terms, term_batch_size=None):
    """Evaluate a sum of Pauli strings via ``expectation_operator_sum_root``.

    ``terms`` is an iterable of ``(coeff, ops)`` pairs where ``ops`` holds
    ``(name, site)`` pairs; names are upper-cased before the lookup, so
    ``"x"`` and ``"X"`` are equivalent.
    """
    xp, dtype = self.xp, self.dtype
    pauli_matrices = {
        "I": self._eye(2),
        "X": _asarray(xp, [[0, 1], [1, 0]], dtype),
        "Y": _asarray(xp, [[0, -1j], [1j, 0]], dtype),
        "Z": _asarray(xp, [[1, 0], [0, -1]], dtype),
    }

    operator_terms = []
    for coeff, ops in terms:
        resolved = tuple(
            (site, pauli_matrices[name.upper()]) for name, site in ops
        )
        operator_terms.append((coeff, resolved))

    return self.expectation_operator_sum_root(
        operator_terms,
        term_batch_size=term_batch_size,
    )
|
||||
|
||||
def expectation_operator_sum_root(self, terms, term_batch_size=None):
    """Evaluate ``sum(coeff * <term>) / norm`` and return it on rank 0.

    Args:
        terms: sequence of ``(coeff, ops)`` pairs; ``ops`` holds
            ``(site, matrix)`` pairs.
        term_batch_size: number of terms evaluated per distributed sweep.
            ``None`` evaluates all terms in a single batch.

    Returns:
        The normalised expectation value on rank 0, ``None`` on other ranks.

    Raises:
        ValueError: if ``term_batch_size`` is smaller than 1.
    """
    if term_batch_size is None:
        batch_size = max(1, len(terms))
    else:
        batch_size = int(term_batch_size)
        # Checked before any communication: a negative step would make the
        # loop below empty (silently returning 0) and a zero step would fail
        # with an unrelated ``range()`` error.
        if batch_size < 1:
            raise ValueError(
                f"term_batch_size must be a positive integer, got {term_batch_size!r}."
            )

    norm = self._distributed_product_expectation({})
    total = 0.0 + 0.0j
    for start in range(0, len(terms), batch_size):
        batch = terms[start : start + batch_size]
        values = self._distributed_operator_batch_expectation(batch, norm)
        if self.rank == 0:
            # Only rank 0 receives the per-term values.
            for (coeff, _), term_value in zip(batch, values):
                total += complex(coeff) * complex(term_value)

    if self.rank != 0:
        return None
    return _real_if_close(total / norm)
|
||||
|
||||
def _eye(self, size):
    """Return a ``size`` x ``size`` identity in the executor's dtype."""
    options = {"dtype": self.dtype}
    if self.xp is not np:
        # Only the non-NumPy backend is given a device.
        options["device"] = self.device
    return self.xp.eye(size, **options)
|
||||
|
||||
def _distributed_product_expectation(self, operators):
    """Contract ``<psi| prod(operators) |psi>`` by passing an environment rank to rank.

    Rank 0 starts the sweep and returns the scalar sent back by the last
    rank (or its own result when there is a single rank).  All other ranks
    return ``None``.
    """
    last_rank = self.size - 1

    if self.rank != 0:
        incoming = self.comm.recv(source=self.rank - 1, tag=_EXPECT_ENV_TAG)
        env = self._segment_product_environment(operators, incoming)
        if self.rank == last_rank:
            # The final environment is reduced to its first entry and sent to rank 0.
            self.comm.send(
                _to_numpy(env).reshape(-1)[0], dest=0, tag=_EXPECT_RESULT_TAG
            )
        else:
            self.comm.send(_to_numpy(env), dest=self.rank + 1, tag=_EXPECT_ENV_TAG)
        return None

    env = self._segment_product_environment(operators)
    if self.size == 1:
        return env.reshape(-1)[0]
    self.comm.send(_to_numpy(env), dest=1, tag=_EXPECT_ENV_TAG)
    return self.comm.recv(source=last_rank, tag=_EXPECT_RESULT_TAG)
|
||||
|
||||
def _segment_product_environment(self, operators, incoming=None):
    """Propagate a left environment through the sites ``start .. end - 1``.

    ``operators`` maps a site to its 2x2 matrix; sites without an entry use
    the identity.  Without ``incoming`` the sweep starts from an identity
    whose size is the length of ``self.lambdas[self.start]``.
    """
    xp = self.xp
    if incoming is None:
        bond_dim = len(self.lambdas[self.start])
        seed = np.eye(bond_dim, dtype=np.complex128)
    else:
        seed = incoming
    env = _asarray(xp, seed, self.dtype)

    identity = self._eye(2)
    for site in range(self.start, self.end):
        # Absorb the bond to the right of the site into its tensor.
        tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
        site_op = operators.get(site, identity)
        env = xp.einsum(
            "xy,xsb,st,ytd->bd", env, self._conj(tensor), site_op, tensor
        )
    return env
|
||||
|
||||
def _distributed_operator_batch_expectation(self, terms, norm):
    """Evaluate the unnormalised expectation of every term in one sweep.

    Args:
        terms: sequence of ``(coeff, ops)`` pairs; ``ops`` holds
            ``(site, matrix)`` pairs.  Coefficients are ignored here.
        norm: value returned for terms without operators.

    Returns:
        A list with one value per term on rank 0, ``None`` on other ranks
        (an empty list on every rank when ``terms`` is empty).
    """
    if not terms:
        return []
    if all(not ops for _, ops in terms):
        # Pure identity terms need no communication at all.
        return [norm] * len(terms) if self.rank == 0 else None

    batch_ops = []
    for _, ops in terms:
        per_site = {}
        for site, matrix in ops:
            site = int(site)
            matrix = _asarray(self.xp, matrix, self.dtype)
            # Operators listed on the same site compose as a matrix product
            # in the listed order instead of the last one silently winning.
            per_site[site] = per_site[site] @ matrix if site in per_site else matrix
        batch_ops.append(per_site)

    last_rank = self.size - 1
    if self.rank == 0:
        env = self._segment_operator_batch_environment(batch_ops)
        if self.size == 1:
            return list(env.reshape(len(terms), -1)[:, 0])
        self.comm.send(_to_numpy(env), dest=1, tag=_EXPECT_ENV_TAG)
        return self.comm.recv(source=last_rank, tag=_EXPECT_RESULT_TAG)

    incoming = self.comm.recv(source=self.rank - 1, tag=_EXPECT_ENV_TAG)
    env = self._segment_operator_batch_environment(batch_ops, incoming)
    if self.rank == last_rank:
        # One scalar per term: the first entry of each final environment.
        values = list(_to_numpy(env).reshape(len(terms), -1)[:, 0])
        self.comm.send(values, dest=0, tag=_EXPECT_RESULT_TAG)
    else:
        self.comm.send(_to_numpy(env), dest=self.rank + 1, tag=_EXPECT_ENV_TAG)
    return None
|
||||
|
||||
def _segment_operator_batch_environment(self, batch_ops, incoming=None):
    """Propagate one left environment per term through this segment.

    ``batch_ops`` is a list of ``{site: matrix}`` dictionaries, one per term;
    missing sites use the identity.  The leading axis of the environment
    indexes the terms.
    """
    xp = self.xp
    if incoming is None:
        bond_dim = len(self.lambdas[self.start])
        seed = np.tile(
            np.eye(bond_dim, dtype=np.complex128), (len(batch_ops), 1, 1)
        )
    else:
        seed = incoming
    env = _asarray(xp, seed, self.dtype)

    identity = self._eye(2)
    for site in range(self.start, self.end):
        # Absorb the bond to the right of the site into its tensor.
        tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
        site_ops = xp.stack(
            [term_ops.get(site, identity) for term_ops in batch_ops],
            axis=0,
        )
        env = xp.einsum(
            "nxy,xsb,nst,ytd->nbd",
            env,
            self._conj(tensor),
            site_ops,
            tensor,
        )
    return env
|
||||
|
||||
def _conj(self, tensor):
    """Return the complex conjugate of ``tensor`` for the active backend."""
    if self.xp is np:
        return np.conjugate(tensor)
    return tensor.conj()
|
||||
|
||||
def expectation_mpo_root(self, mpo_tensors):
    """Return ``<psi|MPO|psi> / <psi|psi>`` on rank 0 and ``None`` elsewhere.

    Raises:
        ValueError: if the number of MPO tensors differs from ``self.nqubits``.
    """
    if len(mpo_tensors) != self.nqubits:
        raise ValueError(
            f"Expected {self.nqubits} MPO tensors, got {len(mpo_tensors)}."
        )

    norm = self._distributed_product_expectation({})
    last_rank = self.size - 1

    if self.rank != 0:
        incoming = self.comm.recv(source=self.rank - 1, tag=_EXPECT_ENV_TAG)
        env = self._segment_mpo_environment(mpo_tensors, incoming)
        if self.rank == last_rank:
            # The last rank reduces the environment to a scalar for rank 0.
            scalar = _to_numpy(env).reshape(-1)[0]
            self.comm.send(scalar, dest=0, tag=_EXPECT_RESULT_TAG)
        else:
            self.comm.send(_to_numpy(env), dest=self.rank + 1, tag=_EXPECT_ENV_TAG)
        return None

    env = self._segment_mpo_environment(mpo_tensors)
    if self.size == 1:
        return _real_if_close(env.reshape(-1)[0] / norm)
    self.comm.send(_to_numpy(env), dest=1, tag=_EXPECT_ENV_TAG)
    value = self.comm.recv(source=last_rank, tag=_EXPECT_RESULT_TAG)
    return _real_if_close(value / norm)
|
||||
|
||||
def _segment_mpo_environment(self, mpo_tensors, incoming=None):
    """Propagate an MPO left environment through the sites ``start .. end - 1``.

    Without ``incoming`` the environment starts as an identity on the state
    bonds with an MPO bond of size one.

    Raises:
        ValueError: if an MPO tensor is not ``(left_bond, 2, 2, right_bond)``.
    """
    xp = self.xp
    if incoming is None:
        left_dim = len(self.lambdas[self.start])
        # Same values as a zero tensor whose [:, 0, :] slice is the identity.
        seed = np.eye(left_dim, dtype=np.complex128).reshape(left_dim, 1, left_dim)
    else:
        seed = incoming
    env = _asarray(xp, seed, self.dtype)

    for site in range(self.start, self.end):
        mpo = _asarray(xp, mpo_tensors[site], self.dtype)
        if mpo.ndim != 4 or mpo.shape[1:3] != (2, 2):
            raise ValueError(
                "Each MPO tensor must have shape "
                "(left_bond, 2, 2, right_bond)."
            )
        # Absorb the bond to the right of the site into its tensor.
        tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
        env = xp.einsum(
            "xlc,xub,lutr,ctd->brd",
            env,
            self._conj(tensor),
            mpo,
            tensor,
        )
    return env
|
||||
|
||||
def expectation_ring_xz_root(self):
    """Evaluate ``sum_i 0.5 * X_i Z_{(i+1) mod nqubits}`` via the Pauli-sum path."""
    nqubits = self.nqubits
    terms = []
    for site in range(nqubits):
        neighbour = (site + 1) % nqubits  # wraps around to close the ring
        terms.append((0.5, (("X", site), ("Z", neighbour))))
    return self.expectation_pauli_sum_root(terms)
|
||||
|
||||
|
||||
def run_segment_vidal_mpi_ring_xz(
    circuit,
    max_bond,
    comm,
    cut_ratio=1e-12,
    tensor_module="torch",
):
    """Run ``circuit`` on a ``SegmentVidalMPIExecutor`` and measure the ring-XZ observable.

    Returns ``(value, timings)``: ``timings`` is the object returned by
    ``run_circuit`` with the expectation wall time stored under ``"gather"``.
    """
    executor = SegmentVidalMPIExecutor(
        nqubits=circuit.nqubits,
        max_bond=max_bond,
        cut_ratio=cut_ratio,
        tensor_module=tensor_module,
        comm=comm,
    )
    timings = executor.run_circuit(circuit)

    started = time.perf_counter()
    value = executor.expectation_ring_xz_root()
    elapsed = time.perf_counter() - started
    timings["gather"] = elapsed
    return value, timings
|
||||
605
src/qibotn/backends/vidal_tebd.py
Normal file
605
src/qibotn/backends/vidal_tebd.py
Normal file
@@ -0,0 +1,605 @@
|
||||
"""Vidal/TEBD MPS executor for layer-parallel circuit simulation.
|
||||
|
||||
This module is intentionally small and focused on the circuit family used by the
|
||||
MPS benchmarks: one-qubit gates and adjacent two-qubit gates on a 1D chain. It
|
||||
keeps the state in Vidal form, so gates acting on disjoint bonds can be applied
|
||||
in parallel without moving a global mixed-canonical center.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _backend_module(tensor_module):
    """Return the array library named by ``tensor_module``.

    Raises:
        ValueError: for any name other than ``"torch"`` or ``"numpy"``.
    """
    if tensor_module == "numpy":
        return np
    if tensor_module != "torch":
        raise ValueError(f"Unsupported tensor module {tensor_module!r}.")

    # torch is imported only when it is actually requested.
    import torch

    return torch
|
||||
|
||||
|
||||
def _asarray(xp, value, dtype):
    """Convert ``value`` to an array/tensor of ``dtype`` for backend ``xp``."""
    convert = np.asarray if xp is np else xp.as_tensor
    return convert(value, dtype=dtype)
|
||||
|
||||
|
||||
def _ones(xp, size, dtype, device=None):
    """Return a real vector of ones matching the precision of ``dtype``.

    ``complex128`` maps to ``float64``; every other dtype maps to ``float32``.
    ``device`` is only forwarded to the non-NumPy backend.
    """
    real_dtype = xp.float64 if dtype == xp.complex128 else xp.float32
    if xp is np:
        return np.ones(size, dtype=real_dtype)
    return xp.ones(size, dtype=real_dtype, device=device)
|
||||
|
||||
|
||||
def _eye(xp, size, dtype, device=None):
    """Return a ``size`` x ``size`` identity of ``dtype`` for backend ``xp``."""
    options = {"dtype": dtype}
    if xp is not np:
        # Only the non-NumPy backend is given a device.
        options["device"] = device
    return xp.eye(size, **options)
|
||||
|
||||
|
||||
def _conj(xp, tensor):
    """Return the complex conjugate of ``tensor`` for backend ``xp``."""
    if xp is np:
        return np.conjugate(tensor)
    return tensor.conj()
|
||||
|
||||
|
||||
def _transpose(xp, tensor, axes):
    """Permute the axes of ``tensor`` according to ``axes``."""
    if xp is np:
        return np.transpose(tensor, axes)
    return tensor.permute(*axes)
|
||||
|
||||
|
||||
def _vdot(xp, left, right):
    """Return ``xp.vdot`` of the two tensors after flattening them."""
    flat_left = left.reshape(-1)
    flat_right = right.reshape(-1)
    # When ``xp`` is NumPy this is exactly ``np.vdot``.
    return xp.vdot(flat_left, flat_right)
|
||||
|
||||
|
||||
def _to_float(x):
    """Convert a scalar (tensor-like objects with ``detach`` included) to ``float``."""
    value = x.detach().cpu().item() if hasattr(x, "detach") else x
    return float(value)
|
||||
|
||||
|
||||
def _to_scalar(x):
    """Unwrap single-element tensors and NumPy arrays into Python scalars.

    Anything else is returned unchanged.
    """
    if hasattr(x, "detach"):
        x = x.detach().cpu()
    elif not isinstance(x, np.ndarray):
        return x
    return x.item()
|
||||
|
||||
|
||||
def _real_if_close(x, tol=1000):
    """Apply ``np.real_if_close`` and unwrap an array result with ``.item()``."""
    value = np.real_if_close(x, tol=tol)
    if isinstance(value, np.ndarray):
        return value.item()
    return value
|
||||
|
||||
|
||||
def _to_numpy(tensor):
    """Return ``tensor`` as a NumPy array, detaching tensor-like objects first."""
    if not hasattr(tensor, "detach"):
        return np.asarray(tensor)
    return tensor.detach().cpu().numpy()
|
||||
|
||||
|
||||
def _tensor_update_to_numpy(update):
    """Return a copy of a two-site update with its tensors converted to NumPy.

    ``"site"`` becomes an ``int`` and ``"truncation_error"``, when present,
    a ``float``.
    """
    converted = {"site": int(update["site"])}
    for key in ("left", "right", "lambda"):
        converted[key] = _to_numpy(update[key])
    if "truncation_error" in update:
        converted["truncation_error"] = float(update["truncation_error"])
    return converted
|
||||
|
||||
|
||||
def _tensor_update_from_numpy(xp, update, dtype):
    """Convert a NumPy-based two-site update to tensors of backend ``xp``.

    With the NumPy backend the input dictionary itself is returned.
    """
    if xp is np:
        return update

    # Bond values are real: float64 for complex128 states, float32 otherwise.
    lambda_dtype = xp.float64 if dtype == xp.complex128 else xp.float32
    converted = {
        "site": update["site"],
        "left": _asarray(xp, update["left"], dtype),
        "right": _asarray(xp, update["right"], dtype),
        "lambda": xp.as_tensor(update["lambda"], dtype=lambda_dtype),
    }
    if "truncation_error" in update:
        converted["truncation_error"] = float(update["truncation_error"])
    return converted
|
||||
|
||||
|
||||
def _svd(xp, matrix):
    """Return ``(U, S, Vh)`` for ``matrix`` using the eigendecomposition-based SVD."""
    factors = _svd_eigh(xp, matrix)
    return factors
|
||||
|
||||
|
||||
def _svd_eigh(xp, matrix):
    """Compute an SVD from the eigendecomposition of a Gram matrix.

    The smaller of ``M M^H`` and ``M^H M`` is diagonalised; its eigenvectors
    give one set of singular vectors and the other set is obtained by
    projecting ``matrix`` and rescaling with the inverse singular values.

    Returns:
        ``(U, S, Vh)`` with singular values in descending order.
    """
    rows, cols = matrix.shape
    adjoint = _conj(xp, matrix).T
    use_row_gram = rows <= cols

    gram = matrix @ adjoint if use_row_gram else adjoint @ matrix
    eigvals, eigvecs = _eigh(xp, gram)
    eigvals, eigvecs = _sort_eigh_desc(xp, eigvals, eigvecs)
    singvals = _sqrt_clamped(xp, eigvals)
    inv_s = _safe_inverse(xp, singvals)

    if use_row_gram:
        # Eigenvectors are the left singular vectors; recover Vh row by row.
        vh = (_conj(xp, eigvecs).T @ matrix) * inv_s.reshape(-1, 1)
        return eigvecs, singvals, vh

    # Eigenvectors are the right singular vectors; recover U column by column.
    umat = (matrix @ eigvecs) * inv_s.reshape(1, -1)
    return umat, singvals, _conj(xp, eigvecs).T
|
||||
|
||||
|
||||
def _eigh(xp, matrix):
    """Return ``xp.linalg.eigh(matrix)`` (eigenvalues, eigenvectors)."""
    # When ``xp`` is NumPy this is exactly ``np.linalg.eigh``.
    decomposition = xp.linalg.eigh(matrix)
    return decomposition
|
||||
|
||||
|
||||
def _sort_eigh_desc(xp, eigvals, eigvecs):
    """Reverse eigenvalues and the matching eigenvector columns.

    The NumPy branch returns copies rather than reversed views.
    """
    if xp is not np:
        return xp.flip(eigvals, dims=(0,)), xp.flip(eigvecs, dims=(1,))
    reversed_vals = eigvals[::-1].copy()
    reversed_vecs = eigvecs[:, ::-1].copy()
    return reversed_vals, reversed_vecs
|
||||
|
||||
|
||||
def _sqrt_clamped(xp, eigvals):
    """Return the square root of the real parts, with negatives clamped to zero."""
    real_part = eigvals.real
    if xp is np:
        non_negative = np.maximum(real_part, 0.0)
    else:
        non_negative = xp.clamp(real_part, min=0.0)
    return xp.sqrt(non_negative)
|
||||
|
||||
|
||||
def _safe_inverse(xp, values):
    """Return ``1 / values`` element-wise, with 0 wherever ``values <= 1e-300``.

    The reciprocal is computed for every entry and then masked, so exact
    zeros would otherwise trigger NumPy's divide-by-zero ``RuntimeWarning``
    although their result is discarded; the NumPy branch silences it.
    """
    if xp is np:
        with np.errstate(divide="ignore"):
            return np.where(values > 1e-300, 1.0 / values, 0.0)
    return xp.where(values > 1e-300, 1.0 / values, xp.zeros_like(values))
|
||||
|
||||
|
||||
@dataclass
class VidalTEBDExecutor:
    """Vidal-form MPS executor for one-qubit and adjacent two-qubit gates.

    The state is stored as one rank-3 tensor per site (``gammas``, indexed
    ``[left, phys, right]``) and one real vector per bond (``lambdas``,
    ``nqubits + 1`` entries including both open ends).  Every qubit starts
    in ``|0>`` with all bonds of dimension one.
    """

    nqubits: int
    max_bond: int | None
    cut_ratio: float | None = 1e-12
    tensor_module: str = "torch"

    def __post_init__(self):
        """Select the backend and build the initial product state."""
        self.xp = _backend_module(self.tensor_module)
        if self.xp is np:
            self.dtype = np.complex128
            self.device = None
        else:
            self.dtype = self.xp.complex128
            self.device = self.xp.device("cpu")

        self.gammas = [
            _asarray(self.xp, [[[1.0 + 0.0j], [0.0 + 0.0j]]], self.dtype)
            for _ in range(self.nqubits)
        ]
        self.lambdas = [
            _ones(self.xp, 1, self.dtype, self.device)
            for _ in range(self.nqubits + 1)
        ]
        self._accumulated_truncation_error = 0.0
        self._max_truncation_error = 0.0

    def run_circuit(self, circuit, compile_circuit=True):
        """Apply every gate of ``circuit.queue`` to the state.

        With ``compile_circuit`` the queue is first routed through SWAPs so
        that two-qubit gates are adjacent, and runs of one-qubit gates are
        fused per site.
        """
        gates = circuit.queue
        if compile_circuit:
            gates = _route_non_adjacent_gates(gates, circuit.nqubits)
            gates = _fuse_one_site_blocks(gates)
        for batch in _disjoint_batches(gates):
            for gate in batch:
                self._apply_gate(gate)

    @property
    def truncation_error(self):
        """Sum of the truncation errors of all two-site updates so far."""
        return self._accumulated_truncation_error

    @property
    def max_truncation_error(self):
        """Largest truncation error of any single two-site update so far."""
        return self._max_truncation_error

    def _apply_gate(self, gate):
        """Dispatch ``gate`` to the one-site or two-site update.

        Raises:
            NotImplementedError: for non-adjacent two-qubit gates and for
                gates acting on a number of sites other than one or two.
        """
        sites = _gate_sites(gate)
        matrix = _asarray(self.xp, gate.matrix(), self.dtype)
        if len(sites) == 1:
            self.apply_one_site(matrix, sites[0])
        elif len(sites) == 2:
            if abs(sites[0] - sites[1]) != 1:
                raise NotImplementedError("VidalTEBDExecutor supports adjacent gates only.")
            self.apply_two_site(matrix, sites[0], sites[1])
        else:
            raise NotImplementedError("Only one- and two-qubit gates are supported.")

    def apply_one_site(self, op, pos):
        """Apply the 2x2 matrix ``op`` to the physical index of site ``pos``."""
        # op[out_phys, in_phys] * gamma[left, in_phys, right]
        self.gammas[pos] = self.xp.einsum("st,atb->asb", op, self.gammas[pos])

    def apply_two_site(self, op, left_pos, right_pos):
        """Apply the 4x4 matrix ``op`` to two neighbouring sites and re-split them."""
        item = self._build_two_site_matrix(op, left_pos, right_pos)
        umat, singvals, vh = _svd(self.xp, item["matrix"])
        self._install_two_site_split(item, umat, singvals, vh)

    def _build_two_site_matrix(self, op, left_pos, right_pos):
        """Return the SVD-ready work item for a gate on two neighbouring sites."""
        if left_pos > right_pos:
            # The gate lists the higher site first: exchange the roles of its
            # two qubits so the matrix acts on (lower site, higher site).
            left_pos, right_pos = right_pos, left_pos
            op = _transpose(self.xp, op.reshape(2, 2, 2, 2), (1, 0, 3, 2)).reshape(
                4, 4
            )

        i = left_pos
        result = _build_theta_svd_matrix(
            op,
            self.xp,
            self.lambdas[i],
            self.lambdas[i + 1],
            self.lambdas[i + 2],
            self.gammas[i],
            self.gammas[i + 1],
        )
        result["site"] = i
        result["lam_left"] = self.lambdas[i]
        result["lam_right"] = self.lambdas[i + 2]
        return result

    def _install_two_site_split(self, item, umat, singvals, vh):
        """Truncate the split, record its truncation error and store the tensors."""
        update = _make_two_site_update(
            item, umat, singvals, vh, self.max_bond, self.cut_ratio, self.xp
        )
        self._accumulated_truncation_error += update["truncation_error"]
        self._max_truncation_error = max(
            self._max_truncation_error,
            update["truncation_error"],
        )
        i = update["site"]
        self.gammas[i] = update["left"]
        self.gammas[i + 1] = update["right"]
        self.lambdas[i + 1] = update["lambda"]

    def expectation_ring_xz(self):
        """Return the expectation of ``sum_i 0.5 * X_i Z_{(i+1) mod nqubits}``."""
        return self.expectation_pauli_sum(
            [
                (0.5, (("X", site), ("Z", (site + 1) % self.nqubits)))
                for site in range(self.nqubits)
            ]
        )

    def expectation_pauli_sum(self, terms):
        """Return the expectation of a sum of Pauli strings.

        ``terms`` holds ``(coeff, ops)`` pairs with ``ops`` made of
        ``(name, site)`` pairs; names are upper-cased before the lookup.
        """
        paulis = {
            "I": _eye(self.xp, 2, self.dtype, self.device),
            "X": _asarray(self.xp, [[0, 1], [1, 0]], self.dtype),
            "Y": _asarray(self.xp, [[0, -1j], [1j, 0]], self.dtype),
            "Z": _asarray(self.xp, [[1, 0], [0, -1]], self.dtype),
        }
        operator_terms = [
            (
                coeff,
                tuple((site, paulis[name.upper()]) for name, site in ops),
            )
            for coeff, ops in terms
        ]
        return self.expectation_operator_sum(operator_terms)

    def expectation_operator_sum(self, terms):
        """Return ``sum(coeff * <ops>) / <psi|psi>`` for products of one-site operators.

        ``terms`` holds ``(coeff, ops)`` pairs with ``ops`` made of
        ``(site, matrix)`` pairs.  Operators listed on the same site are
        multiplied in the listed order.
        """
        value = 0.0 + 0.0j
        norm = self.norm()
        for coeff, ops in terms:
            operators = {}
            for site, matrix in ops:
                site = int(site)
                matrix = _asarray(self.xp, matrix, self.dtype)
                # A repeated site composes as a matrix product instead of the
                # last operator silently replacing the earlier ones.
                operators[site] = (
                    operators[site] @ matrix if site in operators else matrix
                )

            if not operators:
                term_value = norm
            elif len(operators) == 1:
                site, matrix = next(iter(operators.items()))
                term_value = _to_scalar(self._expect_one_site(site, matrix))
            elif len(operators) == 2 and abs(max(operators) - min(operators)) == 1:
                site0, site1 = sorted(operators)
                term_value = _to_scalar(
                    self._expect_adjacent(site0, operators[site0], operators[site1])
                )
            else:
                term_value = _to_scalar(self.expect_product_operators(operators))
            value += complex(coeff) * complex(term_value)
        return _real_if_close(value / norm)

    def _expect_one_site(self, site, op):
        """Return ``<theta|op|theta>`` for the one-site tensor with both bonds absorbed."""
        theta = self.xp.einsum(
            "a,asb,b->asb",
            self.lambdas[site],
            self.gammas[site],
            self.lambdas[site + 1],
        )
        op_theta = self.xp.einsum("us,asb->aub", op, theta)
        return _vdot(self.xp, theta, op_theta)

    def _expect_adjacent(self, site, op_left, op_right):
        """Return ``<theta|op_left x op_right|theta>`` for sites ``site`` and ``site + 1``."""
        theta = self.xp.einsum(
            "a,asb,b,btc,c->astc",
            self.lambdas[site],
            self.gammas[site],
            self.lambdas[site + 1],
            self.gammas[site + 1],
            self.lambdas[site + 2],
        )
        op_theta = self.xp.einsum("us,vt,astc->auvc", op_left, op_right, theta)
        return _vdot(self.xp, theta, op_theta)

    def expect_product_operators(self, operators):
        """Contract ``<psi| prod(operators) |psi>`` over the whole chain.

        ``operators`` maps a site to its 2x2 matrix; other sites use the
        identity.  The result is not divided by the norm.
        """
        env = _asarray(self.xp, [[1.0 + 0.0j]], self.dtype)
        identity = _eye(self.xp, 2, self.dtype, self.device)
        for site in range(self.nqubits):
            # Absorb the bond to the right of the site into its tensor.
            tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
            op = operators.get(site, identity)
            env = self.xp.einsum(
                "xy,xsb,st,ytd->bd", env, _conj(self.xp, tensor), op, tensor
            )
        return env.reshape(-1)[0]

    def norm(self):
        """Return the real part of ``<psi|psi>`` as a ``float``."""
        return float(np.real(_to_scalar(self.expect_product_operators({}))))

    def expectation_mpo(self, mpo_tensors):
        """Compute ``<psi|MPO|psi> / <psi|psi>``.

        MPO tensors are expected in ``(left_bond, phys_out, phys_in, right_bond)``
        order, with physical dimension 2 on every site.

        Raises:
            ValueError: if the number of tensors differs from ``nqubits`` or
                a tensor does not have shape ``(left_bond, 2, 2, right_bond)``.
        """
        if len(mpo_tensors) != self.nqubits:
            raise ValueError(
                f"Expected {self.nqubits} MPO tensors, got {len(mpo_tensors)}."
            )
        env = _asarray(self.xp, [[[1.0 + 0.0j]]], self.dtype)
        for site, raw_mpo in enumerate(mpo_tensors):
            mpo = _asarray(self.xp, raw_mpo, self.dtype)
            if mpo.ndim != 4 or mpo.shape[1:3] != (2, 2):
                raise ValueError(
                    "Each MPO tensor must have shape "
                    "(left_bond, 2, 2, right_bond)."
                )
            tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
            env = self.xp.einsum(
                "xlc,xub,lutr,ctd->brd",
                env,
                _conj(self.xp, tensor),
                mpo,
                tensor,
            )
        return _real_if_close(_to_scalar(env.reshape(-1)[0]) / self.norm())
|
||||
|
||||
|
||||
def _build_theta_svd_matrix(op, xp, lam_left, lam_mid, lam_right, gamma_left, gamma_right):
    """Merge two neighbouring sites, apply a two-site gate and flatten the result.

    Returns a dictionary with the outer bond dimensions (``"chi_left"``,
    ``"chi_right"``) and the ``(chi_left * 2, 2 * chi_right)`` ``"matrix"``.
    """
    merged = xp.einsum(
        "a,asb,b,btc,c->astc",
        lam_left,
        gamma_left,
        lam_mid,
        gamma_right,
        lam_right,
    )
    # View the 4x4 matrix as gate[out_left, out_right, in_left, in_right].
    gate_tensor = op.reshape(2, 2, 2, 2)
    evolved = xp.einsum("uvst,astc->auvc", gate_tensor, merged)
    chi_left, chi_right = evolved.shape[0], evolved.shape[3]
    return {
        "chi_left": chi_left,
        "chi_right": chi_right,
        "matrix": evolved.reshape(chi_left * 2, 2 * chi_right),
    }
|
||||
|
||||
|
||||
def _choose_bond(singvals, max_bond, cut_ratio, xp):
    """Return how many singular values to keep.

    The count is capped by ``max_bond`` (when given) and, for a positive
    ``cut_ratio``, by the number of values strictly above
    ``singvals[0] * cut_ratio``; the ratio cut never goes below one value.
    """
    available = int(singvals.shape[0])
    keep = available if max_bond is None else min(available, int(max_bond))

    if cut_ratio is None or not (cut_ratio > 0) or available <= 0:
        return keep

    above = singvals > singvals[0] * cut_ratio
    if xp is np:
        ratio_keep = int(np.count_nonzero(above))
    else:
        ratio_keep = int(above.sum().detach().cpu().item())
    return min(keep, max(1, ratio_keep))
|
||||
|
||||
|
||||
def _divide_left_lambda(tensor, lambdas, xp):
    """Divide ``tensor`` along its first axis by ``lambdas``.

    Entries with magnitude at or below 1e-300 are replaced by one before
    dividing.
    """
    usable = xp.abs(lambdas) > 1e-300
    fallback = 1.0 if xp is np else xp.ones_like(lambdas)
    divisor = xp.where(usable, lambdas, fallback)
    return tensor / divisor.reshape(-1, 1, 1)
|
||||
|
||||
|
||||
def _divide_right_lambda(tensor, lambdas, xp):
    """Divide ``tensor`` along its last axis by ``lambdas``.

    Entries with magnitude at or below 1e-300 are replaced by one before
    dividing.
    """
    usable = xp.abs(lambdas) > 1e-300
    fallback = 1.0 if xp is np else xp.ones_like(lambdas)
    divisor = xp.where(usable, lambdas, fallback)
    return tensor / divisor.reshape(1, 1, -1)
|
||||
|
||||
|
||||
def _make_two_site_update(item, umat, singvals, vh, max_bond, cut_ratio, xp):
    """Truncate an SVD split and turn it back into two site tensors.

    When singular values are discarded, the kept ones are rescaled so their
    squared sum equals the squared sum before truncation, and the discarded
    squared weight is reported as ``"truncation_error"``.
    """
    keep = _choose_bond(singvals, max_bond, cut_ratio, xp)
    kept, dropped = singvals[:keep], singvals[keep:]

    truncation_error = 0.0
    if dropped.shape[0] > 0:
        weight_kept = (kept * kept).sum()
        weight_dropped = (dropped * dropped).sum()
        truncation_error = _to_float(weight_dropped)
        kept = kept / xp.sqrt(weight_kept / (weight_kept + weight_dropped))

    left_tensor = umat[:, :keep].reshape(item["chi_left"], 2, keep)
    right_tensor = vh[:keep, :].reshape(keep, 2, item["chi_right"])
    # Remove the outer bond weights that were absorbed when the two sites
    # were merged.
    left_tensor = _divide_left_lambda(left_tensor, item["lam_left"], xp)
    right_tensor = _divide_right_lambda(right_tensor, item["lam_right"], xp)

    return {
        "site": item["site"],
        "left": left_tensor,
        "right": right_tensor,
        "lambda": kept,
        "truncation_error": truncation_error,
    }
|
||||
|
||||
|
||||
def _gate_sites(gate):
    """Return the gate's control qubits followed by its target qubits as a tuple.

    Missing ``control_qubits`` / ``target_qubits`` attributes count as empty.
    """
    control_sites = tuple(getattr(gate, "control_qubits", ()))
    target_sites = tuple(getattr(gate, "target_qubits", ()))
    # With no controls the concatenation is just the targets.
    return control_sites + target_sites
|
||||
|
||||
|
||||
# ── SWAP routing for non-adjacent two-qubit gates ──────────────────────
|
||||
|
||||
class _SWAPGate:
    """Bare-bones SWAP gate used when routing non-adjacent gates."""

    name = "swap"
    control_qubits = ()

    def __init__(self, left, right):
        self.target_qubits = (left, right)

    def matrix(self):
        """Return the 4x4 SWAP matrix as a complex NumPy array."""
        # SWAP exchanges the middle two basis states: permute identity rows.
        return np.eye(4, dtype=complex)[[0, 2, 1, 3]]
|
||||
|
||||
|
||||
class _RoutedTwoQubitGate:
    """Two-qubit gate re-targeted to the physical sites chosen by SWAP routing.

    The matrix of the original gate is read once at construction time.
    """

    name = "routed_two_qubit"
    control_qubits = ()

    def __init__(self, original_gate, physical_sites):
        self._matrix = original_gate.matrix()
        self.target_qubits = tuple(physical_sites)

    def matrix(self):
        """Return the matrix captured from the original gate."""
        return self._matrix
|
||||
|
||||
|
||||
def _route_non_adjacent_gates(gates, nqubits):
    """Insert SWAP networks to make all two-qubit gates adjacent.

    For each non-adjacent two-qubit gate, inserts SWAP gates to bring the
    farther qubit adjacent, applies the original gate, then inserts reverse
    SWAPs to restore the qubit ordering.  Gates acting on any other number
    of sites are passed through unchanged, so unsupported gates are reported
    by the executor rather than failing here with an unpacking error.

    ``nqubits`` is accepted but not used.
    """
    routed = []
    for gate in gates:
        sites = _gate_sites(gate)
        if len(sites) != 2:
            routed.append(gate)
            continue

        left, right = sorted(sites)
        if right - left == 1:
            routed.append(gate)
            continue

        # Move qubit 'right' leftwards to sit at left+1
        routed.extend(
            _SWAPGate(pos, pos + 1) for pos in range(right - 1, left, -1)
        )

        # Apply the original gate in its original qubit order. For gates like
        # CNOT(5, 0), sorting the routed sites would swap control and target.
        physical_map = {left: left, right: left + 1}
        routed.append(
            _RoutedTwoQubitGate(gate, [physical_map[site] for site in sites])
        )

        # Reverse SWAPs to restore original ordering
        routed.extend(_SWAPGate(pos, pos + 1) for pos in range(left + 1, right))

    return routed
|
||||
|
||||
|
||||
def _disjoint_batches(gates):
    """Split ``gates`` into consecutive batches of mutually disjoint gates.

    A new batch starts whenever the next gate acts on a different number of
    sites than the current batch or touches a site already used in it.
    """
    batches = []
    current, used_sites, batch_arity = [], set(), None
    for gate in gates:
        sites = _gate_sites(gate)
        arity = len(sites)
        must_split = batch_arity != arity or not used_sites.isdisjoint(sites)
        if current and must_split:
            batches.append(current)
            current, used_sites = [], set()
        current.append(gate)
        used_sites.update(sites)
        batch_arity = arity
    if current:
        batches.append(current)
    return batches
|
||||
|
||||
|
||||
def _is_two_qubit_batch(batch):
    """Return ``True`` when ``batch`` is non-empty and holds only two-site gates.

    Always returns a ``bool``; an empty batch yields ``False`` rather than
    the empty container itself.
    """
    return bool(batch) and all(len(_gate_sites(gate)) == 2 for gate in batch)
|
||||
|
||||
|
||||
class _FusedOneSiteGate:
    """Single-site gate carrying a precomputed matrix."""

    name = "fused_one_site"

    def __init__(self, site, matrix):
        self._matrix = matrix
        self.control_qubits = ()
        self.target_qubits = (site,)

    def matrix(self):
        """Return the stored matrix."""
        return self._matrix
|
||||
|
||||
|
||||
def _fuse_one_site_blocks(gates):
    """Fuse each consecutive run of one-site gates into one gate per site.

    Within a run the matrices acting on the same site are multiplied so that
    later gates act after earlier ones; the fused gates are emitted in
    ascending site order.  Gates on any other number of sites are kept as
    they are and end the current run.
    """
    fused = []
    pending = {}  # site -> accumulated matrix of the current run

    def flush():
        fused.extend(
            _FusedOneSiteGate(site, pending[site]) for site in sorted(pending)
        )
        pending.clear()

    for gate in gates:
        sites = _gate_sites(gate)
        if len(sites) != 1:
            flush()
            fused.append(gate)
            continue
        site = sites[0]
        matrix = gate.matrix()
        # A later gate multiplies from the left.
        pending[site] = matrix @ pending[site] if site in pending else matrix

    flush()
    return fused
|
||||
|
||||
|
||||
def run_vidal_ring_xz(
    circuit,
    max_bond,
    cut_ratio=1e-12,
    tensor_module="torch",
):
    """Run ``circuit`` on a fresh ``VidalTEBDExecutor`` and return its ring-XZ expectation."""
    settings = {
        "nqubits": circuit.nqubits,
        "max_bond": max_bond,
        "cut_ratio": cut_ratio,
        "tensor_module": tensor_module,
    }
    executor = VidalTEBDExecutor(**settings)
    executor.run_circuit(circuit)
    return executor.expectation_ring_xz()
|
||||
151
src/qibotn/benchmark_cases.py
Normal file
151
src/qibotn/benchmark_cases.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""Reusable benchmark circuits and observables for expectation runs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
# Circuit families accepted by ``build_circuit``.
CIRCUITS = (
    "brickwall_cnot", "reversed_cnot", "shifted_cz",
    "rxx_rzz", "swap_scramble", "ghz_ladder",
)

# Observable families accepted by ``observable_terms``.
OBSERVABLES = (
    "ring_xz", "open_zz", "mixed_local",
    "range2_xx", "long_z_string",
)
|
||||
|
||||
|
||||
def parse_names(raw, valid, label):
    """Validate a list of names against ``valid``.

    ``["all"]`` expands to every valid name; otherwise ``raw`` itself is
    returned.

    Raises:
        ValueError: listing the unknown names in sorted order.
    """
    if raw == ["all"]:
        return list(valid)

    allowed = set(valid)
    unknown = sorted(name for name in set(raw) if name not in allowed)
    if not unknown:
        return raw
    raise ValueError(f"Unknown {label}: {', '.join(unknown)}")
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build one of the benchmark circuits.

    Args:
        kind: circuit family name (see the chain of branches below).
        nqubits: number of qubits.
        nlayers: number of layers; ignored for ``"ghz_ladder"``.
        seed: seed for the random rotation angles.

    Returns:
        The assembled ``Circuit``.

    Raises:
        ValueError: if ``kind`` is not a known family.  This is checked up
            front so that it is also reported when ``nlayers`` is zero.
    """
    known_kinds = (
        "brickwall_cnot",
        "reversed_cnot",
        "shifted_cz",
        "rxx_rzz",
        "swap_scramble",
        "ghz_ladder",
    )
    if kind not in known_kinds:
        raise ValueError(f"Unknown circuit kind {kind!r}.")

    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    if kind == "ghz_ladder":
        circuit.add(gates.H(0))
        for qubit in range(nqubits - 1):
            circuit.add(gates.CNOT(qubit, qubit + 1))
        return circuit

    extra_rx = kind in ("rxx_rzz", "swap_scramble")
    for layer in range(nlayers):
        # One-qubit rotations; the order of the random draws is part of the
        # circuit definition for a given seed.
        for qubit in range(nqubits):
            circuit.add(gates.RY(qubit, theta=rng.uniform(-math.pi, math.pi)))
            circuit.add(gates.RZ(qubit, theta=rng.uniform(-math.pi, math.pi)))
            if extra_rx:
                circuit.add(gates.RX(qubit, theta=rng.uniform(-math.pi, math.pi)))

        if kind == "brickwall_cnot":
            add_brickwall(circuit, nqubits, gates.CNOT, layer, reverse=False)
        elif kind == "reversed_cnot":
            add_brickwall(circuit, nqubits, gates.CNOT, layer, reverse=True)
        elif kind == "shifted_cz":
            for qubit in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.CZ(qubit, qubit + 1))
        elif kind == "rxx_rzz":
            for qubit in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.7, 0.7)))
                circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.7, 0.7)))
        else:  # "swap_scramble"
            for qubit in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.CZ(qubit, qubit + 1))
                if layer % 4 == 3:
                    circuit.add(gates.SWAP(qubit, qubit + 1))

    return circuit
|
||||
|
||||
|
||||
def add_brickwall(circuit, nqubits, gate, layer, reverse):
    """Add one brickwall layer: gates on the even bonds, then on the odd bonds.

    With ``reverse`` the qubit order of the gate is flipped on the even
    bonds in odd layers and on the odd bonds in even layers.
    """
    odd_layer = bool(layer % 2)
    sublayers = (
        (0, reverse and odd_layer),
        (1, reverse and not odd_layer),
    )
    for first_qubit, flipped in sublayers:
        for qubit in range(first_qubit, nqubits - 1, 2):
            if flipped:
                circuit.add(gate(qubit + 1, qubit))
            else:
                circuit.add(gate(qubit, qubit + 1))
|
||||
|
||||
|
||||
def observable_terms(kind, nqubits):
    """Return the ``(coeff, ops)`` terms of a named benchmark observable.

    Each ``ops`` is a tuple of ``(pauli_name, site)`` pairs.

    Raises:
        ValueError: if ``kind`` is not a known observable.
    """
    terms = []
    if kind == "ring_xz":
        for site in range(nqubits):
            neighbour = (site + 1) % nqubits  # closes the ring
            terms.append((0.5, (("X", site), ("Z", neighbour))))
    elif kind == "open_zz":
        for site in range(nqubits - 1):
            # The weight is computed per term, so a single qubit (no bonds)
            # never divides by zero.
            terms.append((1.0 / (nqubits - 1), (("Z", site), ("Z", site + 1))))
    elif kind == "mixed_local":
        terms.append((0.25, (("X", 0),)))
        terms.append((-0.5, (("Z", nqubits - 1),)))
        for site in range(0, nqubits - 1, 3):
            terms.append((0.125, (("Y", site), ("Y", site + 1))))
    elif kind == "range2_xx":
        weight = 1.0 / max(1, nqubits - 2)
        for site in range(nqubits - 2):
            terms.append((weight, (("X", site), ("X", site + 2))))
    elif kind == "long_z_string":
        stride = max(1, nqubits // 16)
        z_string = tuple(("Z", site) for site in range(0, nqubits, stride))
        terms.append((1.0, z_string))
    else:
        raise ValueError(f"Unknown observable kind {kind!r}.")
    return terms
|
||||
|
||||
|
||||
def terms_to_dict(terms):
    """Convert ``(coefficient, operators)`` terms to the qibotn dict form.

    Args:
        terms: Iterable of ``(coefficient, ((pauli, site), ...))`` pairs.

    Returns:
        ``{"terms": [{"coefficient": float, "operators": [(pauli, int), ...]}]}``
        where only the real part of each coefficient is kept.
    """
    serialised = []
    for weight, paulis in terms:
        entry = {
            "coefficient": float(np.real(weight)),
            "operators": [(label, int(position)) for label, position in paulis],
        }
        serialised.append(entry)
    return {"terms": serialised}
|
||||
|
||||
|
||||
def exact_pauli_sum(circuit, terms, nqubits):
    """Return the exact expectation value of a Pauli sum on a circuit state.

    The circuit is executed once and every term ``<psi|P|psi>`` is evaluated
    directly on the flattened state vector: each basis index ``i`` is mapped
    to the index ``P`` sends it to (bit flips from X/Y) and the phase it
    picks up (signs from Z, ``+/- 1j`` from Y). Qubit ``0`` is the most
    significant bit of the basis index.

    Args:
        circuit: Callable whose result exposes ``state(numpy=True)``.
        terms: Iterable of ``(coefficient, ((pauli, site), ...))`` with
            ``pauli`` in ``{"I", "X", "Y", "Z"}``.
        nqubits: Number of qubits of the register.

    Returns:
        The real part of the accumulated expectation value as ``float``.

    Raises:
        ValueError: If a Pauli name is unsupported, or a non-identity
            operator acts on a site outside ``range(nqubits)``. Without the
            range check the bit shift would be negative or too large and the
            result would be silently wrong (or fail with an IndexError).
    """
    state = circuit().state(numpy=True).reshape(-1)
    indices = np.arange(state.size, dtype=np.int64)
    value = 0.0 + 0.0j
    for coeff, ops in terms:
        flipped = indices.copy()
        phase = np.ones(state.size, dtype=np.complex128)
        for name, site in ops:
            if name == "I":
                continue
            if name not in ("X", "Y", "Z"):
                raise ValueError(f"Unsupported Pauli {name!r}.")
            if not 0 <= site < nqubits:
                raise ValueError(
                    f"Pauli {name!r} acts on site {site}, which is outside "
                    f"the {nqubits}-qubit register."
                )
            shift = nqubits - 1 - site
            if name != "Z":
                # X and Y map |b> to |1 - b> on this site.
                flipped ^= 1 << shift
            if name != "X":
                # Z contributes (-1)^b, Y contributes 1j * (-1)^b, where b
                # is the bit of the *input* basis state.
                sign = 1 - 2 * ((indices >> shift) & 1)
                phase *= 1j * sign if name == "Y" else sign
        value += coeff * np.vdot(state[flipped], phase * state)
    return float(value.real)
|
||||
@@ -1,6 +1,19 @@
|
||||
import cupy as cp
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
|
||||
|
||||
def _require_cupy():
|
||||
if cp is None:
|
||||
raise ImportError(
|
||||
"The cuQuantum circuit converter requires cupy. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
return cp
|
||||
|
||||
# Reference: https://github.com/NVIDIA/cuQuantum/tree/main/python/samples/cutensornet/circuit_converter
|
||||
|
||||
|
||||
@@ -19,7 +32,7 @@ class QiboCircuitToEinsum:
|
||||
"""
|
||||
|
||||
def __init__(self, circuit, dtype="complex128"):
|
||||
self.backend = cp
|
||||
self.backend = _require_cupy()
|
||||
self.dtype = getattr(self.backend, dtype)
|
||||
self.init_basis_map(self.backend, dtype)
|
||||
self.init_intermediate_circuit(circuit)
|
||||
@@ -116,7 +129,9 @@ class QiboCircuitToEinsum:
|
||||
required_shape = self.op_shape_from_qubits(len(gate_qubits))
|
||||
self.gate_tensors.append(
|
||||
(
|
||||
cp.asarray(gate.matrix(), dtype=self.dtype).reshape(required_shape),
|
||||
self.backend.asarray(gate.matrix(), dtype=self.dtype).reshape(
|
||||
required_shape
|
||||
),
|
||||
gate_qubits,
|
||||
)
|
||||
)
|
||||
@@ -161,7 +176,7 @@ class QiboCircuitToEinsum:
|
||||
required_shape = self.op_shape_from_qubits(len(gate_qubits))
|
||||
self.gate_tensors_inverse.append(
|
||||
(
|
||||
cp.asarray(gate.matrix()).reshape(required_shape),
|
||||
self.backend.asarray(gate.matrix()).reshape(required_shape),
|
||||
gate_qubits,
|
||||
)
|
||||
)
|
||||
@@ -169,7 +184,7 @@ class QiboCircuitToEinsum:
|
||||
# self.active_qubits is to identify qubits with at least 1 gate acting on it in the whole circuit.
|
||||
self.active_qubits_inverse = np.unique(gates_qubits_inverse)
|
||||
|
||||
def get_pauli_gates(self, pauli_map, dtype="complex128", backend=cp):
|
||||
def get_pauli_gates(self, pauli_map, dtype="complex128", backend=None):
|
||||
"""Populate the gates for all pauli operators.
|
||||
|
||||
Parameters:
|
||||
@@ -180,6 +195,8 @@ class QiboCircuitToEinsum:
|
||||
Returns:
|
||||
A sequence of pauli gates.
|
||||
"""
|
||||
if backend is None:
|
||||
backend = _require_cupy()
|
||||
asarray = backend.asarray
|
||||
pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
|
||||
pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
|
||||
|
||||
@@ -1,10 +1,23 @@
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
import numpy as np
|
||||
|
||||
from qibotn.circuit_convertor import QiboCircuitToEinsum
|
||||
from qibotn.mps_utils import apply_gate, initial
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
cutn = None
|
||||
|
||||
|
||||
def _require_cuquantum():
|
||||
if cp is None or cutn is None:
|
||||
raise ImportError(
|
||||
"The cuQuantum MPS converter requires cupy and cuquantum. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
|
||||
|
||||
class QiboCircuitToMPS:
|
||||
"""A helper class to convert Qibo circuit to MPS.
|
||||
@@ -23,6 +36,7 @@ class QiboCircuitToMPS:
|
||||
dtype="complex128",
|
||||
rand_seed=0,
|
||||
):
|
||||
_require_cuquantum()
|
||||
np.random.seed(rand_seed)
|
||||
cp.random.seed(rand_seed)
|
||||
|
||||
@@ -44,4 +58,6 @@ class QiboCircuitToMPS:
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
cutn.destroy(self.handle)
|
||||
handle = getattr(self, "handle", None)
|
||||
if cutn is not None and handle is not None:
|
||||
cutn.destroy(handle)
|
||||
|
||||
1024
src/qibotn/csrc/torch_contractor.cpp
Normal file
1024
src/qibotn/csrc/torch_contractor.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,89 +1,46 @@
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
from cupy.cuda import nccl
|
||||
from cupy.cuda.runtime import getDeviceCount
|
||||
from cuquantum.tensornet import Network, contract
|
||||
from mpi4py import MPI
|
||||
from qibo import hamiltonians
|
||||
from qibo.symbols import I, X, Y, Z
|
||||
|
||||
from qibotn.circuit_convertor import QiboCircuitToEinsum
|
||||
from qibotn.circuit_to_mps import QiboCircuitToMPS
|
||||
from qibotn.mps_contraction_helper import MPSContractionHelper
|
||||
from qibotn.observables import (
|
||||
build_observable,
|
||||
check_observable,
|
||||
create_hamiltonian_from_dict,
|
||||
extract_gates_and_qubits,
|
||||
)
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
from cupy.cuda import nccl
|
||||
from cupy.cuda.runtime import getDeviceCount
|
||||
from cuquantum.tensornet import Network, contract
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
cutn = None
|
||||
nccl = None
|
||||
getDeviceCount = None
|
||||
Network = None
|
||||
contract = None
|
||||
|
||||
|
||||
def check_observable(observable, circuit_nqubit):
|
||||
"""Checks the type of observable and returns the appropriate Hamiltonian."""
|
||||
if observable is None:
|
||||
return build_observable(circuit_nqubit)
|
||||
elif isinstance(observable, dict):
|
||||
return create_hamiltonian_from_dict(observable, circuit_nqubit)
|
||||
elif isinstance(observable, hamiltonians.SymbolicHamiltonian):
|
||||
# TODO: check if the observable is compatible with the circuit
|
||||
return observable
|
||||
else:
|
||||
raise TypeError("Invalid observable type.")
|
||||
def _require_cuquantum():
|
||||
if (
|
||||
cp is None
|
||||
or cutn is None
|
||||
or nccl is None
|
||||
or getDeviceCount is None
|
||||
or Network is None
|
||||
or contract is None
|
||||
):
|
||||
raise ImportError(
|
||||
"The legacy GPU evaluation helpers require cupy and cuquantum. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
|
||||
|
||||
def build_observable(circuit_nqubit):
|
||||
"""Helper function to construct a target observable."""
|
||||
hamiltonian_form = 0
|
||||
for i in range(circuit_nqubit):
|
||||
hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit)
|
||||
|
||||
hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
|
||||
return hamiltonian
|
||||
|
||||
|
||||
def create_hamiltonian_from_dict(data, circuit_nqubit):
|
||||
"""Create a Qibo SymbolicHamiltonian from a dictionary representation.
|
||||
|
||||
Ensures that each Hamiltonian term explicitly acts on all circuit qubits
|
||||
by adding identity (`I`) gates where needed.
|
||||
|
||||
Args:
|
||||
data (dict): Dictionary containing Hamiltonian terms.
|
||||
circuit_nqubit (int): Total number of qubits in the quantum circuit.
|
||||
|
||||
Returns:
|
||||
hamiltonians.SymbolicHamiltonian: The constructed Hamiltonian.
|
||||
"""
|
||||
PAULI_GATES = {"X": X, "Y": Y, "Z": Z}
|
||||
|
||||
terms = []
|
||||
|
||||
for term in data["terms"]:
|
||||
coeff = term["coefficient"]
|
||||
operators = term["operators"] # List of tuples like [("Z", 0), ("X", 1)]
|
||||
|
||||
# Convert the operator list into a dictionary {qubit_index: gate}
|
||||
operator_dict = {q: PAULI_GATES[g] for g, q in operators}
|
||||
|
||||
# Build the full term ensuring all qubits are covered
|
||||
full_term_expr = [
|
||||
operator_dict[q](q) if q in operator_dict else I(q)
|
||||
for q in range(circuit_nqubit)
|
||||
]
|
||||
|
||||
# Multiply all operators together to form a single term
|
||||
term_expr = full_term_expr[0]
|
||||
for op in full_term_expr[1:]:
|
||||
term_expr *= op
|
||||
|
||||
# Scale by the coefficient
|
||||
final_term = coeff * term_expr
|
||||
terms.append(final_term)
|
||||
|
||||
if not terms:
|
||||
raise ValueError("No valid Hamiltonian terms were added.")
|
||||
|
||||
# Combine all terms
|
||||
hamiltonian_form = sum(terms)
|
||||
|
||||
return hamiltonians.SymbolicHamiltonian(hamiltonian_form)
|
||||
|
||||
|
||||
def get_ham_gates(pauli_map, dtype="complex128", backend=cp):
|
||||
def get_ham_gates(pauli_map, dtype="complex128", backend=None):
|
||||
"""Populate the gates for all pauli operators.
|
||||
|
||||
Parameters:
|
||||
@@ -94,6 +51,13 @@ def get_ham_gates(pauli_map, dtype="complex128", backend=cp):
|
||||
Returns:
|
||||
A sequence of pauli gates.
|
||||
"""
|
||||
if backend is None:
|
||||
backend = cp
|
||||
if backend is None:
|
||||
raise ImportError(
|
||||
"get_ham_gates requires an array backend; cupy is unavailable "
|
||||
"in this CPU-only environment."
|
||||
)
|
||||
asarray = backend.asarray
|
||||
pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
|
||||
pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
|
||||
@@ -111,47 +75,9 @@ def get_ham_gates(pauli_map, dtype="complex128", backend=cp):
|
||||
return gates
|
||||
|
||||
|
||||
def extract_gates_and_qubits(hamiltonian):
|
||||
"""
|
||||
Extracts the gates and their corresponding qubits from a Qibo Hamiltonian.
|
||||
|
||||
Parameters:
|
||||
hamiltonian (qibo.hamiltonians.Hamiltonian or qibo.hamiltonians.SymbolicHamiltonian):
|
||||
A Qibo Hamiltonian object.
|
||||
|
||||
Returns:
|
||||
list of tuples: [(coefficient, [(gate, qubit), ...]), ...]
|
||||
- coefficient: The prefactor of the term.
|
||||
- list of (gate, qubit): Each term's gates and the qubits they act on.
|
||||
"""
|
||||
extracted_terms = []
|
||||
|
||||
if isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian):
|
||||
for term in hamiltonian.terms:
|
||||
coeff = term.coefficient # Extract coefficient
|
||||
gate_qubit_list = []
|
||||
|
||||
# Extract gate and qubit information
|
||||
for factor in term.factors:
|
||||
gate_name = str(factor)[
|
||||
0
|
||||
] # Extract the gate type (X, Y, Z) from 'X0', 'Z1'
|
||||
qubit = int(str(factor)[1:]) # Extract the qubit index
|
||||
gate_qubit_list.append((qubit, gate_name, coeff))
|
||||
coeff = 1.0
|
||||
|
||||
extracted_terms.append(gate_qubit_list)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unsupported Hamiltonian type. Must be SymbolicHamiltonian or Hamiltonian."
|
||||
)
|
||||
|
||||
return extracted_terms
|
||||
|
||||
|
||||
def initialize_mpi():
|
||||
"""Initialize MPI communication and device selection."""
|
||||
_require_cuquantum()
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
@@ -162,6 +88,7 @@ def initialize_mpi():
|
||||
|
||||
def initialize_nccl(comm_mpi, rank, size):
|
||||
"""Initialize NCCL communication."""
|
||||
_require_cuquantum()
|
||||
nccl_id = nccl.get_unique_id() if rank == 0 else None
|
||||
nccl_id = comm_mpi.bcast(nccl_id, root=0)
|
||||
return nccl.NcclCommunicator(size, nccl_id, rank)
|
||||
@@ -179,6 +106,7 @@ def get_operands(qibo_circ, datatype, rank, comm):
|
||||
|
||||
def compute_optimal_path(network, n_samples, size, comm):
|
||||
"""Compute contraction path and broadcast optimal selection."""
|
||||
_require_cuquantum()
|
||||
path, info = network.contract_path(
|
||||
optimize={
|
||||
"samples": n_samples,
|
||||
@@ -207,6 +135,8 @@ def compute_slices(info, rank, size):
|
||||
|
||||
def reduce_result(result, comm, method="MPI", root=0):
|
||||
"""Reduce results across processes."""
|
||||
if method == "NCCL":
|
||||
_require_cuquantum()
|
||||
if method == "MPI":
|
||||
return comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||
|
||||
@@ -254,6 +184,7 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
comm, rank, size, device_id = initialize_mpi()
|
||||
operands = get_operands(qibo_circ, datatype, rank, comm)
|
||||
network = Network(*operands, options={"device_id": device_id})
|
||||
@@ -285,6 +216,7 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
comm_mpi, rank, size, device_id = initialize_mpi()
|
||||
comm_nccl = initialize_nccl(comm_mpi, rank, size)
|
||||
operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
|
||||
@@ -309,6 +241,7 @@ def dense_vector_tn(qibo_circ, datatype):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
return contract(*myconvertor.state_vector_operands())
|
||||
|
||||
@@ -337,6 +270,7 @@ def expectation_tn_nccl(qibo_circ, datatype, observable, n_samples=8):
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
|
||||
_require_cuquantum()
|
||||
comm_mpi, rank, size, device_id = initialize_mpi()
|
||||
|
||||
comm_nccl = initialize_nccl(comm_mpi, rank, size)
|
||||
@@ -405,6 +339,7 @@ def expectation_tn_MPI(qibo_circ, datatype, observable, n_samples=8):
|
||||
Returns:
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
# Initialize MPI and device
|
||||
comm, rank, size, device_id = initialize_mpi()
|
||||
|
||||
@@ -464,6 +399,7 @@ def expectation_tn(qibo_circ, datatype, observable):
|
||||
Returns:
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
|
||||
observable = check_observable(observable, qibo_circ.nqubits)
|
||||
@@ -489,6 +425,7 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
|
||||
mps_helper = MPSContractionHelper(myconvertor.num_qubits)
|
||||
|
||||
|
||||
82
src/qibotn/expectation_runner.py
Normal file
82
src/qibotn/expectation_runner.py
Normal file
@@ -0,0 +1,82 @@
|
||||
"""High-level CPU expectation runner used by CLI scripts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
from qibo.backends import construct_backend
|
||||
|
||||
from qibotn.benchmark_cases import exact_pauli_sum
|
||||
from qibotn.observables import check_observable
|
||||
|
||||
|
||||
@dataclass
class ExpectationConfig:
    """Settings that ``run_cpu_expectation`` copies into the backend runcard."""

    # Compared case-insensitively with "mps" to set the "MPS_enabled" entry.
    ansatz: str = "tn"
    # Forwarded as the "MPI_enabled" runcard entry.
    mpi: bool = False
    # Forwarded as "max_bond_dimension".
    bond: int | None = 1024
    # Forwarded as "cut_ratio".
    cut_ratio: float | None = 1e-12
    # Forwarded as "tensor_module".
    tensor_module: str = "torch"
    # Forwarded as "quimb_backend".
    quimb_backend: str = "torch"
    # Forwarded as "dtype".
    dtype: str = "complex128"
    # Forwarded as "torch_threads".
    torch_threads: int = 8
    # Forwarded as "parallel_opts"; a falsy value is replaced by an empty dict.
    parallel_opts: dict | None = None
|
||||
|
||||
|
||||
@dataclass
class ExpectationResult:
    """Outcome record returned by ``run_cpu_expectation``."""

    # Real part of the first element returned by ``backend.execute_circuit``.
    value: float
    # Wall-clock time of the ``execute_circuit`` call, from ``time.perf_counter``.
    seconds: float
    # ``backend.rank`` when the backend defines it, otherwise 0.
    rank: int = 0
    # Copy of ``backend.parallel_stats`` as a list, or None when absent.
    parallel_stats: list | None = None
|
||||
|
||||
|
||||
def exact_for_observable(circuit, observable, nqubits):
    """Compute a reference expectation value from the dense state vector.

    A dict observable carrying a ``"terms"`` entry is evaluated term by term
    with ``exact_pauli_sum``. Anything else is converted with
    ``check_observable`` and evaluated through the Hamiltonian's
    ``expectation_from_state``.

    Args:
        circuit: Callable whose result exposes ``state(numpy=True)``.
        observable: Observable in any form accepted by ``check_observable``.
        nqubits: Number of qubits of the circuit.

    Returns:
        The real part of the expectation value as ``float``.
    """
    has_explicit_terms = isinstance(observable, dict) and "terms" in observable
    if not has_explicit_terms:
        hamiltonian = check_observable(observable, nqubits)
        statevector = circuit().state(numpy=True)
        return float(hamiltonian.expectation_from_state(statevector).real)

    terms = []
    for entry in observable["terms"]:
        paulis = tuple((label, position) for label, position in entry["operators"])
        terms.append((entry["coefficient"], paulis))
    return exact_pauli_sum(circuit, terms, nqubits)
|
||||
|
||||
|
||||
def run_cpu_expectation(circuit, observable, config):
    """Evaluate ``observable`` on ``circuit`` with the qibotn CPU backend.

    The fields of ``config`` are translated into a runcard, a ``"qibotn"``
    backend is built for the ``"cpu"`` platform, and only the
    ``execute_circuit`` call is timed.

    Args:
        circuit: Circuit passed to ``backend.execute_circuit``.
        observable: Stored in the runcard under ``"expectation_enabled"``.
        config: ``ExpectationConfig`` instance providing the settings.

    Returns:
        ``ExpectationResult`` holding the real part of the value, the elapsed
        seconds, and the backend's ``rank`` / ``parallel_stats`` when present.
    """
    runcard = dict(
        MPI_enabled=config.mpi,
        MPS_enabled=config.ansatz.lower() == "mps",
        NCCL_enabled=False,
        expectation_enabled=observable,
        max_bond_dimension=config.bond,
        cut_ratio=config.cut_ratio,
        tensor_module=config.tensor_module,
        quimb_backend=config.quimb_backend,
        dtype=config.dtype,
        torch_threads=config.torch_threads,
        parallel_opts=config.parallel_opts or {},
    )
    backend = construct_backend(backend="qibotn", platform="cpu", runcard=runcard)

    tic = time.perf_counter()
    value = backend.execute_circuit(circuit)[0]
    toc = time.perf_counter()

    stats = getattr(backend, "parallel_stats", None)
    if stats is not None:
        stats = list(stats)
    return ExpectationResult(
        float(np.real(value)),
        toc - tic,
        rank=getattr(backend, "rank", 0),
        parallel_stats=stats,
    )
|
||||
@@ -1,4 +1,16 @@
|
||||
from cuquantum.tensornet import contract, contract_path
|
||||
try:
|
||||
from cuquantum.tensornet import contract, contract_path
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
contract = None
|
||||
contract_path = None
|
||||
|
||||
|
||||
def _require_cuquantum():
|
||||
if contract is None or contract_path is None:
|
||||
raise ImportError(
|
||||
"The cuQuantum MPS contraction helper requires cuquantum. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
|
||||
# Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
|
||||
|
||||
@@ -113,6 +125,7 @@ class MPSContractionHelper:
|
||||
return self._contract(interleaved_inputs, options=options) / norm
|
||||
|
||||
def _contract(self, interleaved_inputs, options=None):
|
||||
_require_cuquantum()
|
||||
path = contract_path(*interleaved_inputs, options=options)[0]
|
||||
|
||||
return contract(*interleaved_inputs, options=options, optimize={"path": path})
|
||||
|
||||
@@ -1,6 +1,19 @@
|
||||
import cupy as cp
|
||||
from cuquantum.tensornet import contract
|
||||
from cuquantum.tensornet.experimental import contract_decompose
|
||||
try:
|
||||
import cupy as cp
|
||||
from cuquantum.tensornet import contract
|
||||
from cuquantum.tensornet.experimental import contract_decompose
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
contract = None
|
||||
contract_decompose = None
|
||||
|
||||
|
||||
def _require_cuquantum():
|
||||
if cp is None or contract is None or contract_decompose is None:
|
||||
raise ImportError(
|
||||
"The cuQuantum MPS helpers require cupy and cuquantum. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
|
||||
|
||||
def initial(num_qubits, dtype):
|
||||
@@ -13,6 +26,7 @@ def initial(num_qubits, dtype):
|
||||
Returns:
|
||||
The initial MPS tensors.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
state_tensor = cp.asarray([1, 0], dtype=dtype).reshape(1, 2, 1)
|
||||
mps_tensors = [state_tensor] * num_qubits
|
||||
return mps_tensors
|
||||
@@ -28,6 +42,7 @@ def mps_site_right_swap(mps_tensors, i, **kwargs):
|
||||
Returns:
|
||||
The updated MPS tensors.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
# contraction followed by QR decomposition
|
||||
a, _, b = contract_decompose(
|
||||
"ipj,jqk->iqj,jpk",
|
||||
@@ -60,6 +75,7 @@ def apply_gate(mps_tensors, gate, qubits, **kwargs):
|
||||
The updated MPS tensors.
|
||||
"""
|
||||
|
||||
_require_cuquantum()
|
||||
n_qubits = len(qubits)
|
||||
if n_qubits == 1:
|
||||
# single-qubit gate
|
||||
|
||||
126
src/qibotn/observables.py
Normal file
126
src/qibotn/observables.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""Observable helpers shared by tensor-network backends and benchmarks."""
|
||||
|
||||
from qibo import hamiltonians
|
||||
from qibo.symbols import I, X, Y, Z
|
||||
|
||||
|
||||
def check_observable(observable, circuit_nqubit):
    """Normalise ``observable`` into a ``SymbolicHamiltonian``.

    * ``None`` selects the default observable from ``build_observable``.
    * A ``SymbolicHamiltonian`` is returned unchanged.
    * A dict is converted with ``create_hamiltonian_from_dict``.
    * Any other object is passed as ``form`` to ``SymbolicHamiltonian``.

    Raises:
        TypeError: If the fallback construction fails for any reason; the
            original exception is chained as the cause.
    """
    if observable is None:
        return build_observable(circuit_nqubit)

    if isinstance(observable, hamiltonians.SymbolicHamiltonian):
        return observable

    if isinstance(observable, dict):
        return create_hamiltonian_from_dict(observable, circuit_nqubit)

    try:
        hamiltonian = hamiltonians.SymbolicHamiltonian(form=observable)
    except Exception as exc:
        raise TypeError("Invalid observable type.") from exc
    return hamiltonian
|
||||
|
||||
|
||||
def build_observable(circuit_nqubit):
    """Build the default observable ``sum_i 0.5 * X_i * Z_{i+1}`` on a ring.

    The neighbour index wraps around, so the last term couples the final
    qubit back to qubit 0.

    Args:
        circuit_nqubit: Number of qubits of the circuit.

    Returns:
        The observable as a ``SymbolicHamiltonian``.
    """
    ring_form = sum(
        0.5 * X(site) * Z((site + 1) % circuit_nqubit)
        for site in range(circuit_nqubit)
    )
    return hamiltonians.SymbolicHamiltonian(form=ring_form)
|
||||
|
||||
|
||||
def create_hamiltonian_from_dict(data, circuit_nqubit):
    """Create a Qibo SymbolicHamiltonian from the qibotn dict representation.

    Two layouts are understood:

    * ``{"pauli_string_pattern": "IXZ"}`` is delegated to
      ``create_hamiltonian_from_pauli_pattern``.
    * ``{"terms": [{"coefficient": c, "operators": [(name, qubit), ...]}]}``
      where ``name`` is one of ``"I"``, ``"X"``, ``"Y"``, ``"Z"``. Every term
      is padded with identities so it acts on all circuit qubits.

    Args:
        data (dict): Observable description.
        circuit_nqubit (int): Total number of qubits in the circuit.

    Returns:
        hamiltonians.SymbolicHamiltonian: The constructed Hamiltonian.

    Raises:
        KeyError: If an operator name is not a supported Pauli label.
        ValueError: If an operator acts on a qubit outside the circuit, or if
            ``data["terms"]`` is empty.
    """
    if "pauli_string_pattern" in data:
        return create_hamiltonian_from_pauli_pattern(
            data["pauli_string_pattern"], circuit_nqubit
        )

    # "I" is accepted explicitly so that term lists containing identity
    # factors do not fail with a KeyError.
    pauli_gates = {"I": I, "X": X, "Y": Y, "Z": Z}
    terms = []

    for term in data["terms"]:
        coeff = term["coefficient"]
        operator_dict = {}
        for name, qubit in term["operators"]:
            # An out-of-range qubit would otherwise be dropped silently by
            # the padding loop below, changing the observable.
            if not 0 <= qubit < circuit_nqubit:
                raise ValueError(
                    f"Operator {name!r} acts on qubit {qubit}, which is "
                    f"outside the {circuit_nqubit}-qubit circuit."
                )
            operator_dict[qubit] = pauli_gates[name]

        full_term_expr = [
            operator_dict[q](q) if q in operator_dict else I(q)
            for q in range(circuit_nqubit)
        ]

        term_expr = full_term_expr[0]
        for op in full_term_expr[1:]:
            term_expr *= op

        terms.append(coeff * term_expr)

    if not terms:
        raise ValueError("No valid Hamiltonian terms were added.")

    return hamiltonians.SymbolicHamiltonian(sum(terms))
|
||||
|
||||
|
||||
def create_hamiltonian_from_pauli_pattern(pattern, circuit_nqubit):
    """Build a single Pauli-string Hamiltonian by tiling ``pattern``.

    Qubit ``q`` receives the letter ``pattern[q % len(pattern)]`` after the
    pattern has been upper-cased. For example ``"IXZ"`` on 5 qubits becomes
    ``X1 * Z2 * X4``: identity letters contribute no factor. If every letter
    is ``I`` the result is the single factor ``I(0)``.

    Args:
        pattern: Non-empty string made of the letters I, X, Y, Z.
        circuit_nqubit: Number of qubits of the circuit.

    Returns:
        The Pauli string as a ``SymbolicHamiltonian``.

    Raises:
        ValueError: If ``pattern`` is not a non-empty string or contains a
            character other than I/X/Y/Z.
    """
    if not (isinstance(pattern, str) and pattern):
        raise ValueError("pauli_string_pattern must be a non-empty string.")

    pauli_gates = {"X": X, "Y": Y, "Z": Z}
    pattern = pattern.upper()
    invalid = sorted(char for char in set(pattern) if char not in ("I", "X", "Y", "Z"))
    if invalid:
        raise ValueError(
            "pauli_string_pattern characters must be one of I/X/Y/Z; "
            f"got {''.join(invalid)!r}."
        )

    period = len(pattern)
    expr = None
    for qubit in range(circuit_nqubit):
        letter = pattern[qubit % period]
        if letter != "I":
            factor = pauli_gates[letter](qubit)
            expr = factor if expr is None else expr * factor

    if expr is None:
        # All-identity pattern (or no qubits): fall back to one identity.
        expr = I(0)
    return hamiltonians.SymbolicHamiltonian(form=expr)
|
||||
|
||||
|
||||
def build_random_circuit(nqubits, nlayers, seed=42):
    """Build a random circuit with RY+RZ+CNOT layers for benchmarks.

    Each layer applies ``RY`` then ``RZ`` with angles drawn uniformly from
    ``[0, 2*pi)`` to every qubit, followed by a ring of CNOTs
    ``q -> (q + 1) % nqubits``.

    Angles come from a private ``numpy.random.RandomState(seed)``. It yields
    the same stream as seeding the global generator with ``seed`` but leaves
    the process-wide NumPy random state untouched.

    Args:
        nqubits: Number of qubits.
        nlayers: Number of rotation + entangling layers.
        seed: Seed of the angle generator.

    Returns:
        The constructed ``qibo.Circuit``.
    """
    import numpy as np
    from qibo import Circuit, gates

    rng = np.random.RandomState(seed)
    two_pi = 2 * np.pi
    c = Circuit(nqubits)
    for _ in range(nlayers):
        for q in range(nqubits):
            c.add(gates.RY(q, theta=rng.uniform(0, two_pi)))
            c.add(gates.RZ(q, theta=rng.uniform(0, two_pi)))
        # With a single qubit the ring would request CNOT(0, 0), i.e. the
        # same qubit as control and target, so the entangling ring is skipped.
        if nqubits > 1:
            for q in range(nqubits):
                c.add(gates.CNOT(q, (q + 1) % nqubits))
    return c
|
||||
|
||||
|
||||
def extract_gates_and_qubits(hamiltonian):
    """List the Pauli factors of every term of a SymbolicHamiltonian.

    Each factor is parsed from its string form: the first character is taken
    as the gate name and the remainder as the qubit index (e.g. ``"X12"``).

    Returns:
        A list with one ``(coefficient, [(qubit, gate_name), ...])`` entry
        per term of ``hamiltonian.terms``.

    Raises:
        ValueError: If ``hamiltonian`` is not a ``SymbolicHamiltonian``.
    """
    if not isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian):
        raise ValueError(
            "Unsupported Hamiltonian type. Must be SymbolicHamiltonian or Hamiltonian."
        )

    extracted_terms = []
    for term in hamiltonian.terms:
        coefficient = term.coefficient
        parsed = []
        for factor in term.factors:
            label = str(factor)
            parsed.append((int(label[1:]), label[0]))
        extracted_terms.append((coefficient, parsed))
    return extracted_terms
|
||||
773
src/qibotn/parallel.py
Normal file
773
src/qibotn/parallel.py
Normal file
@@ -0,0 +1,773 @@
|
||||
"""Parallel path search and contraction utilities for tensor networks."""
|
||||
import os
|
||||
import pickle
|
||||
import signal
|
||||
import time
|
||||
from math import log2, log10
|
||||
import numpy as np
|
||||
from dataclasses import dataclass
|
||||
from concurrent.futures import ProcessPoolExecutor, TimeoutError, as_completed
|
||||
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
_HAVE_MPI = True
|
||||
except ImportError:
|
||||
_HAVE_MPI = False
|
||||
MPI = None
|
||||
|
||||
|
||||
SEARCH_METHODS = ("greedy", "kahypar", "kahypar-agglom", "spinglass")
|
||||
_COTENGRA_DASK_PATCHED = False
|
||||
_COTENGRA_DASK_SUBMIT_PATCHED = False
|
||||
_DASK_TRIAL_DEBUG = False
|
||||
|
||||
|
||||
def _optimizer_search_stats(opt):
    """Summarise a hyper-optimizer's search history as a plain dict.

    Reads the optional attributes ``scores``, ``times``, ``max_repeats`` and
    ``best`` from ``opt``; a missing attribute counts as empty / zero and a
    missing ``best`` entry is reported as ``inf``.

    Returns:
        Dict with trial counts (completed, finite, failed, requested), the
        summed trial time and the best score / flops / write / size.
    """
    recorded = list(getattr(opt, "scores", ()))
    finite_count = sum(1 for score in recorded if np.isfinite(score))
    durations = list(getattr(opt, "times", ()))
    best = getattr(opt, "best", {}) or {}
    worst = float("inf")

    stats = {
        "completed_trials": len(recorded),
        "finite_trials": finite_count,
        "failed_trials": len(recorded) - finite_count,
        "requested_trials": int(getattr(opt, "max_repeats", 0) or 0),
        "trial_seconds_sum": float(sum(durations)),
    }
    for metric in ("score", "flops", "write", "size"):
        stats[f"best_{metric}"] = float(best.get(metric, worst))
    return stats
|
||||
|
||||
|
||||
def _attach_search_stats(tree, opt):
    """Store the optimizer statistics on ``tree`` and return ``tree``.

    The statistics are written to ``tree.qibotn_search_stats``. This is best
    effort: any exception raised while computing or attaching them is
    ignored and the tree is returned unchanged.
    """
    try:
        setattr(tree, "qibotn_search_stats", _optimizer_search_stats(opt))
    except Exception:
        # Statistics are purely informational; never fail the search on them.
        pass
    return tree
|
||||
|
||||
|
||||
def _dask_worker_slots(client):
    """Return the scheduler's worker table and the total thread count.

    Args:
        client: Object exposing ``scheduler_info(n_workers=-1)`` that returns
            a mapping with an optional ``"workers"`` entry.

    Returns:
        ``(workers, slots)`` where ``slots`` sums each worker's ``nthreads``;
        a missing or falsy ``nthreads`` counts as one thread.
    """
    workers = client.scheduler_info(n_workers=-1).get("workers", {})
    slots = 0
    for details in workers.values():
        slots += int(details.get("nthreads", 1) or 1)
    return workers, slots
|
||||
|
||||
|
||||
def _print_dask_worker_summary(client):
    """Print one line summarising the dask workers, grouped by host.

    The line reports the number of workers, the total thread count and a
    per-host dict of worker and thread counts. Workers without a ``host``
    entry are grouped under ``"unknown"``.
    """
    workers, slots = _dask_worker_slots(client)
    by_host = {}
    for details in workers.values():
        tally = by_host.setdefault(
            details.get("host", "unknown"), {"workers": 0, "threads": 0}
        )
        tally["workers"] += 1
        tally["threads"] += int(details.get("nthreads", 1) or 1)
    summary = f"workers={len(workers)} threads={slots} by_host={by_host}"
    print("qibotn_dask_workers " + summary, flush=True)
|
||||
|
||||
|
||||
def _run_trial_with_debug(fn, *args, **kwargs):
    """Run ``fn(*args, **kwargs)`` and log start, failure and completion.

    Each log line identifies the dask worker address (``"unknown"`` when it
    cannot be determined), the host name, the process id and the ``method``
    keyword argument. Exceptions raised by ``fn`` are logged and re-raised.

    Returns:
        Whatever ``fn`` returns; it must support ``.get`` for the summary.
    """
    import os
    import socket

    try:
        from distributed import get_worker

        worker_address = get_worker().address
    except Exception:
        worker_address = "unknown"

    method = kwargs.get("method", "unknown")
    pid = os.getpid()
    host = socket.gethostname()
    origin = f"worker={worker_address} host={host} pid={pid} method={method}"

    print(f"qibotn_trial_start {origin}", flush=True)
    began = time.perf_counter()
    try:
        trial = fn(*args, **kwargs)
    except Exception as exc:
        elapsed = time.perf_counter() - began
        print(
            f"qibotn_trial_error {origin} seconds={elapsed:.3f} error={exc!r}",
            flush=True,
        )
        raise

    elapsed = time.perf_counter() - began
    missing = float("nan")
    score = trial.get("score", missing)
    flops = trial.get("flops", missing)
    size = trial.get("size", missing)
    print(
        f"qibotn_trial_done {origin} seconds={elapsed:.3f} "
        f"score={score:.6g} flops={flops:.6g} size={size:.6g}",
        flush=True,
    )
    return trial
|
||||
|
||||
|
||||
def _patch_cotengra_dask_submit(debug_trials=False):
    """Install a ``submit`` wrapper into cotengra that can log dask trials.

    The debug flag is refreshed on every call. The wrapper itself is
    installed only once, on ``cotengra.parallel`` and on
    ``cotengra.hyperoptimizers.hyper``. While the flag is on, jobs submitted
    to a pool whose class lives in the ``distributed`` package are routed
    through ``_run_trial_with_debug``; all other jobs are forwarded untouched.
    """
    global _COTENGRA_DASK_SUBMIT_PATCHED, _DASK_TRIAL_DEBUG
    _DASK_TRIAL_DEBUG = bool(debug_trials)
    if _COTENGRA_DASK_SUBMIT_PATCHED:
        return

    import cotengra.parallel as ctg_parallel
    import cotengra.hyperoptimizers.hyper as hyper

    original_submit = ctg_parallel.submit

    def submit(pool, fn, *args, **kwargs):
        pool_package = pool.__class__.__module__.partition(".")[0]
        trace_trial = _DASK_TRIAL_DEBUG and pool_package == "distributed"
        if not trace_trial:
            return original_submit(pool, fn, *args, **kwargs)
        return original_submit(pool, _run_trial_with_debug, fn, *args, **kwargs)

    for module in (ctg_parallel, hyper):
        module.submit = submit
    _COTENGRA_DASK_SUBMIT_PATCHED = True
|
||||
|
||||
|
||||
def _patch_cotengra_dask_as_completed():
    """Make cotengra 0.7.5 handle distributed.Future objects.

    This cotengra release routes all parallel futures through
    ``concurrent.futures.as_completed()``, which does not accept dask
    ``distributed.Future`` instances. Keep cotengra's optimizer/reporting logic
    intact and only swap the wait primitive when the futures are from dask.

    The replacement method honours an optional ``_qibotn_deadline`` attribute
    (a ``time.time()`` timestamp) on the optimizer: once it has passed, all
    pending dask futures are cancelled and an all-``inf`` trial is returned.
    A trial whose future raised is likewise reported as an all-``inf`` trial.

    The patch is applied at most once per process.
    """
    global _COTENGRA_DASK_PATCHED
    if _COTENGRA_DASK_PATCHED:
        return

    import asyncio
    import builtins
    import concurrent.futures as _cf

    from cotengra.hyperoptimizers.hyper import HyperOptimizer

    # Before Python 3.11 these are three unrelated classes, and a timeout
    # raised by dask's as_completed is not the concurrent.futures one.
    # Catch all of them so the deadline path works on every version.
    timeout_errors = (_cf.TimeoutError, asyncio.TimeoutError, builtins.TimeoutError)

    def _failed_trial():
        # Fresh dict per call: callers may mutate the reported trial.
        return {
            "score": float("inf"),
            "flops": float("inf"),
            "write": float("inf"),
            "size": float("inf"),
            "time": 0.0,
        }

    def _get_and_report_next_future(self):
        futures_map = {future: setting for setting, future in self._futures}
        if not futures_map:
            return _failed_trial()

        future0 = next(iter(futures_map))
        if future0.__class__.__module__.split(".", 1)[0] == "distributed":
            from distributed import as_completed

            deadline = getattr(self, "_qibotn_deadline", None)
            timeout = None if deadline is None else max(0.0, deadline - time.time())
            try:
                future = next(iter(as_completed(futures_map, timeout=timeout)))
            except timeout_errors:
                # Deadline reached: drop everything still in flight.
                for pending in futures_map:
                    pending.cancel()
                self._futures = []
                return _failed_trial()
        else:
            future = next(_cf.as_completed(futures_map))

        setting = futures_map[future]
        self._futures = [(s, f) for s, f in self._futures if f is not future]
        try:
            trial = future.result()
        except Exception:
            # A crashed trial is scored as infinitely bad instead of
            # aborting the whole search.
            trial = _failed_trial()
        self._maybe_report_result(setting, trial)
        return trial

    HyperOptimizer._get_and_report_next_future = _get_and_report_next_future
    _COTENGRA_DASK_PATCHED = True
|
||||
|
||||
|
||||
def _search_chunk(
    tn_bytes,
    output_inds,
    repeats,
    seed,
    max_time,
    slicing_opts,
    optlib=None,
):
    """Run one serial cotengra hyper-optimisation on a pickled network.

    Args:
        tn_bytes: Pickled tensor network exposing ``contraction_tree``.
        output_inds: Output indices forwarded to ``contraction_tree``.
        repeats: ``max_repeats`` of the hyper-optimizer.
        seed: Seed applied to Python's ``random`` module before the search.
        max_time: ``max_time`` of the hyper-optimizer.
        slicing_opts: ``slicing_opts`` of the hyper-optimizer.
        optlib: Optional ``optlib`` override; omitted when ``None``.

    Returns:
        ``(cost, tree)`` where ``cost`` is ``tree.combo_cost(factor=256)``
        and ``tree`` carries the search statistics when they could be
        attached.
    """
    import random
    import cotengra as ctg

    random.seed(seed)
    # NOTE: pickle.loads must only ever receive bytes produced by this
    # process family; never feed it untrusted data.
    network = pickle.loads(tn_bytes)

    optimizer_options = dict(
        methods=SEARCH_METHODS,
        max_repeats=repeats,
        max_time=max_time,
        parallel=False,
        minimize="combo-256",
        slicing_opts=slicing_opts,
        progbar=False,
    )
    if optlib is not None:
        optimizer_options["optlib"] = optlib
    opt = ctg.HyperOptimizer(**optimizer_options)

    tree = network.contraction_tree(optimize=opt, output_inds=output_inds)
    cost = tree.combo_cost(factor=256)
    return cost, _attach_search_stats(tree, opt)
|
||||
|
||||
|
||||
def _run_single_trial(tn_bytes, output_inds, seed, slicing_opts):
    """Run exactly one ``_search_chunk`` repeat with the ``"random"`` optlib.

    No ``max_time`` is set here; callers bound the trial from outside.
    Returns whatever ``_search_chunk`` returns.
    """
    single_trial = dict(
        repeats=1,
        seed=seed,
        max_time=None,
        slicing_opts=slicing_opts,
        optlib="random",
    )
    return _search_chunk(tn_bytes, output_inds, **single_trial)
|
||||
|
||||
|
||||
def _kill_pool(pool):
|
||||
processes = getattr(pool, "_processes", None)
|
||||
if processes:
|
||||
pids = list(processes.keys())
|
||||
else:
|
||||
pids = []
|
||||
|
||||
for pid in pids:
|
||||
try:
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
pool.shutdown(wait=False)
|
||||
|
||||
|
||||
def _serial_search(tn_bytes, output_inds, repeats, seed, max_time, slicing_opts=None, trial_timeout=None):
|
||||
import time
|
||||
|
||||
if trial_timeout is None:
|
||||
return _search_chunk(
|
||||
tn_bytes,
|
||||
output_inds,
|
||||
repeats=repeats,
|
||||
seed=seed,
|
||||
max_time=max_time,
|
||||
slicing_opts=slicing_opts,
|
||||
)
|
||||
|
||||
deadline = time.time() + max_time
|
||||
best_cost, best_tree = float("inf"), None
|
||||
|
||||
for i in range(repeats):
|
||||
if time.time() >= deadline:
|
||||
break
|
||||
timeout = min(trial_timeout, deadline - time.time())
|
||||
pool = ProcessPoolExecutor(max_workers=1)
|
||||
fut = pool.submit(_run_single_trial, tn_bytes, output_inds, seed * 10000 + i, slicing_opts)
|
||||
try:
|
||||
cost, tree = fut.result(timeout=timeout)
|
||||
if cost < best_cost:
|
||||
best_cost, best_tree = cost, tree
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
_kill_pool(pool)
|
||||
|
||||
return best_cost, best_tree
|
||||
|
||||
|
||||
def _split_repeats(total_repeats, n_workers):
|
||||
n_workers = max(1, int(n_workers))
|
||||
total_repeats = max(1, int(total_repeats))
|
||||
chunk, extra = divmod(total_repeats, n_workers)
|
||||
return [chunk + (1 if i < extra else 0) for i in range(n_workers) if chunk + (1 if i < extra else 0) > 0]
|
||||
|
||||
|
||||
def _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts=None, trial_timeout=None):
    """Search for a contraction tree with a pool of worker processes.

    The repeats are split across workers with ``_split_repeats``; each worker
    runs ``_serial_search`` with its index as seed. The parent waits at most
    ``max_time`` seconds (no limit when ``None``), keeps the cheapest tree that
    was returned in time, then cancels the remaining futures and kills the pool.

    Returns:
        The best tree found, or ``None`` if no worker produced one in time.
    """
    # Before Python 3.11 ``concurrent.futures.TimeoutError`` is a class of its
    # own rather than an alias of the builtin, so catch both spellings.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    tn_bytes = pickle.dumps(tn)
    repeat_chunks = _split_repeats(total_repeats, n_workers)
    pool = ProcessPoolExecutor(max_workers=len(repeat_chunks))
    futures = []
    best_cost, best_tree = float("inf"), None
    try:
        # Submitting inside the ``try`` guarantees the pool is torn down even
        # if a submission fails.
        for seed, repeats in enumerate(repeat_chunks):
            futures.append(
                pool.submit(
                    _serial_search,
                    tn_bytes,
                    output_inds,
                    repeats,
                    seed,
                    max_time,
                    slicing_opts,
                    trial_timeout,
                )
            )
        deadline = time.monotonic() + max_time if max_time is not None else None
        timeout = None if deadline is None else max(0.0, deadline - time.monotonic())
        for fut in as_completed(futures, timeout=timeout):
            try:
                cost, tree = fut.result()
                if cost < best_cost:
                    best_cost, best_tree = cost, tree
            except Exception:
                # Best effort: a failed worker contributes nothing.
                pass
    except (TimeoutError, FuturesTimeoutError):
        # Global budget exhausted; keep whatever has been collected so far.
        pass
    finally:
        for fut in futures:
            fut.cancel()
        _kill_pool(pool)
    return best_tree
|
||||
|
||||
|
||||
def _dask_search(
    tn,
    output_inds,
    total_repeats,
    max_time,
    slicing_opts=None,
    dask_address=None,
    n_workers=None,
    optlib=None,
    debug_trials=False,
    close_workers=False,
):
    """Run one centralized cotengra hyper-optimizer over a dask pool.

    With ``dask_address`` this connects to an external distributed scheduler.
    Without it, an already-active client is reused if ``get_client()`` finds
    one; otherwise a local dask cluster is created for single-node smoke
    testing.

    Args:
        tn: Tensor network exposing ``contraction_tree``.
        output_inds: Output indices forwarded to ``contraction_tree``.
        total_repeats: ``max_repeats`` of the hyper-optimizer.
        max_time: ``max_time`` of the hyper-optimizer; when not ``None`` it is
            also stored as a wall-clock deadline on the optimizer.
        slicing_opts: cotengra slicing options.
        dask_address: Address of an external scheduler, or falsy.
        n_workers: Worker count for the local cluster (defaults to
            ``os.cpu_count()``); only used when a local cluster is created.
        optlib: Optional ``optlib`` forwarded to the hyper-optimizer.
        debug_trials: Forwarded to the submit patch; also prints a worker
            summary.
        close_workers: When true, retire the workers seen at search start once
            the search is over.

    Returns:
        The result of ``_attach_search_stats(tree, opt)``.

    Raises:
        ImportError: If ``distributed`` is not installed.
    """
    try:
        from distributed import Client, LocalCluster, get_client
    except ImportError as exc:
        raise ImportError(
            "Dask search requires `distributed`. Install it with "
            "`pip install distributed` or the package extra that provides it."
        ) from exc

    import cotengra as ctg

    # Install the module's cotengra monkey patches before building the optimizer.
    _patch_cotengra_dask_as_completed()
    _patch_cotengra_dask_submit(debug_trials=debug_trials)

    # Track which resources were created here so that only those are closed.
    close_client = False
    close_cluster = False
    cluster = None

    if dask_address:
        client = Client(dask_address)
        close_client = True
    else:
        try:
            # Reuse an existing client; it is not closed on exit.
            client = get_client()
        except ValueError:
            cluster = LocalCluster(
                n_workers=max(1, int(n_workers or os.cpu_count() or 1)),
                threads_per_worker=1,
                processes=True,
                memory_limit=0,
            )
            client = Client(cluster)
            close_client = True
            close_cluster = True

    kwargs = {}
    if optlib is not None:
        kwargs["optlib"] = optlib

    retire_workers = []
    try:
        workers, worker_slots = _dask_worker_slots(client)
        if close_workers:
            # Remember the workers present now; they are retired in ``finally``.
            retire_workers = list(workers)
        if debug_trials:
            _print_dask_worker_summary(client)
        if total_repeats < worker_slots:
            print(
                "qibotn_dask_underutilized "
                f"requested_trials={total_repeats} worker_slots={worker_slots} "
                "hint='increase --tn-search-repeats to at least worker_slots'",
                flush=True,
            )
        opt = ctg.HyperOptimizer(
            methods=SEARCH_METHODS,
            max_repeats=total_repeats,
            max_time=max_time,
            parallel=client,
            minimize="combo-256",
            slicing_opts=slicing_opts,
            progbar=False,
            **kwargs,
        )
        # Override cotengra's worker bookkeeping with the slot count reported
        # by the cluster, and never pre-dispatch more trials than requested.
        opt._num_workers = max(1, worker_slots)
        opt.pre_dispatch = max(1, min(int(total_repeats), worker_slots))
        if max_time is not None:
            # Deadline consumed by the patched ``_get_and_report_next_future``.
            opt._qibotn_deadline = time.time() + max_time
        tree = tn.contraction_tree(optimize=opt, output_inds=output_inds)
        return _attach_search_stats(tree, opt)
    finally:
        if close_workers and retire_workers:
            try:
                retired = client.retire_workers(
                    workers=retire_workers,
                    close_workers=True,
                    remove=True,
                )
                print(
                    "qibotn_dask_workers_closed "
                    f"requested={len(retire_workers)} retired={len(retired)}",
                    flush=True,
                )
            except Exception as exc:
                # Retiring workers is best effort; report and continue cleanup.
                print(
                    "qibotn_dask_workers_close_failed "
                    f"requested={len(retire_workers)} error={exc!r}",
                    flush=True,
                )
        if close_client:
            client.close()
        if close_cluster:
            cluster.close()
|
||||
|
||||
|
||||
def _mpi_search(
    tn,
    output_inds,
    total_repeats,
    max_time,
    n_workers=None,
    slicing_opts=None,
    trial_timeout=None,
    search_backend="processpool",
    dask_address=None,
    debug_trials=False,
    dask_close_workers=False,
):
    """Search for a contraction tree across the ranks of ``MPI.COMM_WORLD``.

    With ``search_backend == "dask"`` only rank 0 runs ``_dask_search`` and the
    outcome (tree or error) is broadcast to every rank. Otherwise each rank
    runs ``_processpool_search`` on ``total_repeats // size`` repeats (at least
    one), rank 0 picks the cheapest tree, and that tree is broadcast.

    Returns:
        The selected tree on every rank (``None`` if no rank found one).

    Raises:
        ValueError: Dask backend requested without ``dask_address``.
        RuntimeError: On every rank, when the dask search failed on rank 0.
    """
    comm = MPI.COMM_WORLD
    rank, size = comm.Get_rank(), comm.Get_size()
    # A falsy backend (e.g. None) selects the process-pool path.
    search_backend = search_backend or "processpool"

    if search_backend == "dask":
        if not dask_address:
            raise ValueError(
                "MPI + dask search requires an external dask scheduler. Start "
                "dask-scheduler/dask-worker outside mpiexec and pass "
                "`--dask-address tcp://host:8786`."
            )

        payload = None
        if rank == 0:
            try:
                tree = _dask_search(
                    tn,
                    output_inds,
                    total_repeats,
                    max_time,
                    slicing_opts=slicing_opts,
                    dask_address=dask_address,
                    n_workers=n_workers,
                    debug_trials=debug_trials,
                    close_workers=dask_close_workers,
                )
                payload = ("ok", tree)
            except Exception as exc:
                # Ship the failure to the other ranks instead of raising here,
                # so every rank still reaches the broadcast below.
                payload = ("error", repr(exc))

        status, value = comm.bcast(payload, root=0)
        if status == "error":
            raise RuntimeError(f"Dask path search failed on rank 0: {value}")
        return value

    repeats_per = max(1, total_repeats // size)

    # Run search work in child processes even when n_workers == 1, so the parent
    # MPI rank can enforce the global timeout by killing active trials.
    local_tree = _processpool_search(
        tn,
        output_inds,
        repeats_per,
        max(1, n_workers or 1),
        max_time,
        slicing_opts,
        trial_timeout,
    )
    local_cost = local_tree.combo_cost(factor=256) if local_tree else float("inf")

    # Collect every rank's candidate on rank 0; other ranks receive None here.
    all_results = comm.gather((local_cost, local_tree), root=0)
    best_tree = None
    if rank == 0:
        best_cost = float("inf")
        for cost, tree in all_results:
            if tree is not None and cost < best_cost:
                best_cost, best_tree = cost, tree
    return comm.bcast(best_tree, root=0)
|
||||
|
||||
|
||||
def parallel_path_search(tn, output_inds, method='processpool', total_repeats=1024,
                         max_time=300, n_workers=48, slicing_opts=None,
                         trial_timeout=None, search_backend=None,
                         dask_address=None, debug_trials=False,
                         dask_close_workers=False):
    """Dispatch a contraction path search to one of the parallel strategies.

    Args:
        tn: Tensor network to search a contraction tree for.
        output_inds: Output indices of the contraction.
        method: 'processpool' | 'dask' | 'mpi' | 'serial'
        total_repeats: Total optimization repeats across all workers
        max_time: Global timeout per worker (seconds)
        n_workers: Workers per MPI rank (or total for processpool)
        slicing_opts: cotengra slicing options for memory control
        trial_timeout: Per-trial timeout (seconds); kills and skips hung trials
        search_backend: Backend used inside the 'mpi' method only.
        dask_address: Scheduler address for the dask-based searches.
        debug_trials: Forwarded to the dask-based searches.
        dask_close_workers: Forwarded to the dask-based searches.

    Returns:
        The contraction tree produced by the selected strategy.

    Raises:
        ImportError: ``method == 'mpi'`` but mpi4py is unavailable.
        ValueError: ``method`` is not one of the supported names.
    """
    if method == 'serial':
        _cost, tree = _serial_search(
            pickle.dumps(tn), output_inds, total_repeats, 0, max_time, slicing_opts, trial_timeout
        )
        return tree

    if method == 'mpi':
        if not _HAVE_MPI:
            raise ImportError("mpi4py not available")
        return _mpi_search(
            tn,
            output_inds,
            total_repeats,
            max_time,
            n_workers,
            slicing_opts,
            trial_timeout,
            search_backend=search_backend,
            dask_address=dask_address,
            debug_trials=debug_trials,
            dask_close_workers=dask_close_workers,
        )

    if method == 'processpool':
        return _processpool_search(
            tn, output_inds, total_repeats, n_workers, max_time, slicing_opts, trial_timeout
        )

    if method == 'dask':
        return _dask_search(
            tn,
            output_inds,
            total_repeats,
            max_time,
            slicing_opts=slicing_opts,
            dask_address=dask_address,
            n_workers=n_workers,
            debug_trials=debug_trials,
            close_workers=dask_close_workers,
        )

    raise ValueError(f"Unknown method: {method}")
|
||||
|
||||
|
||||
def contraction_tree_costs(tree, dtype_bytes=16, combo_factor=256):
    """Summarize the estimated cost of a cotengra contraction tree.

    All figures are estimates taken from ``tree.contract_stats()`` and
    ``tree.combo_cost``, not measurements. Logarithms of non-positive values
    are reported as ``-inf``; ``slicing_overhead`` is ``nan`` when the original
    flop count is not positive. ``peak_memory_gib`` is ``size * dtype_bytes``
    expressed in GiB.
    """

    def _guarded_log(log_fn, quantity):
        # Non-positive quantities have no logarithm; report -inf instead.
        if quantity > 0:
            return log_fn(quantity)
        return float("-inf")

    stats = tree.contract_stats()
    flops, write, size = (float(stats[name]) for name in ("flops", "write", "size"))
    combo = float(tree.combo_cost(factor=combo_factor))
    nslices = int(getattr(tree, "multiplicity", 1))
    original_flops = float(stats.get("original_flops", flops))

    if original_flops > 0:
        overhead = flops / original_flops
    else:
        overhead = float("nan")

    report = {
        "flops": flops,
        "write": write,
        "size": size,
        "combo": combo,
        "log10_flops": _guarded_log(log10, flops),
        "log10_write": _guarded_log(log10, write),
        "log2_size": _guarded_log(log2, size),
        "log10_combo": _guarded_log(log10, combo),
        "nslices": nslices,
        "slicing_overhead": overhead,
        "peak_memory_gib": size * dtype_bytes / 1024**3,
    }
    return report
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SlicePlan:
    """Slice ownership for one MPI rank."""

    # Rank this plan belongs to.
    rank: int
    # Total number of ranks the slices are divided among.
    size: int
    # Total number of contraction slices.
    nslices: int
    # Slice ids owned by ``rank``.
    indices: tuple
    # Distribution scheme name; ``mpi_slice_plan`` produces "block" or "cyclic".
    assignment: str = "block"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class SlicedContractStats:
    """Diagnostics for one sliced contraction."""

    # Rank that produced these statistics.
    rank: int
    # Number of ranks taking part in the contraction.
    size: int
    # Total number of slices of the contraction tree.
    nslices: int
    # Number of slices contracted locally by ``rank``.
    local_slices: int
    # Name of the slice distribution scheme that was used.
    assignment: str
|
||||
|
||||
|
||||
def mpi_slice_plan(nslices, rank, size, assignment="block"):
    """Compute which contraction slices a given MPI rank owns.

    ``block`` hands each rank one contiguous range (the first ranks absorb the
    remainder), mirroring cutensornet's slice-range style. ``cyclic`` deals
    slices round-robin, so rank ``r`` gets ``r, r + size, ...``, which can
    balance better when individual slice costs vary.

    Raises:
        ValueError: For a negative ``nslices``, a non-positive ``size``, a
            ``rank`` outside ``[0, size)`` or an unknown ``assignment``.
    """
    if nslices < 0:
        raise ValueError("nslices must be non-negative.")
    if size <= 0:
        raise ValueError("size must be positive.")
    if rank < 0 or rank >= size:
        raise ValueError("rank must satisfy 0 <= rank < size.")

    if assignment == "cyclic":
        owned = tuple(range(rank, nslices, size))
    elif assignment == "block":
        base, leftover = divmod(nslices, size)
        # Ranks below ``leftover`` each hold one extra slice.
        first = rank * base + min(rank, leftover)
        width = base + 1 if rank < leftover else base
        owned = tuple(range(first, first + width))
    else:
        raise ValueError("assignment must be 'block' or 'cyclic'.")

    return SlicePlan(rank, size, nslices, owned, assignment)
|
||||
|
||||
|
||||
def _array_backend(arrays):
|
||||
return "torch" if type(arrays[0]).__module__.startswith("torch") else "numpy"
|
||||
|
||||
|
||||
def _to_numpy_vector(value, is_torch):
|
||||
if is_torch:
|
||||
return value.detach().cpu().numpy().reshape(-1)
|
||||
return np.asarray(value).reshape(-1)
|
||||
|
||||
|
||||
def _zero_vector_like(arrays):
|
||||
array = arrays[0]
|
||||
if type(array).__module__.startswith("torch"):
|
||||
return np.zeros(1, dtype=np.complex64 if "64" in str(array.dtype) else np.complex128)
|
||||
return np.zeros(1, dtype=np.asarray(array).dtype)
|
||||
|
||||
|
||||
def contract_tree_slices(tree, arrays, slice_indices, backend=None, implementation=None):
    """Contract the given cotengra slices and return the sum of their results.

    Args:
        tree: Contraction tree providing ``contract_slice`` / ``slice_arrays``.
        arrays: Input tensors.
        slice_indices: Iterable of slice ids to contract.
        backend: Array backend name; detected from ``arrays`` when falsy.
        implementation: ``"cpp"`` selects the torch C++ contractor; any other
            non-``None`` value is forwarded to ``tree.contract_slice``.

    Returns:
        A flat numpy vector with the summed slice results, or a zero vector
        from ``_zero_vector_like`` when ``slice_indices`` is empty.

    Raises:
        ValueError: ``implementation == "cpp"`` with a non-torch backend.
    """
    backend = backend or _array_backend(arrays)
    is_torch = backend == "torch"

    use_cpp = implementation == "cpp"
    if use_cpp:
        if backend != "torch":
            raise ValueError("implementation='cpp' requires torch arrays.")
        from qibotn.torch_contractor import contract_tree_cpp

    # Only mention ``implementation`` to cotengra when the caller set it.
    extra = {} if implementation is None else {"implementation": implementation}

    total = None
    for slice_id in slice_indices:
        if use_cpp:
            piece = contract_tree_cpp(tree, tree.slice_arrays(arrays, slice_id))
        else:
            piece = tree.contract_slice(arrays, slice_id, backend=backend, **extra)
        piece = _to_numpy_vector(piece, is_torch)
        total = piece if total is None else total + piece

    if total is None:
        return _zero_vector_like(arrays)
    return total
|
||||
|
||||
|
||||
def parallel_contract(
    tree,
    arrays,
    method='mpi',
    comm=None,
    assignment="block",
    return_stats=False,
    implementation=None,
):
    """Contract ``tree`` in parallel; only the ``'mpi'`` method is supported.

    Raises:
        ValueError: For an unknown ``method``, or when ``'mpi'`` is requested
            without mpi4py or without a communicator.
    """
    if method != 'mpi':
        raise ValueError(f"Unknown method: {method}")

    if not _HAVE_MPI or comm is None:
        raise ValueError("MPI method requires mpi4py and comm")

    return _contract_mpi(
        tree,
        arrays,
        comm,
        assignment=assignment,
        return_stats=return_stats,
        implementation=implementation,
    )
|
||||
|
||||
|
||||
def _contract_mpi(
    tree,
    arrays,
    comm,
    root=0,
    assignment="block",
    return_stats=False,
    implementation=None,
):
    """Contract a sliced tree across MPI ranks and reduce the result on ``root``.

    Every rank contracts the slices given by ``mpi_slice_plan`` and the partial
    sums are combined with ``comm.Reduce``.

    Args:
        tree: Contraction tree; its slice count is read from ``multiplicity``
            (1 if the attribute is absent).
        arrays: Input tensors.
        comm: MPI communicator.
        root: Rank that receives the reduced result.
        assignment: Slice distribution scheme for ``mpi_slice_plan``.
        return_stats: Also return a ``SlicedContractStats`` when true.
        implementation: Forwarded to ``contract_tree_slices``.

    Returns:
        The reduced vector on ``root`` and ``None`` on the other ranks; paired
        with the local stats when ``return_stats`` is true.

    Raises:
        RuntimeError: The ranks disagree on the number of slices.
        NotImplementedError: A sliced index also appears in the tree output.
    """
    rank, size = comm.Get_rank(), comm.Get_size()
    backend = _array_backend(arrays)
    nslices = int(getattr(tree, "multiplicity", 1))

    # All ranks must hold the same tree, otherwise the slice plans disagree.
    nslices_by_rank = comm.allgather(nslices)
    if len(set(nslices_by_rank)) != 1:
        raise RuntimeError(
            "Inconsistent contraction tree slices across MPI ranks: "
            f"{nslices_by_rank}. Ensure all nodes load the same tree file."
        )

    if not set(getattr(tree, "sliced_inds", ())).isdisjoint(set(getattr(tree, "output", ()))):
        raise NotImplementedError(
            "MPI sliced contraction currently requires sliced indices not to "
            "appear in the output."
        )

    plan = mpi_slice_plan(nslices, rank, size, assignment=assignment)
    local = contract_tree_slices(
        tree,
        arrays,
        plan.indices,
        backend=backend,
        implementation=implementation,
    )
    stats = SlicedContractStats(rank, size, nslices, len(plan.indices), assignment)

    # Only the root needs a receive buffer.
    result = np.zeros_like(local) if rank == root else None
    comm.Reduce(local, result, root=root)
    return (result, stats) if return_stats else result
|
||||
@@ -57,10 +57,10 @@ class TensorNetworkResult:
|
||||
return self.measures
|
||||
|
||||
def state(self):
|
||||
"""Return the statevector if the number of qubits is less than 20."""
|
||||
if self.nqubits < 20:
|
||||
"""Return the statevector if the number of qubits is less than 35."""
|
||||
if self.nqubits < 35:
|
||||
return self.statevector
|
||||
raise_error(
|
||||
NotImplementedError,
|
||||
f"Tensor network simulation cannot be used to reconstruct statevector for >= 20 .",
|
||||
f"Tensor network simulation cannot be used to reconstruct statevector for >= 35 .",
|
||||
)
|
||||
|
||||
252
src/qibotn/torch_contractor.py
Normal file
252
src/qibotn/torch_contractor.py
Normal file
@@ -0,0 +1,252 @@
|
||||
"""Torch C++ contraction backend for cotengra trees.
|
||||
|
||||
This module compiles a restricted cotengra contraction tree into a compact
|
||||
execution plan, then executes that plan in a C++ torch extension. It is an
|
||||
experimental CPU path for reducing Python-level overhead between many
|
||||
pairwise contractions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
_EXTENSION = None
|
||||
_CONTRACTORS = {}
|
||||
SMALL_GEMM_BATCH_FLOPS = 1_000_000
|
||||
|
||||
|
||||
def _load_extension():
    """Build (once) and return the torch C++ contractor extension.

    The module is cached in the ``_EXTENSION`` global. When ``MKLROOT`` is set
    and both ``include/mkl_cblas.h`` and ``lib/libmkl_rt.so`` (or
    ``libmkl_rt.so.2``) exist below it, the extension is compiled with
    ``-DQIBOTN_USE_MKL`` and linked against ``mkl_rt``.
    """
    global _EXTENSION
    if _EXTENSION is not None:
        return _EXTENSION

    from torch.utils.cpp_extension import load

    cpp_source = Path(__file__).resolve().parent / "csrc" / "torch_contractor.cpp"
    cflags, ldflags, include_dirs = ["-O3"], [], []

    mklroot = os.environ.get("MKLROOT")
    if mklroot:
        include_dir = Path(mklroot) / "include"
        lib_dir = Path(mklroot) / "lib"
        has_header = (include_dir / "mkl_cblas.h").exists()
        if has_header and any(
            (lib_dir / name).exists() for name in ("libmkl_rt.so", "libmkl_rt.so.2")
        ):
            cflags.append("-DQIBOTN_USE_MKL")
            include_dirs.append(str(include_dir))
            ldflags.extend([f"-L{lib_dir}", "-lmkl_rt"])

    _EXTENSION = load(
        name="qibotn_torch_contractor",
        sources=[str(cpp_source)],
        extra_cflags=cflags,
        extra_ldflags=ldflags,
        extra_include_paths=include_dirs,
        verbose=False,
    )
    return _EXTENSION
|
||||
|
||||
|
||||
def _is_plain_permutation(expr):
|
||||
if expr is None:
|
||||
return None
|
||||
if isinstance(expr, tuple):
|
||||
return tuple(int(i) for i in expr)
|
||||
if not isinstance(expr, str):
|
||||
return None
|
||||
if "," in expr or "->" not in expr:
|
||||
return None
|
||||
source, target = expr.split("->", 1)
|
||||
if len(source) != len(target):
|
||||
return None
|
||||
if len(set(source)) != len(source) or set(source) != set(target):
|
||||
return None
|
||||
return tuple(source.index(ix) for ix in target)
|
||||
|
||||
|
||||
def _maybe_tuple(values):
|
||||
return () if values is None else tuple(int(x) for x in values)
|
||||
|
||||
|
||||
def _shape_from_inds(tree, node):
|
||||
return tuple(int(tree.size_dict[ix]) for ix in tree.get_inds(node))
|
||||
|
||||
|
||||
def _matmul_signature(op):
|
||||
kind = op[3]
|
||||
if kind != 0:
|
||||
return None
|
||||
left_shape = op[5]
|
||||
right_shape = op[7]
|
||||
if len(left_shape) == 2 and len(right_shape) == 2:
|
||||
m, k, n = left_shape[-2], left_shape[-1], right_shape[-1]
|
||||
return ("mm", int(m), int(k), int(n), int(m * k * n))
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_node_ids(tree, contractions):
|
||||
leaf_to_id = {
|
||||
frozenset((i,)): i
|
||||
for i in range(tree.N)
|
||||
}
|
||||
next_id = len(leaf_to_id)
|
||||
node_to_id = dict(leaf_to_id)
|
||||
for parent, _left, _right, _tdot, _arg, _perm in contractions:
|
||||
if parent not in node_to_id:
|
||||
node_to_id[parent] = next_id
|
||||
next_id += 1
|
||||
|
||||
return node_to_id, next_id
|
||||
|
||||
|
||||
@lru_cache(maxsize=32)
def compile_torch_plan(tree):
    """Compile ``tree`` into C++ contractor plan fields.

    The supported subset is the same pairwise matmul lowering used by
    cotengra for torch CPU. Single-tensor diagonal/sum preprocessing is not
    supported yet because it appears only in less common trees; callers should
    fall back to cotengra for those cases.

    Returns:
        ``(plan, ntemps, root_id)``. ``plan`` is a tuple with one entry per
        contraction: ``(parent_id, left_id, right_id, kind, left_perm,
        left_shape, right_perm, right_shape, out_shape, out_perm)``, where
        ``kind`` is 1 for a pure multiplication and 0 otherwise. ``ntemps`` is
        the number of node ids assigned and ``root_id`` the id of ``tree.root``.

    Raises:
        NotImplementedError: For contractions with a missing operand,
            operand preparations that are not plain permutations, or a
            non-``None`` tensordot ``perm``.
    """

    # Imported by name because private helpers of this module are used below.
    contract_mod = importlib.import_module("cotengra.contract")
    contractions = contract_mod.extract_contractions(tree)
    node_to_id, ntemps = _normalize_node_ids(tree, contractions)
    plan = []

    for parent, left, right, tdot, arg, perm in contractions:
        if left is None or right is None:
            # A missing operand marks a single-tensor preprocessing step.
            raise NotImplementedError(
                "C++ torch contractor does not support cotengra preprocessing."
            )

        left_shape = _shape_from_inds(tree, left)
        right_shape = _shape_from_inds(tree, right)
        if tdot:
            parsed = contract_mod._parse_tensordot_axes_to_matmul(
                arg,
                left_shape,
                right_shape,
            )
        else:
            parsed = contract_mod._parse_eq_to_batch_matmul(
                arg,
                left_shape,
                right_shape,
            )

        (
            eq_a,
            eq_b,
            new_shape_a,
            new_shape_b,
            new_shape_ab,
            perm_ab,
            pure_multiplication,
        ) = parsed

        # Operand preparations must reduce to axis permutations (or be absent).
        left_perm = _is_plain_permutation(eq_a)
        right_perm = _is_plain_permutation(eq_b)
        if left_perm is None and eq_a is not None:
            raise NotImplementedError(f"Unsupported left preparation: {eq_a!r}")
        if right_perm is None and eq_b is not None:
            raise NotImplementedError(f"Unsupported right preparation: {eq_b!r}")

        plan.append(
            (
                node_to_id[parent],
                node_to_id[left],
                node_to_id[right],
                1 if pure_multiplication else 0,
                left_perm or (),
                _maybe_tuple(new_shape_a),
                right_perm or (),
                _maybe_tuple(new_shape_b),
                _maybe_tuple(new_shape_ab),
                _maybe_tuple(perm_ab),
            )
        )

        if perm is not None:
            raise NotImplementedError(
                "C++ torch contractor does not support cotengra tensordot perm."
            )

    root_id = node_to_id[tree.root]
    return tuple(plan), int(ntemps), int(root_id)
|
||||
|
||||
|
||||
@lru_cache(maxsize=32)
def compile_batch_groups(tree, max_flops=SMALL_GEMM_BATCH_FLOPS):
    """Group small same-shape matmuls of equal tree depth.

    A plan operation qualifies when ``_matmul_signature`` recognizes it and its
    ``m * k * n`` does not exceed ``max_flops``. Operations are grouped by
    ``(depth, kind, m, k, n)``, where depth is 0 for leaves and one more than
    the deeper child otherwise.

    Returns:
        A tuple of groups, each a tuple of plan indices; only groups with at
        least two operations are kept, sorted by their grouping key.
    """
    plan, _ntemps, _root_id = compile_torch_plan(tree)
    contractions = importlib.import_module("cotengra.contract").extract_contractions(tree)
    # Leaves sit at depth 0; parent depths are filled in contraction order.
    depth = {frozenset((i,)): 0 for i in range(tree.N)}
    groups = defaultdict(list)

    for op_index, (contract_op, contraction) in enumerate(zip(plan, contractions)):
        parent, left, right, _tdot, _arg, _perm = contraction
        d = max(depth[left], depth[right]) + 1
        depth[parent] = d
        sig = _matmul_signature(contract_op)
        if sig is None:
            continue
        kind, m, k, n, flops = sig
        if flops > max_flops:
            continue
        groups[(d, kind, m, k, n)].append(op_index)

    batch_groups = tuple(
        tuple(items)
        for _key, items in sorted(groups.items(), key=lambda item: (item[0], item[1][0]))
        if len(items) >= 2
    )
    return batch_groups
|
||||
|
||||
|
||||
def batch_group_summary(tree, max_flops=SMALL_GEMM_BATCH_FLOPS):
    """Summarize the batchable matmul groups of ``tree``.

    Returns a dict with the number of groups, the number of operations they
    cover, the calls saved if each group became a single call, and one
    ``((m, k, n), group_size, first_eight_indices)`` entry per group.
    """
    plan, _ntemps, _root_id = compile_torch_plan(tree)
    groups = compile_batch_groups(tree, max_flops=max_flops)

    group_sizes = [len(group) for group in groups]
    by_shape = [
        (_matmul_signature(plan[group[0]])[1:4], len(group), group[:8])
        for group in groups
    ]

    summary = {
        "groups": len(groups),
        "covered_ops": sum(group_sizes),
        "calls_saved": sum(size - 1 for size in group_sizes),
        "by_shape": by_shape,
    }
    return summary
|
||||
|
||||
|
||||
def contract_tree_cpp(tree, arrays):
    """Contract ``tree`` over ``arrays`` with the experimental C++ torch contractor.

    The contractor comes from ``prepare_torch_cpp_contractor`` and ``arrays``
    is passed to it as a list.
    """
    operands = list(arrays)
    return prepare_torch_cpp_contractor(tree).contract(operands)
|
||||
|
||||
|
||||
def prepare_torch_cpp_contractor(tree):
    """Load the extension and compile ``tree`` without running contraction.

    Contractors are cached per tree in ``_CONTRACTORS``. The cache is keyed by
    the tree object itself rather than ``id(tree)``: an id can be reused once
    the original tree is garbage collected, which would return a contractor
    compiled for a different tree. Keying by the object keeps the tree alive
    for as long as its contractor is cached. The tree must be hashable, which
    the ``lru_cache`` on ``compile_torch_plan`` already requires.
    """

    ext = _load_extension()
    contractor = _CONTRACTORS.get(tree)
    if contractor is None:
        plan, ntemps, root_id = compile_torch_plan(tree)
        contractor = ext.Contractor(list(plan), ntemps, root_id)
        _CONTRACTORS[tree] = contractor
    return contractor
|
||||
186
tests/test_cpu_backend.py
Normal file
186
tests/test_cpu_backend.py
Normal file
@@ -0,0 +1,186 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Z
|
||||
|
||||
from qibotn.backends.cpu import CpuTensorNet
|
||||
from qibotn.benchmark_cases import (
|
||||
build_circuit as build_benchmark_circuit,
|
||||
exact_pauli_sum,
|
||||
)
|
||||
|
||||
|
||||
def build_circuit(nqubits=6):
    """Build the reference circuit: an RY and RZ on every qubit, then a CNOT chain."""
    circuit = Circuit(nqubits)

    for target in range(nqubits):
        step = target + 1
        circuit.add(gates.RY(target, theta=0.1 * step))
        circuit.add(gates.RZ(target, theta=-0.05 * step))

    for control in range(nqubits - 1):
        circuit.add(gates.CNOT(control, control + 1))

    return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits):
    """Return the ring observable ``sum_q 0.5 * X(q) * Z(q + 1 mod nqubits)``."""
    ring_terms = (
        0.5 * X(site) * Z((site + 1) % nqubits) for site in range(nqubits)
    )
    # ``sum`` starts from 0 and adds the terms in order, like the running total.
    return hamiltonians.SymbolicHamiltonian(form=sum(ring_terms))
|
||||
|
||||
|
||||
def test_cpu_generic_tn_expectation_matches_statevector():
    """Generic tensor-network expectation agrees with the statevector value."""
    circuit = build_circuit()
    observable = build_observable(circuit.nqubits)
    reference = observable.expectation_from_state(circuit().state(numpy=True))

    settings = {
        "MPI_enabled": False,
        "MPS_enabled": False,
        "NCCL_enabled": False,
        "expectation_enabled": observable,
    }
    measured = CpuTensorNet(settings).execute_circuit(circuit)[0]

    assert math.isclose(measured, reference, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_mps_expectation_matches_statevector():
    """MPS expectation (torch tensors, bond 64) agrees with the statevector value."""
    circuit = build_circuit()
    observable = build_observable(circuit.nqubits)
    reference = observable.expectation_from_state(circuit().state(numpy=True))

    settings = {
        "MPI_enabled": False,
        "MPS_enabled": True,
        "NCCL_enabled": False,
        "expectation_enabled": observable,
        "max_bond_dimension": 64,
        "tensor_module": "torch",
        "torch_threads": 1,
    }
    measured = CpuTensorNet(settings).execute_circuit(circuit)[0]

    assert math.isclose(measured, reference, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_runcard_pauli_pattern_matches_statevector():
    """The ``"IXZ"`` runcard pattern matches X(1) Z(2) X(4) Z(5) with and without MPS."""
    circuit = build_circuit()
    pattern_observable = {"pauli_string_pattern": "IXZ"}
    reference_hamiltonian = hamiltonians.SymbolicHamiltonian(
        form=X(1) * Z(2) * X(4) * Z(5)
    )
    reference = reference_hamiltonian.expectation_from_state(
        circuit().state(numpy=True)
    )

    for use_mps in (False, True):
        settings = {
            "MPI_enabled": False,
            "MPS_enabled": use_mps,
            "NCCL_enabled": False,
            "expectation_enabled": pattern_observable,
            "max_bond_dimension": 64,
            "tensor_module": "torch",
            "torch_threads": 1,
        }
        measured = CpuTensorNet(settings).execute_circuit(circuit)[0]

        assert math.isclose(measured, reference, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_mps_sampling_uses_nshots():
    """Sampling a 4-qubit GHZ circuit yields ``nshots`` all-zero or all-one outcomes."""
    ghz = Circuit(4)
    ghz.add(gates.H(0))
    for control in range(3):
        ghz.add(gates.CNOT(control, control + 1))

    settings = {
        "MPI_enabled": False,
        "MPS_enabled": True,
        "NCCL_enabled": False,
        "expectation_enabled": False,
    }
    result = CpuTensorNet(settings).execute_circuit(ghz, nshots=100)

    assert sum(result.frequencies().values()) == 100
    assert set(result.frequencies()) <= {"0000", "1111"}
|
||||
|
||||
|
||||
def test_cpu_mps_mpo_expectation_matches_statevector():
    """A bond-1 MPO for X(0) Z(1) on four qubits matches the exact Pauli sum."""
    circuit = build_circuit(nqubits=4)
    pauli_x = np.array([[0, 1], [1, 0]], dtype=complex)
    pauli_z = np.array([[1, 0], [0, -1]], dtype=complex)
    identity = np.eye(2, dtype=complex)
    # One (1, 2, 2, 1) site tensor per qubit.
    mpo = [
        site_operator.reshape(1, 2, 2, 1)
        for site_operator in (pauli_x, pauli_z, identity, identity)
    ]
    reference = exact_pauli_sum(circuit, [(1.0, (("X", 0), ("Z", 1)))], 4)

    settings = {
        "MPI_enabled": False,
        "MPS_enabled": True,
        "NCCL_enabled": False,
        "expectation_enabled": {"mpo_tensors": mpo},
        "max_bond_dimension": 64,
        "tensor_module": "torch",
        "torch_threads": 1,
    }
    measured = CpuTensorNet(settings).execute_circuit(circuit)[0]

    assert math.isclose(measured, reference, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_mps_dense_observable_dict_matches_known_value():
    """The Bell-state projector has expectation 1 on the H + CNOT state."""
    circuit = Circuit(2)
    circuit.add(gates.H(0))
    circuit.add(gates.CNOT(0, 1))

    # Dense matrix with 0.5 at the four (0 or 3, 0 or 3) corner entries.
    projector = np.zeros((4, 4), dtype=complex)
    for row in (0, 3):
        for col in (0, 3):
            projector[row, col] = 0.5

    settings = {
        "MPI_enabled": False,
        "MPS_enabled": True,
        "NCCL_enabled": False,
        "expectation_enabled": {"matrix": projector, "qubits": [0, 1]},
        "max_bond_dimension": 16,
        "tensor_module": "torch",
        "torch_threads": 1,
    }
    measured = CpuTensorNet(settings).execute_circuit(circuit)[0]

    assert math.isclose(measured, 1.0, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_generic_tn_long_pauli_string_matches_statevector():
    """Generic-TN (non-MPS) expectation of a 10-qubit XZ-pattern Pauli string.

    The reference value comes from a ``SymbolicHamiltonian`` with the
    alternating X/Z product written out explicitly on qubits 0..9.
    """
    circuit = build_benchmark_circuit("rxx_rzz", 10, 2, 42)
    observable = {"pauli_string_pattern": "XZ"}

    pauli_product = (
        X(0) * Z(1) * X(2) * Z(3) * X(4) * Z(5) * X(6) * Z(7) * X(8) * Z(9)
    )
    exact_hamiltonian = hamiltonians.SymbolicHamiltonian(form=pauli_product)
    dense_state = circuit().state(numpy=True)
    exact = exact_hamiltonian.expectation_from_state(dense_state)

    runcard = {
        "MPI_enabled": False,
        "MPS_enabled": False,
        "NCCL_enabled": False,
        "expectation_enabled": observable,
    }
    backend = CpuTensorNet(runcard)
    value = backend.execute_circuit(circuit)[0]

    assert math.isclose(value, exact, abs_tol=1e-12)
|
||||
@@ -35,7 +35,7 @@ def test_observable_expval(backend, nqubits):
|
||||
numpy_backend = construct_backend("numpy")
|
||||
ham, ham_form = build_observable(nqubits)
|
||||
circ = build_circuit(nqubits=nqubits, nlayers=1)
|
||||
|
||||
|
||||
exact_expval = numpy_backend.calculate_expectation_state(
|
||||
hamiltonian=ham,
|
||||
state=circ().state(),
|
||||
|
||||
46
tests/test_parallel.py
Normal file
46
tests/test_parallel.py
Normal file
@@ -0,0 +1,46 @@
|
||||
import numpy as np
|
||||
|
||||
from qibotn.parallel import _split_repeats, contract_tree_slices, mpi_slice_plan
|
||||
|
||||
|
||||
def test_mpi_slice_plan_block_balances_contiguous_ranges():
    """Block assignment of 10 slices over 4 ranks yields contiguous 3/3/2/2 chunks."""
    expected = [
        (0, 1, 2),
        (3, 4, 5),
        (6, 7),
        (8, 9),
    ]
    observed = [
        mpi_slice_plan(10, rank, 4, assignment="block").indices
        for rank in range(4)
    ]

    assert observed == expected
|
||||
|
||||
|
||||
def test_mpi_slice_plan_cyclic_balances_round_robin():
    """Cyclic assignment of 10 slices over 4 ranks deals indices round-robin."""
    expected = [
        (0, 4, 8),
        (1, 5, 9),
        (2, 6),
        (3, 7),
    ]
    observed = [
        mpi_slice_plan(10, rank, 4, assignment="cyclic").indices
        for rank in range(4)
    ]

    assert observed == expected
|
||||
|
||||
|
||||
class DummyTree:
    """Minimal stand-in exposing only ``contract_slice`` for the slice-sum test."""

    def contract_slice(self, arrays, i, backend=None):
        """Return the first array scaled by ``i + 1``; ``backend`` is ignored."""
        scale = i + 1
        first = arrays[0]
        return first * scale
|
||||
|
||||
|
||||
def test_contract_tree_slices_sums_numpy_slices():
    """Slices 0, 2 and 3 of ``DummyTree`` on ``[2]`` must sum to 2 * (1 + 3 + 4) = 16."""
    arrays = [np.asarray([2.0 + 0.0j])]
    slice_ids = (0, 2, 3)

    result = contract_tree_slices(DummyTree(), arrays, slice_ids, backend="numpy")

    expected = np.asarray([16.0 + 0.0j])
    np.testing.assert_allclose(result, expected)
|
||||
|
||||
|
||||
def test_split_repeats_balances_workers():
    """``_split_repeats`` spreads repeats evenly and returns fewer shares than workers when repeats run out."""
    balanced = _split_repeats(10, 4)
    assert balanced == [3, 3, 2, 2]

    fewer_repeats_than_workers = _split_repeats(2, 4)
    assert fewer_repeats_than_workers == [1, 1]
|
||||
400
tests/test_vidal_backend.py
Normal file
400
tests/test_vidal_backend.py
Normal file
@@ -0,0 +1,400 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import Symbol, X, Y, Z
|
||||
|
||||
from qibotn.benchmark_cases import exact_pauli_sum
|
||||
from qibotn.backends.vidal import (
|
||||
VidalBackend,
|
||||
_can_route_non_adjacent,
|
||||
_unsupported_reason,
|
||||
_operator_terms_to_mpo,
|
||||
_symbolic_hamiltonian_to_operator_terms,
|
||||
)
|
||||
from qibotn.backends.vidal_tebd import (
|
||||
VidalTEBDExecutor,
|
||||
_route_non_adjacent_gates,
|
||||
_gate_sites,
|
||||
)
|
||||
|
||||
|
||||
def build_local_circuit(nqubits=8, nlayers=3, seed=42):
    """Build a seeded brickwall circuit with nearest-neighbour CNOTs only.

    Each layer applies RY then RZ (angles drawn uniformly from [-pi, pi)) to
    every qubit, followed by CNOT(q, q + 1) on pairs starting at offset
    ``layer % 2``.
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    for layer_index in range(nlayers):
        # Draw order (RY angle, then RZ angle, qubit by qubit) fixes the circuit for a seed.
        for qubit in range(nqubits):
            ry_angle = rng.uniform(-math.pi, math.pi)
            circuit.add(gates.RY(qubit, theta=ry_angle))
            rz_angle = rng.uniform(-math.pi, math.pi)
            circuit.add(gates.RZ(qubit, theta=rz_angle))
        offset = layer_index % 2
        for control in range(offset, nqubits - 1, 2):
            circuit.add(gates.CNOT(control, control + 1))
    return circuit
|
||||
|
||||
|
||||
def test_vidal_backend_expectation_matches_statevector():
    """Vidal expectation of a three-term Pauli sum agrees with the state-vector value."""
    circuit = build_local_circuit()
    form = 0.5 * X(0) * Z(1) + 0.25 * Y(2) * Y(3) - 0.7 * Z(7)
    observable = hamiltonians.SymbolicHamiltonian(form=form)
    dense_state = circuit().state(numpy=True)
    exact = observable.expectation_from_state(dense_state)

    vidal = VidalBackend()
    vidal.configure_tn_simulation(max_bond_dimension=128, tensor_module="torch")
    value = vidal.expectation(circuit, observable)

    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_accepts_unlimited_bond_and_no_cutoff():
    """``max_bond_dimension=None`` and ``cut_ratio=None`` are accepted and give the exact value."""
    circuit = build_local_circuit(nqubits=6, nlayers=2)
    form = 0.5 * X(0) * Z(1) - 0.7 * Z(5)
    observable = hamiltonians.SymbolicHamiltonian(form=form)
    exact = observable.expectation_from_state(circuit().state(numpy=True))

    settings = {
        "max_bond_dimension": None,
        "cut_ratio": None,
        "tensor_module": "torch",
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable, preprocess=False)

    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_fallback_for_non_adjacent_gate():
    """With default settings (no compile_circuit), a non-adjacent CNOT still yields the exact value.

    Per the original test note, this path is expected to fall back to qmatchatea.
    """
    circuit = Circuit(4)
    for gate in (gates.H(0), gates.CNOT(0, 3)):
        circuit.add(gate)
    observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))

    vidal = VidalBackend()
    vidal.configure_tn_simulation(max_bond_dimension=32, tensor_module="torch")
    value = vidal.expectation(circuit, observable)

    dense_state = circuit().state(numpy=True)
    exact = observable.expectation_from_state(dense_state)
    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_routes_non_adjacent_with_compile():
    """A non-adjacent CNOT with ``compile_circuit=True`` matches the state-vector value."""
    circuit = Circuit(4)
    for gate in (gates.H(0), gates.CNOT(0, 3)):
        circuit.add(gate)

    observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))

    settings = {
        "max_bond_dimension": 32,
        "tensor_module": "torch",
        "compile_circuit": True,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable)

    dense_state = circuit().state(numpy=True)
    exact = observable.expectation_from_state(dense_state)
    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_can_route_non_adjacent():
    """``_can_route_non_adjacent`` accepts circuits of one- and two-qubit gates."""
    circuit = Circuit(4)
    for gate in (gates.H(0), gates.CNOT(0, 3)):
        circuit.add(gate)
    routable_with_long_range_gate = _can_route_non_adjacent(circuit)
    assert routable_with_long_range_gate

    # Adding an adjacent two-qubit gate must not change the verdict.
    circuit.add(gates.CNOT(0, 1))
    still_routable = _can_route_non_adjacent(circuit)
    assert still_routable
|
||||
|
||||
|
||||
def test_cannot_route_multi_qubit():
    """A circuit containing a three-qubit TOFFOLI is reported as not routable."""
    toffoli_circuit = Circuit(3)
    toffoli_circuit.add(gates.TOFFOLI(0, 1, 2))

    routable = _can_route_non_adjacent(toffoli_circuit)

    assert not routable
|
||||
|
||||
|
||||
def test_routing_preserves_adjacent_gates():
    """After routing an all-local circuit, every two-site gate acts on neighbouring sites."""
    circuit = build_local_circuit(nqubits=4, nlayers=2)
    queue_snapshot = list(circuit.queue)
    routed = _route_non_adjacent_gates(queue_snapshot, 4)

    # Gate counts are not compared (routing may insert SWAPs in general);
    # only adjacency of every two-site gate is checked.
    for gate in routed:
        sites = _gate_sites(gate)
        if len(sites) != 2:
            continue
        diff = abs(sites[0] - sites[1])
        assert diff == 1, f"Non-adjacent gate after routing: {gate.name} on {sites}"
|
||||
|
||||
|
||||
def test_routing_non_adjacent_cnot():
    """CNOT(0, 3) is routed as two SWAPs, a routed two-qubit gate, then two SWAPs back."""
    circuit = Circuit(4)
    for gate in (gates.H(0), gates.H(3), gates.CNOT(0, 3)):
        circuit.add(gate)

    routed = _route_non_adjacent_gates(list(circuit.queue), 4)

    # Expected: H(0), H(3), SWAP(2,3), SWAP(1,2), routed CNOT on (0,1), SWAP(1,2), SWAP(2,3)
    expected_names = ["h", "h", "swap", "swap", "routed_two_qubit", "swap", "swap"]
    names = [getattr(item, "name", item.__class__.__name__) for item in routed]
    assert names == expected_names, f"Got {names}"

    # The same circuit must also give the exact value through the full backend pipeline.
    observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))
    exact = observable.expectation_from_state(circuit().state(numpy=True))

    settings = {
        "max_bond_dimension": 32,
        "tensor_module": "torch",
        "compile_circuit": True,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable)
    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_routing_preserves_reversed_non_adjacent_gate_order():
    """A CNOT whose control index exceeds its target (5 -> 0) still gives the exact value."""
    circuit = Circuit(6)
    for gate in (gates.X(5), gates.H(0), gates.CNOT(5, 0)):
        circuit.add(gate)

    form = X(0) + Z(5) + Z(0) * Z(5)
    observable = hamiltonians.SymbolicHamiltonian(form=form)
    exact = observable.expectation_from_state(circuit().state(numpy=True))

    settings = {
        "max_bond_dimension": 64,
        "tensor_module": "torch",
        "compile_circuit": True,
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable, preprocess=False)

    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_preprocesses_non_adjacent_circuit():
    """With ``preprocess=True`` and no fallback, a non-adjacent CNOT gives the exact value."""
    circuit = Circuit(4)
    for gate in (gates.H(0), gates.CNOT(0, 3)):
        circuit.add(gate)
    observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))
    exact = observable.expectation_from_state(circuit().state(numpy=True))

    settings = {
        "max_bond_dimension": 64,
        "tensor_module": "torch",
        "compile_circuit": True,
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable, preprocess=True)

    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_preprocesses_toffoli_locally():
    """A TOFFOLI(0, 1, 3) circuit is handled with ``preprocess=True`` and no fallback."""
    circuit = Circuit(4)
    for gate in (gates.H(0), gates.H(1), gates.TOFFOLI(0, 1, 3)):
        circuit.add(gate)
    observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))
    exact = observable.expectation_from_state(circuit().state(numpy=True))

    settings = {
        "max_bond_dimension": 128,
        "tensor_module": "torch",
        "compile_circuit": True,
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable, preprocess=True)

    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_expectation_preserves_complex_coefficients():
    """A complex coefficient on Z(0) is returned unchanged for an empty one-qubit circuit."""
    circuit = Circuit(1)
    coefficient = 1.0 + 2.0j
    observable = hamiltonians.SymbolicHamiltonian(form=coefficient * Z(0))

    settings = {
        "max_bond_dimension": 8,
        "tensor_module": "torch",
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable, preprocess=False)

    np.testing.assert_allclose(value, 1.0 + 2.0j, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_expectation_supports_custom_local_symbols():
    """Custom 2x2 ``Symbol`` operators (two of them sharing the name "A") are evaluated exactly."""
    circuit = build_local_circuit(nqubits=4, nlayers=2)

    matrix_a0 = np.array([[0.2, 1.0], [1.0, -0.3]], dtype=complex)
    matrix_b2 = np.array([[0.7, -0.4j], [0.4j, 0.1]], dtype=complex)
    matrix_a3 = np.array([[0.5, 0.2], [0.2, -0.8]], dtype=complex)
    a0 = Symbol(0, matrix_a0, name="A")
    b2 = Symbol(2, matrix_b2, name="B")
    a3 = Symbol(3, matrix_a3, name="A")

    observable = hamiltonians.SymbolicHamiltonian(form=0.7 * a0 * b2 - 0.4 * a3)
    exact = observable.expectation_from_state(circuit().state(numpy=True))

    settings = {
        "max_bond_dimension": 64,
        "tensor_module": "torch",
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable, preprocess=False)

    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_executor_mpo_expectation_matches_pauli_sum():
    """``expectation_mpo`` and ``expectation_pauli_sum`` agree for X(0) Z(1)."""
    circuit = build_local_circuit(nqubits=4, nlayers=2)
    executor = VidalTEBDExecutor(
        nqubits=circuit.nqubits,
        max_bond=64,
        tensor_module="torch",
    )
    executor.run_circuit(circuit)

    pauli_x = np.array([[0, 1], [1, 0]], dtype=complex)
    pauli_z = np.array([[1, 0], [0, -1]], dtype=complex)
    identity = np.eye(2, dtype=complex)
    # One (1, 2, 2, 1) tensor per site: X, Z, I, I.
    mpo = [
        site_op.reshape(1, 2, 2, 1)
        for site_op in (pauli_x, pauli_z, identity, identity)
    ]

    mpo_value = executor.expectation_mpo(mpo)
    pauli_terms = [(1.0, (("X", 0), ("Z", 1)))]
    pauli_value = executor.expectation_pauli_sum(pauli_terms)

    np.testing.assert_allclose(mpo_value, pauli_value, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_accepts_mpo_observable_dict():
    """A ``{"mpo_tensors": ...}`` observable gives the same value as ``exact_pauli_sum``."""
    circuit = build_local_circuit(nqubits=4, nlayers=2)

    pauli_x = np.array([[0, 1], [1, 0]], dtype=complex)
    pauli_z = np.array([[1, 0], [0, -1]], dtype=complex)
    identity = np.eye(2, dtype=complex)
    # One (1, 2, 2, 1) tensor per site: X, Z, I, I.
    mpo = [
        site_op.reshape(1, 2, 2, 1)
        for site_op in (pauli_x, pauli_z, identity, identity)
    ]
    exact = exact_pauli_sum(circuit, [(1.0, (("X", 0), ("Z", 1)))], 4)

    settings = {
        "max_bond_dimension": 64,
        "tensor_module": "torch",
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, {"mpo_tensors": mpo}, preprocess=False)

    np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_symbolic_hamiltonian_auto_mpo_matches_operator_sum():
    """The MPO built from operator terms evaluates to the same number as the term-by-term sum."""
    circuit = build_local_circuit(nqubits=5, nlayers=2)
    form = 0.3 * X(0) * Z(1) - 0.2j * Y(2) + 0.7 * Z(3) * X(4)
    observable = hamiltonians.SymbolicHamiltonian(form=form)

    executor = VidalTEBDExecutor(
        nqubits=circuit.nqubits,
        max_bond=64,
        tensor_module="torch",
    )
    executor.run_circuit(circuit)
    terms = _symbolic_hamiltonian_to_operator_terms(observable)

    term_value = executor.expectation_operator_sum(terms)
    auto_mpo = _operator_terms_to_mpo(terms, circuit.nqubits)
    mpo_value = executor.expectation_mpo(auto_mpo)

    np.testing.assert_allclose(mpo_value, term_value, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_accepts_dense_two_qubit_observable():
    """A dense 4x4 matrix observable on qubits [0, 1] evaluates to 1 on an H + CNOT circuit."""
    circuit = Circuit(2)
    for gate in (gates.H(0), gates.CNOT(0, 1)):
        circuit.add(gate)

    # 0.5 in the four corner entries (rows/columns 0 and 3).
    bell = np.zeros((4, 4), dtype=complex)
    bell[np.ix_([0, 3], [0, 3])] = 0.5
    observable = {"matrix": bell, "qubits": [0, 1]}

    settings = {
        "max_bond_dimension": 16,
        "tensor_module": "torch",
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable, preprocess=False)

    np.testing.assert_allclose(value, 1.0, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_dense_observable_preserves_complex_value():
    """A non-Hermitian dense observable with coefficient 1j on H(0) H(1) must give 0.25j."""
    circuit = Circuit(2)
    for qubit in (0, 1):
        circuit.add(gates.H(qubit))

    # Single non-zero entry at row 0, column 3.
    op = np.zeros((4, 4), dtype=complex)
    op[0, 3] = 1.0
    observable = {"coefficient": 1.0j, "matrix": op, "qubits": [0, 1]}

    settings = {
        "max_bond_dimension": 16,
        "tensor_module": "torch",
        "fallback": False,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable, preprocess=False)

    np.testing.assert_allclose(value, 0.25j, atol=1e-12)
|
||||
|
||||
|
||||
def test_truncation_error_no_truncation():
    """With bond dimension 256 on 6 qubits, both recorded truncation errors stay below 1e-14."""
    circuit = build_local_circuit(nqubits=6, nlayers=2)
    observable = hamiltonians.SymbolicHamiltonian(form=0.5 * X(0) * Z(1))

    backend = VidalBackend()
    backend.configure_tn_simulation(max_bond_dimension=256, tensor_module="torch")
    # The value itself is not checked; the call populates the truncation statistics.
    backend.expectation(circuit, observable)

    tolerance = 1e-14
    assert backend.last_truncation_error < tolerance, (
        f"Expected near-zero truncation error, got {backend.last_truncation_error}"
    )
    assert backend.last_max_truncation_error < tolerance, (
        "Expected near-zero max truncation error, got "
        f"{backend.last_max_truncation_error}"
    )
|
||||
|
||||
|
||||
def test_vidal_backend_matches_statevector_multiterm():
    """Three-term observable on a circuit with two non-adjacent CNOTs, ``compile_circuit=True``."""
    circuit = Circuit(5)
    for qubit in range(5):
        circuit.add(gates.RY(qubit, theta=0.7))
        circuit.add(gates.RZ(qubit, theta=0.3))
    for control, target in ((0, 2), (1, 4)):
        circuit.add(gates.CNOT(control, target))

    form = 0.3 * X(0) * Z(2) + 0.7 * Y(1) * Y(4) - 0.5 * Z(0) * X(4)
    observable = hamiltonians.SymbolicHamiltonian(form=form)

    exact_state = circuit().state(numpy=True)
    exact = observable.expectation_from_state(exact_state)

    settings = {
        "max_bond_dimension": 64,
        "tensor_module": "torch",
        "compile_circuit": True,
    }
    vidal = VidalBackend()
    vidal.configure_tn_simulation(**settings)
    value = vidal.expectation(circuit, observable)
    np.testing.assert_allclose(value, exact, atol=1e-10)
|
||||
18
tools/README.md
Normal file
18
tools/README.md
Normal file
@@ -0,0 +1,18 @@
|
||||
# Tools
|
||||
|
||||
Auxiliary scripts for profiling, legacy comparisons, and scale probes.
|
||||
|
||||
The main CPU expectation entrypoint is `../benchmark_cpu_expectation.py`.
|
||||
For the current Vidal/MPS 1D-chain tests, prefer `../run_vidal_mps_cases.sh`.
|
||||
|
||||
Files here are intentionally secondary:
|
||||
|
||||
- `compare_vidal_backend_qmatchatea.py`: diagnostic comparison against QMatchaTea.
|
||||
- `profile_vidal_chrome.py`: PyTorch CPU profiler for the Vidal path.
|
||||
- `run_cpu_single_cases.sh`: single-node scale probes.
|
||||
- `run_cpu_large_cases.sh`: two-node MPI scale probes.
|
||||
- `run_vidal_segment_mpi_scan.sh`: rank/thread scaling scan for Vidal segmented MPI.
|
||||
- `baseline_mps_expectation.py`: legacy MPS comparison CLI kept for old commands.
|
||||
- `benchmark_tn_mpi.py`, `benchmark_search.py`, `benchmark_slice.py`, `benchmark_contract_sliced.py`, `check_tree.py`: old TN path-search/slicing experiments.
|
||||
- `qibojit_reference_expectation.py`: state-vector reference helper.
|
||||
- `validate_vidal_mpi_correctness.py`: focused Vidal MPI correctness helper.
|
||||
201
tools/baseline_mps_expectation.py
Normal file
201
tools/baseline_mps_expectation.py
Normal file
@@ -0,0 +1,201 @@
|
||||
"""MPS expectation benchmark for qmatchatea and Vidal backends."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
from qibotn.benchmark_cases import (
|
||||
build_circuit as build_benchmark_circuit,
|
||||
exact_pauli_sum,
|
||||
observable_terms,
|
||||
terms_to_dict,
|
||||
)
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal_tebd import run_vidal_ring_xz
|
||||
|
||||
|
||||
def optional_int(text):
    """Parse an integer CLI value, mapping the "no limit" keywords to ``None``.

    The strings "none", "null", "inf" and "unlimited" (any letter case) give
    ``None``; every other input is passed to ``int``.
    """
    if not isinstance(text, str):
        return int(text)
    unlimited_tokens = ("none", "null", "inf", "unlimited")
    return None if text.lower() in unlimited_tokens else int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Parse a float CLI value, mapping the "no limit" keywords to ``None``.

    The strings "none", "null", "inf" and "unlimited" (any letter case) give
    ``None`` (so "inf" is not parsed as a float); anything else goes to ``float``.
    """
    is_keyword = isinstance(text, str) and text.lower() in (
        "none",
        "null",
        "inf",
        "unlimited",
    )
    if is_keyword:
        return None
    return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
    """Format ``value`` with format spec ``fmt``, or return the text "None" when it is ``None``."""
    if value is None:
        return "None"
    return format(value, fmt)
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
    """Build the "brickwall_cnot" benchmark circuit for the given size and seed."""
    circuit_kind = "brickwall_cnot"
    return build_benchmark_circuit(circuit_kind, nqubits, nlayers, seed)
|
||||
|
||||
|
||||
def build_observable(nqubits):
    """Return the "ring_xz" observable terms for ``nqubits`` converted by ``terms_to_dict``."""
    ring_terms = observable_terms("ring_xz", nqubits)
    return terms_to_dict(ring_terms)
|
||||
|
||||
|
||||
def exact_expectation(circuit, nqubits):
    """Evaluate the "ring_xz" observable on ``circuit`` through ``exact_pauli_sum``."""
    ring_terms = observable_terms("ring_xz", nqubits)
    return exact_pauli_sum(circuit, ring_terms, nqubits)
|
||||
|
||||
|
||||
def main():
    """Run one MPS expectation benchmark from the command line.

    Parses the CLI options, builds the brickwall circuit and ring-XZ
    observable, optionally obtains a reference value (from ``--reference-file``
    or an exact computation with ``--exact``), runs the selected executor
    (``qmatchatea``, ``vidal`` or ``vidal-mpi``) and prints the expectation
    value, errors against the reference and elapsed seconds on rank 0.

    Raises:
        ValueError: if ``--exact`` is requested for more qubits than
            ``--exact-max-qubits``, or if ``--executor vidal-mpi`` is used
            without ``--mpi-ct``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=40)
    parser.add_argument("--nlayers", type=int, default=30)
    # "--bond" / "--cut-ratio" accept the keywords handled by optional_int / optional_float.
    parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=512)
    parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--tensor-module", choices=("numpy", "torch"), default="torch")
    parser.add_argument("--torch-threads", type=int, default=32)
    parser.add_argument(
        "--executor",
        choices=("qmatchatea", "vidal", "vidal-mpi"),
        default="qmatchatea",
    )
    parser.add_argument("--mpi-ct", action="store_true")
    parser.add_argument("--mpi-barriers", type=int, default=-1)
    parser.add_argument("--mpi-isometrization", type=int, default=-1)
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    parser.add_argument("--reference-file")
    parser.add_argument(
        "--mpi-rank-map",
        action="store_true",
        help="Print MPI rank, host, pid, and torch thread placement metadata.",
    )
    args = parser.parse_args()
    # Only errors from these two loggers are shown during the run.
    logging.getLogger("qibo.config").setLevel(logging.ERROR)
    logging.getLogger("qtealeaves").setLevel(logging.ERROR)
    # torch is imported (and its thread count set) regardless of --tensor-module.
    import torch

    torch.set_num_threads(args.torch_threads)
    # Single-process defaults; overwritten below when --mpi-ct is given.
    rank = 0
    size = 1
    if args.mpi_ct:
        # mpi4py is imported lazily, so MPI is only defined on this path.
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()
        size = MPI.COMM_WORLD.Get_size()
        if args.mpi_rank_map:
            rank_info = {
                "rank": rank,
                "size": size,
                "host": socket.gethostname(),
                "pid": os.getpid(),
                "torch_threads": args.torch_threads,
                "omp_num_threads": os.environ.get("OMP_NUM_THREADS", ""),
                "mkl_num_threads": os.environ.get("MKL_NUM_THREADS", ""),
            }
            # Collect every rank's metadata on rank 0 and print it ordered by rank.
            rank_infos = MPI.COMM_WORLD.gather(rank_info, root=0)
            if rank == 0:
                print("mpi_rank_map")
                for item in sorted(rank_infos, key=lambda row: row["rank"]):
                    print(
                        "rank={rank} size={size} host={host} pid={pid} "
                        "torch_threads={torch_threads} "
                        "OMP_NUM_THREADS={omp_num_threads} "
                        "MKL_NUM_THREADS={mkl_num_threads}".format(**item)
                    )

    circuit = build_circuit(args.nqubits, args.nlayers, args.seed)
    observable = build_observable(args.nqubits)
    # Reference value: a JSON file with an "expectation" key takes precedence over --exact.
    exact = None
    if args.reference_file:
        with open(args.reference_file, "r", encoding="utf-8") as f:
            exact = float(json.load(f)["expectation"])
    elif args.exact:
        if args.nqubits > args.exact_max_qubits:
            raise ValueError(
                f"--exact is limited to {args.exact_max_qubits} qubits by default."
            )
        exact = exact_expectation(circuit, args.nqubits)

    if rank == 0:
        # Header line describing the run configuration.
        if args.mpi_ct and args.executor in ("vidal", "vidal-mpi"):
            mpi_label = f"VidalSegment/{size}"
        else:
            mpi_label = f"MPIMPS/{size}" if args.mpi_ct else "SR"
        print(
            f"nqubits={args.nqubits} nlayers={args.nlayers} "
            f"bond={format_optional(args.bond)} "
            f"cut_ratio={format_optional(args.cut_ratio)} seed={args.seed} "
            f"tensor_module={args.tensor_module} svd_control=E! "
            f"compile_circuit=True mpi={mpi_label} executor={args.executor}"
        )
        if exact is not None:
            print(f"exact={exact:.16e}")
        print("expval abs_error rel_error seconds")

    start = time.perf_counter()
    # Only the segmented Vidal MPI path below assigns per-section timings.
    timings = None
    if args.executor in ("vidal", "vidal-mpi"):
        if args.executor == "vidal-mpi" and not args.mpi_ct:
            raise ValueError("--executor vidal-mpi requires --mpi-ct.")
        if args.mpi_ct:
            from qibotn.backends.vidal_mpi_segment import run_segment_vidal_mpi_ring_xz

            value, timings = run_segment_vidal_mpi_ring_xz(
                circuit,
                max_bond=args.bond,
                cut_ratio=args.cut_ratio,
                tensor_module=args.tensor_module,
                comm=MPI.COMM_WORLD,
            )
        else:
            value = run_vidal_ring_xz(
                circuit,
                max_bond=args.bond,
                cut_ratio=args.cut_ratio,
                tensor_module=args.tensor_module,
            )
    else:
        backend = QMatchaTeaBackend()
        backend.configure_tn_simulation(
            ansatz="MPS",
            max_bond_dimension=args.bond,
            cut_ratio=args.cut_ratio,
            svd_control="E!",
            tensor_module=args.tensor_module,
            compile_circuit=True,
            track_memory=False,
            mpi_approach="CT" if args.mpi_ct else "SR",
            mpi_num_procs=size,
            mpi_where_barriers=args.mpi_barriers if args.mpi_ct else -1,
            mpi_isometrization=args.mpi_isometrization,
        )
        value = backend.expectation(
            circuit,
            observable,
            preprocess=False,
            compile_circuit=True,
        )
    max_timings = None
    if timings:
        # timings is only set on the MPI path, so MPI is defined here.
        # Every rank takes part in the reduction; rank 0 receives the per-key maximum.
        max_timings = {
            key: MPI.COMM_WORLD.reduce(local_value, op=MPI.MAX, root=0)
            for key, local_value in timings.items()
        }
    # Only rank 0 reports results.
    if rank != 0:
        return
    value = float(np.real(value))
    elapsed = time.perf_counter() - start
    abs_error = float("nan") if exact is None else abs(value - exact)
    # The denominator is floored at 1e-15 to avoid dividing by zero.
    rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
    print(f"{value:.16e} {abs_error:.6e} {rel_error:.6e} {elapsed:.3f}")
    if max_timings:
        print("timing_section max_seconds")
        for key, max_value in max_timings.items():
            print(f"{key} {max_value:.6f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
56
tools/benchmark_contract_sliced.py
Normal file
56
tools/benchmark_contract_sliced.py
Normal file
@@ -0,0 +1,56 @@
|
||||
"""MPI parallel sliced contraction using pre-sliced tree."""
|
||||
import os
import pickle
import time

NQUBITS, NLAYERS, NCORES = 25, 10, 48

# Export the thread-count variables BEFORE importing numpy/torch: BLAS/OpenMP
# runtimes may read them when the library is first loaded, so setting them
# after `import numpy` can be too late to have any effect.
os.environ['OMP_NUM_THREADS'] = str(NCORES)
os.environ['MKL_NUM_THREADS'] = str(NCORES)

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, size = comm.Get_rank(), comm.Get_size()

import torch
import qibo, quimb as qu
from qibotn.observables import build_random_circuit

torch.set_num_threads(NCORES)

# Build the tensor network for the X(0) Z(1) local expectation on every rank.
circuit = build_random_circuit(NQUBITS, NLAYERS)
qibo.set_backend("qibotn", platform="quimb")
backend = qibo.get_backend()
backend.configure_tn_simulation(ansatz="tn")
qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz)
tn = qc.local_expectation(qu.pauli('x') & qu.pauli('z'), (0, 1), rehearse='tn')

# Rank 0 reads the pre-sliced contraction tree and broadcasts it to the other ranks.
if rank == 0:
    # NOTE: pickle.load can execute arbitrary code; only load tree files you produced yourself.
    with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'rb') as f:
        tree = pickle.load(f)
else:
    tree = None
tree = comm.bcast(tree, root=0)

arrays = [torch.from_numpy(np.asarray(t._data)) for t in tn.tensors]
n_slices = tree.multiplicity

if rank == 0:
    print(f"Slices: {n_slices}, Ranks: {size}, "
          f"Peak: {tree.max_size() * 16 / 1e9:.2f} GB, "
          f"Threads/rank: {NCORES}, Backend: torch")

t0 = time.time()
result = None
# Round-robin distribution: rank r contracts slices r, r + size, r + 2 * size, ...
for i in range(rank, n_slices, size):
    val = tree.contract_slice(arrays, i, backend='torch')
    val_np = val.cpu().numpy().reshape(-1)
    result = val_np if result is None else result + val_np

# Ranks that received no slice still have to contribute a buffer to the reduction.
if result is None:
    result = np.zeros(1, dtype=np.complex128)

total = np.zeros_like(result) if rank == 0 else None
comm.Reduce(result, total, root=0)

if rank == 0:
    print(f"Contract: {time.time() - t0:.4f}s Expectation: {0.5 * total[0].real:.10f}")
|
||||
34
tools/benchmark_search.py
Normal file
34
tools/benchmark_search.py
Normal file
@@ -0,0 +1,34 @@
|
||||
"""Search contraction path and save."""
|
||||
import time
import os
import pickle
from qibotn.parallel import parallel_path_search
from qibotn.observables import build_random_circuit
import qibo, quimb as qu

from mpi4py import MPI

NQUBITS, NLAYERS, WORKERS = 20, 10, 96

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()
# With several ranks the search is distributed over MPI, otherwise over a local process pool.
if size > 1:
    method = 'mpi'
else:
    method = 'processpool'

# Tensor network for the X(0) Z(1) local expectation of a random circuit.
circuit = build_random_circuit(NQUBITS, NLAYERS)
qibo.set_backend("qibotn", platform="quimb")
backend = qibo.get_backend()
backend.configure_tn_simulation(ansatz="tn")
qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz)
xz_operator = qu.pauli('x') & qu.pauli('z')
tn = qc.local_expectation(xz_operator, (0, 1), rehearse='tn')

if rank == 0:
    print(f"Searching {NQUBITS}q {NLAYERS}l, method={method}, ranks={size}, workers/rank={WORKERS}...")
t0 = time.time()
tree = parallel_path_search(
    tn,
    tn.outer_inds(),
    method=method,
    total_repeats=1024,
    max_time=300,
    n_workers=WORKERS,
    trial_timeout=60,
)
t_search = time.time() - t0

# Only rank 0 writes the resulting tree to disk.
if rank == 0:
    os.makedirs('data', exist_ok=True)
    path = f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl"
    with open(path, 'wb') as f:
        pickle.dump(tree, f)
    print(f"Search: {t_search:.2f}s Peak: {tree.max_size() * 16 / 1e9:.2f} GB Saved: {path}")
|
||||
16
tools/benchmark_slice.py
Normal file
16
tools/benchmark_slice.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""Slice saved tree and save."""
|
||||
import pickle

NQUBITS, NLAYERS = 25, 10

# Load the unsliced tree from data/.
with open(f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl", 'rb') as source:
    tree = pickle.load(source)

original_peak_gb = tree.max_size() * 16 / 1e9
print(f"Original peak: {original_peak_gb:.2f} GB")

# Slice until the largest intermediate holds at most 2**28 elements.
tree_sliced = tree.slice_and_reconfigure(target_size=2**28)

with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'wb') as sink:
    pickle.dump(tree_sliced, sink)

sliced_peak_gb = tree_sliced.max_size() * 16 / 1e9
print(f"Sliced peak: {sliced_peak_gb:.2f} GB Slices: {tree_sliced.multiplicity}")
|
||||
378
tools/benchmark_tn_mpi.py
Normal file
378
tools/benchmark_tn_mpi.py
Normal file
@@ -0,0 +1,378 @@
|
||||
"""MPI-parallel TN benchmark: path search + contraction via MPI."""
|
||||
import json
|
||||
import pickle
|
||||
import time
|
||||
import argparse
|
||||
import numpy as np
|
||||
import cotengra as ctg
|
||||
import qibo
|
||||
from qibo import Circuit, gates
|
||||
from mpi4py import MPI
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from qibotn.observables import check_observable, extract_gates_and_qubits
|
||||
|
||||
|
||||
def _load_observable(observable_file=None, observable_json=None):
|
||||
if observable_file:
|
||||
with open(observable_file, "r", encoding="utf8") as f:
|
||||
return json.load(f)
|
||||
if observable_json:
|
||||
return json.loads(observable_json)
|
||||
return None
|
||||
|
||||
|
||||
def _term_to_quimb_operator(term):
    """Convert one extracted Hamiltonian term to a quimb operator.

    ``term`` is iterated as ``(qubit, gate_name, _)`` triples. The coefficient
    is read from the third field of the first triple (``1.0`` for an empty
    term). Entries named "I" are skipped; the remaining Pauli operators are
    combined with ``&`` in iteration order.

    Returns:
        ``(coefficient, operator, qubits)``; ``operator`` is ``None`` when
        the term has no non-identity entry.
    """
    import quimb as qu

    coeff = complex(term[0][2]) if term else 1.0
    operator = None
    sites = []

    for raw_qubit, raw_name, _ in term:
        site = int(raw_qubit)
        label = str(raw_name).upper()
        if label == "I":
            continue
        sites.append(site)
        factor = qu.pauli(label.lower())
        operator = factor if operator is None else operator & factor

    return complex(coeff), operator, tuple(sites)
|
||||
|
||||
|
||||
def _run_serial_search(tn_bytes, output_inds, repeats, seed, num_slices, n_ranks, max_time):
    """Run one non-parallel cotengra hyper-optimisation on a pickled tensor network.

    ``num_slices`` and ``n_ranks`` are accepted but not used in the body.

    Returns:
        ``(cost, tree)`` where ``cost`` is ``tree.combo_cost(factor=256)``.
    """
    import pickle
    import cotengra as ctg
    import random

    random.seed(seed)
    network = pickle.loads(tn_bytes)
    optimizer_options = {
        "methods": ['kahypar', 'kahypar-agglom', 'spinglass'],
        "max_repeats": repeats,
        "parallel": False,
        "minimize": 'combo-256',
        "max_time": max_time,
        "optlib": "random",
        "slicing_opts": {'target_size': 2**29, 'allow_outer': True},
        "progbar": False,
    }
    optimizer = ctg.HyperOptimizer(**optimizer_options)
    tree = network.contraction_tree(optimize=optimizer, output_inds=output_inds)
    cost = tree.combo_cost(factor=256)
    return cost, tree
|
||||
|
||||
|
||||
def parallel_search(tn, output_inds, total_repeats, n_workers, num_slices, n_ranks,
                    timeout):
    """Search for a contraction tree using a pool of worker processes.

    The repeats are split evenly over ``n_workers`` processes, each running
    ``_run_serial_search`` with its own seed (``0 .. n_workers - 1``); the
    tree with the lowest reported cost wins. With ``n_workers <= 1`` the
    search runs in the calling process with seed 0.

    Args:
        tn: Tensor network to optimise (must be picklable).
        output_inds: Output indices of the contraction.
        total_repeats: Total number of optimizer repeats to distribute.
        n_workers: Number of worker processes.
        num_slices: Forwarded unchanged to ``_run_serial_search``.
        n_ranks: Forwarded unchanged to ``_run_serial_search``.
        timeout: Per-worker optimizer time budget in seconds, or ``None``
            for no limit. Results are awaited for ``timeout + 5`` seconds.

    Returns:
        The best tree found, or ``None`` when no worker produced a result
        before the deadline.
    """
    import os
    import pickle
    import signal
    from concurrent.futures import ProcessPoolExecutor, as_completed
    # Before Python 3.11 this is a distinct class from the builtin
    # TimeoutError, so both have to be caught.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    tn_bytes = pickle.dumps(tn)
    if n_workers <= 1:
        return _run_serial_search(
            tn_bytes, output_inds, total_repeats, 0, num_slices, n_ranks, timeout
        )[1]

    repeats_per = max(1, total_repeats // n_workers)
    best_cost, best_tree = float('inf'), None
    # Small grace period on top of the optimizer budget; None means wait forever.
    wait_limit = None if timeout is None else timeout + 5

    pool = ProcessPoolExecutor(max_workers=n_workers)
    futures = [
        pool.submit(_run_serial_search, tn_bytes, output_inds,
                    repeats_per, seed, num_slices, n_ranks, timeout)
        for seed in range(n_workers)
    ]
    try:
        for fut in as_completed(futures, timeout=wait_limit):
            try:
                cost, tree = fut.result()
                if cost < best_cost:
                    best_cost, best_tree = cost, tree
            except Exception as e:
                # Best effort: one failed worker must not abort the search.
                print(f" [worker failed] {e}")
    except (TimeoutError, FuturesTimeoutError):
        # Deadline reached: keep whatever has been collected so far.
        pass
    finally:
        for fut in futures:
            fut.cancel()
        # Workers may still be inside a long optimizer run, so they are
        # killed rather than joined. ``_processes`` is a private attribute
        # and can be None, hence the defensive access.
        for pid in list((getattr(pool, "_processes", None) or {}).keys()):
            try:
                os.kill(pid, signal.SIGKILL)
            except ProcessLookupError:
                pass
        pool.shutdown(wait=False)

    return best_tree
|
||||
|
||||
|
||||
def make_circuit(circuit_type, nqubits, nlayers=1):
    """Build one of the benchmark circuits.

    Supported ``circuit_type`` values are ``"qft"``, ``"variational"``,
    ``"ghz"`` and ``"brickwork"``. Random angles are drawn from the global
    ``np.random`` state, so callers seed it for reproducibility.

    Raises:
        ValueError: For an unknown ``circuit_type``.
    """
    c = Circuit(nqubits)
    two_pi = 2 * np.pi

    if circuit_type == "qft":
        from qibo.models import QFT
        return QFT(nqubits)

    if circuit_type == "variational":
        for layer in range(nlayers):
            for q in range(nqubits):
                c.add(gates.RY(q, theta=np.random.uniform(0, two_pi)))
            # Alternate the entangling pattern between even and odd bonds.
            for q in range(layer % 2, nqubits - 1, 2):
                c.add(gates.CZ(q, q + 1))
        return c

    if circuit_type == "ghz":
        c.add(gates.H(0))
        for q in range(nqubits - 1):
            c.add(gates.CNOT(q, q + 1))
        return c

    if circuit_type == "brickwork":
        for q in range(nqubits):
            c.add(gates.H(q))
        for layer in range(nlayers):
            for q in range(layer % 2, nqubits - 1, 2):
                c.add(gates.CNOT(q, q + 1))
                c.add(gates.RZ(q, theta=np.random.uniform(0, two_pi)))
                c.add(gates.RZ(q + 1, theta=np.random.uniform(0, two_pi)))
        return c

    raise ValueError(f"Unknown circuit: {circuit_type}")
|
||||
|
||||
|
||||
def _contract_mpi(tree, arrays, comm, root=0):
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
is_torch = type(arrays[0]).__module__.startswith("torch")
|
||||
|
||||
result_np = None
|
||||
for i in range(rank, tree.multiplicity, size):
|
||||
x = tree.contract_slice(arrays, i)
|
||||
x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x))
|
||||
result_np = x_np if result_np is None else result_np + x_np
|
||||
|
||||
if result_np is None:
|
||||
result_np = np.zeros(1, dtype=np.complex128)
|
||||
|
||||
result = np.zeros_like(result_np) if rank == root else None
|
||||
comm.Reduce(result_np, result, root=root)
|
||||
|
||||
if rank == root:
|
||||
import torch
|
||||
return torch.from_numpy(np.asarray(result)) if is_torch else result
|
||||
return None
|
||||
|
||||
|
||||
def run_mpi(circuit, nqubits, num_slices, total_repeats=1024,
            load_path=None, save_path=None, search_workers=48,
            search_timeout=600):
    """Search a contraction path and contract the state vector over MPI.

    Each rank runs ``parallel_search`` over ``total_repeats // size``
    repeats, rank 0 keeps the cheapest tree, and all ranks then contract
    its slices together via ``_contract_mpi``.

    Args:
        circuit: qibo circuit to simulate.
        nqubits: Number of qubits (not used by this function).
        num_slices: Forwarded to ``parallel_search``.
        total_repeats: Optimizer repeats shared among the ranks.
        load_path: If given, rank 0 loads ``{"tree", "psi"}`` from this
            pickle instead of searching. Only load files you trust, since
            unpickling can execute arbitrary code.
        save_path: If given, rank 0 pickles the chosen tree and network
            there after a search, and the function returns without
            contracting.
        search_workers: Worker processes per rank for the path search.
        search_timeout: Per-rank search time budget in seconds.

    Returns:
        ``(state_vector, seconds)`` on rank 0 and ``(None, seconds)``
        elsewhere; ``(None, t_search)`` on all ranks when ``save_path`` is set.

    Raises:
        RuntimeError: On every rank, when no rank found a contraction tree.
    """
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    qibo.set_backend("qibotn", platform="quimb")
    b = qibo.get_backend()
    b.configure_tn_simulation(ansatz="tn")

    import torch
    qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
                                  gate_opts={"max_bond": None, "cutoff": 1e-10})
    qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex128)

    # --- path search: each rank searches locally, best is gathered on rank 0 ---
    if load_path:
        t_search = 0.0
        if rank == 0:
            with open(load_path, "rb") as f:
                saved = pickle.load(f)
            tree, psi = saved["tree"], saved["psi"]
            print(f" [path loaded] {load_path}")
        else:
            tree = psi = None
    else:
        rank_repeats = max(1, total_repeats // size)
        t0 = time.time()
        # Build the TN object only (no contraction), then search for a path.
        psi_tn = qc.to_dense(rehearse="tn")
        local_tree = parallel_search(
            psi_tn, psi_tn.outer_inds(), rank_repeats, n_workers=search_workers,
            num_slices=num_slices, n_ranks=size, timeout=search_timeout,
        )
        t_search = time.time() - t0

        # A timed-out search yields None; rank it last rather than crashing
        # on this rank alone, which would leave the others stuck in gather.
        local_cost = (
            float("inf") if local_tree is None
            else local_tree.combo_cost(factor=256)
        )
        # The tree travels with the network it was found on.
        all_results = comm.gather((local_cost, local_tree, psi_tn), root=0)
        if rank == 0:
            _, tree, psi = min(all_results, key=lambda x: x[0])
        else:
            tree = psi = None

        # Let every rank learn about a global failure so all of them raise.
        search_failed = comm.bcast(tree is None if rank == 0 else None, root=0)
        if search_failed:
            raise RuntimeError("Path search produced no contraction tree on any rank.")

        if rank == 0:
            print(f" [path search] {t_search:.3f}s "
                  f"flops~2^{tree.contraction_cost(log=2):.2f} "
                  f"size~2^{tree.contraction_width():.2f} "
                  f"slices={tree.multiplicity}")
            if save_path:
                with open(save_path, "wb") as f:
                    pickle.dump({"tree": tree, "psi": psi}, f)
                print(f" [path saved] {save_path}")

    if save_path:
        # Search-only mode: report the search time and skip the contraction.
        t_search = comm.bcast(t_search, root=0)
        return None, t_search

    tree = comm.bcast(tree, root=0)
    psi = comm.bcast(psi, root=0)
    t_search = comm.bcast(t_search, root=0)

    # --- contraction: all ranks work in parallel ---
    # 96 is the core count of the benchmark machine, shared among the ranks.
    torch.set_num_threads(max(1, 96 // size))
    arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex128) for a in psi.arrays]
    t0 = time.time()
    sv = _contract_mpi(tree, arrays, comm, root=0)
    t_contract = time.time() - t0

    if rank == 0:
        print(f" [contraction] {t_contract:.3f}s")
        return np.array(sv).reshape(-1), t_search + t_contract
    return None, t_search + t_contract
|
||||
|
||||
|
||||
def run_mpi_expval(
    circuit,
    nqubits,
    observable=None,
    total_repeats=1024,
    search_workers=1,
    search_timeout=300,
):
    """Compute a Hamiltonian expectation value directly from the TN via MPI.

    The Hamiltonian terms are dealt round-robin to the ranks
    (``terms[rank::size]``). For each term a local-expectation network is
    built and contracted, optionally using a process pool for the path
    search. The partial sums are gathered on rank 0.

    Returns:
        ``(expectation, seconds)`` on rank 0 and ``(None, seconds)`` on the
        other ranks, where ``seconds`` is this rank's own wall time.

    Raises:
        RuntimeError: If the path search returns no tree for a term.
    """
    import torch

    world = MPI.COMM_WORLD
    rank = world.Get_rank()
    size = world.Get_size()

    qibo.set_backend("qibotn", platform="quimb")
    backend = qibo.get_backend()
    backend.configure_tn_simulation(ansatz="tn")

    observable = check_observable(observable, nqubits)
    all_terms = extract_gates_and_qubits(observable)

    quimb_circuit = backend._qibo_circuit_to_quimb(
        circuit,
        quimb_circuit_type=backend.circuit_ansatz,
        gate_opts={"max_bond": None, "cutoff": 1e-10},
    )

    local_terms = all_terms[rank::size]
    torch.set_num_threads(max(1, 96 // size))
    started = time.time()

    partial = 0.0 + 0.0j
    for term in local_terms:
        coeff, op, where = _term_to_quimb_operator(term)
        if op is None:
            # Pure identity term: contributes its coefficient directly.
            partial += coeff
            continue

        network = quimb_circuit.local_expectation_tn(op, where=where)
        open_inds = network.outer_inds()
        if len(open_inds) == 0:
            value = complex(network.contract())
        else:
            tree = parallel_search(
                network,
                network.outer_inds(),
                total_repeats,
                n_workers=search_workers,
                num_slices=1,
                n_ranks=size,
                timeout=search_timeout,
            )
            if tree is None:
                raise RuntimeError("Failed to find a contraction tree for expectation TN.")
            tensors = [
                torch.from_numpy(np.asarray(a)).to(torch.complex128)
                for a in network.arrays
            ]
            total = sum(
                tree.contract_slice(tensors, i) for i in range(tree.multiplicity)
            )
            value = complex(total.item() if hasattr(total, 'item') else total)
        partial += coeff * value

    t_total = time.time() - started

    gathered = world.gather(partial, root=0)
    if rank != 0:
        return None, t_total

    total_exp = sum(gathered)
    print(f"\n[TN expval] time={t_total:.4f}s expval={total_exp.real:.12f}")
    return np.real_if_close(total_exp), t_total
|
||||
|
||||
|
||||
def main():
    """Command-line entry point of the MPI tensor-network benchmark.

    ``--mode expval`` computes an expectation value with ``run_mpi_expval``;
    the default ``--mode sv`` contracts the full state vector with
    ``run_mpi`` and, unless ``--no-compare`` is given, compares it against
    a qibojit reference. Results are stored as ``.npy`` files under
    ``data/``, which is created on demand.
    """
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=30)
    parser.add_argument("--circuit", type=str, default="qft",
                        choices=["qft", "variational", "ghz", "brickwork"])
    parser.add_argument("--nlayers", type=int, default=3)
    parser.add_argument("--num-slices", type=int, default=1)
    parser.add_argument("--total-repeats", type=int, default=1024)
    parser.add_argument("--search-workers", type=int, default=1)
    parser.add_argument("--search-timeout", type=int, default=300)
    parser.add_argument("--observable-file", type=str, default=None)
    parser.add_argument("--observable-json", type=str, default=None)
    parser.add_argument("--save-path", type=str, default=None)
    parser.add_argument("--load-path", type=str, default=None)
    parser.add_argument("--no-compare", action="store_true")
    parser.add_argument("--mode", type=str, default="sv", choices=["sv", "expval"])
    args = parser.parse_args()

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if rank == 0:
        print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, "
              f"nlayers={args.nlayers}, ranks={comm.Get_size()}")

    # Fixed seed so every rank (and the reference run) builds the same circuit.
    np.random.seed(42)
    circuit = make_circuit(args.circuit, args.nqubits, args.nlayers)
    observable = _load_observable(args.observable_file, args.observable_json)

    if args.mode == "expval":
        try:
            expval, t_total = run_mpi_expval(
                circuit,
                args.nqubits,
                observable=observable,
                total_repeats=args.total_repeats,
                search_workers=args.search_workers,
                search_timeout=args.search_timeout,
            )
        except Exception as e:
            if rank == 0:
                print(f"[FAILED] {e}")
            raise
        if rank == 0:
            # data/ is git-ignored, so it may not exist in a fresh checkout.
            os.makedirs("data", exist_ok=True)
            np.save(f"data/expval_tn_{args.circuit}{args.nqubits}.npy", np.asarray(expval))
            if not args.no_compare:
                print("No built-in reference comparison for arbitrary observables.")
        return

    try:
        sv, t_total = run_mpi(circuit, args.nqubits, args.num_slices,
                              total_repeats=args.total_repeats,
                              load_path=args.load_path, save_path=args.save_path)
    except Exception as e:
        if rank == 0:
            print(f"[FAILED] {e}")
        raise

    if rank == 0 and sv is not None:
        print(f"\n[quimb TN MPI] time={t_total:.4f}s shape={sv.shape}")
        os.makedirs("data", exist_ok=True)
        np.save(f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy", sv)

        if not args.no_compare:
            from qibotn.bak.benchmark_tn import run_qibojit
            import gc
            # Re-seed so the reference circuit uses the same random angles.
            np.random.seed(42)
            circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
            sv_ref, t_ref = run_qibojit(circuit_ref)
            np.save(f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy", sv_ref)
            print(f"[qibojit] time={t_ref:.4f}s")
            # Free memory before the comparison reloads both vectors from disk.
            del sv, sv_ref
            gc.collect()
            from compare_jit_tn_quimb import check_results
            ref_path = f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy"
            tn_path = f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy"
            check_results(ref_path, tn_path, args.nqubits)
            if t_total > 0:
                print(f"Speedup : {t_ref/t_total:.2f}x")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
25
tools/check_tree.py
Normal file
25
tools/check_tree.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""Check contraction tree statistics."""
|
||||
import pickle
import sys

# First CLI argument selects the pickle; otherwise the default tree is used.
path = (sys.argv[1:2] or ["data/tree_q25_l10.pkl"])[0]
with open(path, 'rb') as handle:
    tree = pickle.load(handle)

# Rough peak-throughput model for an Intel 8558P:
# 96 cores at 2.1 GHz, 16 FP64 lanes per cycle, doubled for FMA.
# The script assumes 6 real FLOPs per complex128 multiply-add.
CORES = 96
FREQ = 2.1e9
AVX512_FP64 = 16
TFLOPS = CORES * FREQ * AVX512_FP64 * 2 / 1e12  # ~6.45 TFLOPS real FP64
COMPLEX_FLOPS = TFLOPS / 6  # complex128 effective

flops = tree.total_flops()
slices = tree.multiplicity
# NOTE(review): assumes total_flops() is a per-slice figure - confirm against
# the cotengra version in use before trusting the estimate.
est_seconds = flops * slices / (COMPLEX_FLOPS * 1e12)

report = (
    f"File: {path}",
    f"Peak memory (GB): {tree.max_size() * 16 / 1e9:.2f}",
    f"Total FLOPs: {flops:.2e} x{slices} slices = {flops*slices:.2e}",
    f"Contraction width: {tree.contraction_width()}",
    f"Multiplicity (slices): {slices}",
    f"Estimated time (96 cores): {est_seconds:.1f}s ({est_seconds/3600:.2f}h)",
)
for line in report:
    print(line)
|
||||
137
tools/compare_vidal_backend_qmatchatea.py
Normal file
137
tools/compare_vidal_backend_qmatchatea.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Compare QMatchaTeaBackend with the VidalBackend fast path."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed, kind):
    """Build a layered benchmark circuit with seeded random rotations.

    Every layer applies ``RY`` then ``RZ`` with angles in ``[-pi, pi)`` on
    each qubit, followed by the entangling pattern selected by ``kind``
    (``"brickwall"``, ``"shifted-cz"`` or ``"reversed-cnot"``).

    Raises:
        ValueError: For an unknown ``kind`` (detected while building the
            first layer, so never raised when ``nlayers`` is 0).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    even_bonds = range(0, nqubits - 1, 2)
    odd_bonds = range(1, nqubits - 1, 2)

    for layer in range(nlayers):
        for q in range(nqubits):
            for rotation in (gates.RY, gates.RZ):
                circuit.add(rotation(q, theta=rng.uniform(-math.pi, math.pi)))

        if kind == "shifted-cz":
            for q in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.CZ(q, q + 1))
        elif kind in ("brickwall", "reversed-cnot"):
            # The reversed variant swaps control and target on even bonds only.
            flipped = kind == "reversed-cnot"
            for q in even_bonds:
                circuit.add(gates.CNOT(q + 1, q) if flipped else gates.CNOT(q, q + 1))
            for q in odd_bonds:
                circuit.add(gates.CNOT(q, q + 1))
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")
    return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits, kind):
    """Return a ``SymbolicHamiltonian`` for one of the benchmark observables.

    ``"ring-xz"`` sums ``0.5 * X(q) * Z(q + 1)`` around a ring, ``"open-zz"``
    averages ``Z(q) * Z(q + 1)`` over the open-chain bonds, and ``"mixed"``
    combines two single-site terms with ``Y Y`` terms on every third bond.

    Raises:
        ValueError: For an unknown ``kind``.
    """
    if kind == "ring-xz":
        terms = [0.5 * X(q) * Z((q + 1) % nqubits) for q in range(nqubits)]
    elif kind == "open-zz":
        terms = [Z(q) * Z(q + 1) / (nqubits - 1) for q in range(nqubits - 1)]
    elif kind == "mixed":
        terms = [0.25 * X(0) - 0.5 * Z(nqubits - 1)]
        terms.extend(0.125 * Y(q) * Y(q + 1) for q in range(0, nqubits - 1, 3))
    else:
        raise ValueError(f"Unknown observable kind {kind!r}.")

    form = 0
    for term in terms:
        form += term
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def run_backend(backend, circuit, observable):
    """Time one ``backend.expectation`` call.

    Returns:
        ``(value, seconds)``: the real part of the expectation as a float
        and the elapsed wall time from ``time.perf_counter``.
    """
    began = time.perf_counter()
    raw = backend.expectation(
        circuit,
        observable,
        preprocess=False,
        compile_circuit=True,
    )
    real_value = float(np.real(raw))
    elapsed = time.perf_counter() - began
    return real_value, elapsed
|
||||
|
||||
|
||||
def main():
    """Run the same circuit and observable on both backends and print a table.

    The qmatchatea row is skipped with ``--skip-qmatchatea``. When
    ``--reference-file`` is given, its ``"expectation"`` entry is used to
    report absolute errors; otherwise the error column is ``nan``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--nqubits", type=int, default=34)
    parser.add_argument("--nlayers", type=int, default=20)
    parser.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--tensor-module", choices=("torch", "numpy"), default="torch")
    parser.add_argument("--torch-threads", type=int, default=32)
    parser.add_argument(
        "--circuit-kind",
        choices=("brickwall", "shifted-cz", "reversed-cnot"),
        default="brickwall",
    )
    parser.add_argument(
        "--observable-kind",
        choices=("ring-xz", "open-zz", "mixed"),
        default="ring-xz",
    )
    parser.add_argument("--reference-file")
    parser.add_argument("--skip-qmatchatea", action="store_true")
    args = parser.parse_args()

    torch.set_num_threads(args.torch_threads)
    circuit = build_circuit(args.nqubits, args.nlayers, args.seed, args.circuit_kind)
    observable = build_observable(args.nqubits, args.observable_kind)

    exact = None
    if args.reference_file:
        with open(args.reference_file, "r", encoding="utf-8") as f:
            exact = float(json.load(f)["expectation"])

    print(
        f"nqubits={args.nqubits} nlayers={args.nlayers} bond={args.bond} "
        f"circuit={args.circuit_kind} observable={args.observable_kind} "
        f"tensor_module={args.tensor_module} torch_threads={args.torch_threads}"
    )
    if exact is not None:
        print(f"exact={exact:.16e}")
    print("backend value abs_error seconds")

    # Options shared by both backends; each adds its own extras below.
    shared_options = {
        "ansatz": "MPS",
        "max_bond_dimension": args.bond,
        "cut_ratio": 1e-12,
        "tensor_module": args.tensor_module,
        "compile_circuit": True,
    }
    contenders = []
    if not args.skip_qmatchatea:
        contenders.append(
            ("qmatchatea", QMatchaTeaBackend, {"svd_control": "E!", "track_memory": False})
        )
    contenders.append(("vidal", VidalBackend, {"fallback": True}))

    for label, backend_cls, extra_options in contenders:
        backend = backend_cls()
        backend.configure_tn_simulation(**shared_options, **extra_options)
        value, seconds = run_backend(backend, circuit, observable)
        error = float("nan") if exact is None else abs(value - exact)
        print(f"{label} {value:.16e} {error:.6e} {seconds:.3f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
33
tools/example_tn_case.py
Normal file
33
tools/example_tn_case.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""Example custom case for tools/run_tn_custom.py."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
    """Build the example circuit: random rotations plus RXX/RZZ couplings.

    Each layer applies ``RY`` and ``RZ`` with angles in ``[-pi, pi)`` to
    every qubit, then ``RXX`` and ``RZZ`` with angles in ``[-0.7, 0.7)`` on
    alternating nearest-neighbour bonds (offset ``layer % 2``). All angles
    come from a generator seeded with ``seed``.
    """
    rng = np.random.default_rng(seed)

    def rotation_angle():
        return rng.uniform(-math.pi, math.pi)

    def coupling_angle():
        return rng.uniform(-0.7, 0.7)

    circuit = Circuit(nqubits)
    for layer in range(nlayers):
        for qubit in range(nqubits):
            circuit.add(gates.RY(qubit, theta=rotation_angle()))
            circuit.add(gates.RZ(qubit, theta=rotation_angle()))
        for left in range(layer % 2, nqubits - 1, 2):
            right = left + 1
            circuit.add(gates.RXX(left, right, theta=coupling_angle()))
            circuit.add(gates.RZZ(left, right, theta=coupling_angle()))
    return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits, seed):
    """Return the averaged nearest-neighbour ``Z Z`` observable as a dict.

    The result has one term per bond ``(site, site + 1)``, each weighted by
    ``1 / max(1, nqubits - 1)``. ``seed`` is accepted but not used.
    """
    weight = 1.0 / max(1, nqubits - 1)
    terms = []
    for site in range(nqubits - 1):
        terms.append(
            {
                "coefficient": weight,
                "operators": [("Z", site), ("Z", site + 1)],
            }
        )
    return {"terms": terms}
|
||||
208
tools/inspect_contraction_tree.py
Normal file
208
tools/inspect_contraction_tree.py
Normal file
@@ -0,0 +1,208 @@
|
||||
"""Inspect cotengra contraction trees for dominant torch matmul shapes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import importlib
|
||||
import math
|
||||
import pickle
|
||||
from collections import Counter, defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _prod(values):
|
||||
out = 1
|
||||
for value in values:
|
||||
out *= int(value)
|
||||
return out
|
||||
|
||||
|
||||
def _broadcast_batch(a_batch, b_batch):
|
||||
if a_batch == b_batch:
|
||||
return _prod(a_batch)
|
||||
if not a_batch:
|
||||
return _prod(b_batch)
|
||||
if not b_batch:
|
||||
return _prod(a_batch)
|
||||
|
||||
ndim = max(len(a_batch), len(b_batch))
|
||||
a_batch = (1,) * (ndim - len(a_batch)) + tuple(a_batch)
|
||||
b_batch = (1,) * (ndim - len(b_batch)) + tuple(b_batch)
|
||||
return _prod(max(a, b) for a, b in zip(a_batch, b_batch))
|
||||
|
||||
|
||||
def _load_tree(path, index):
|
||||
with Path(path).open("rb") as f:
|
||||
payload = pickle.load(f)
|
||||
trees = payload["trees"] if isinstance(payload, dict) else payload
|
||||
if not isinstance(trees, (list, tuple)):
|
||||
trees = [trees]
|
||||
return trees[index]
|
||||
|
||||
|
||||
def _analyze_tree(tree):
    """Classify every pairwise contraction of ``tree`` by its matmul shape.

    Uses ``cotengra.contract`` helpers to extract the contraction list and
    to translate each step into the shapes of the matrix multiplication it
    would be dispatched as.

    Returns:
        ``(contractions, ops, counts)``: the raw contraction list, one dict
        per pairwise contraction (kind, matmul shape and flops, tree flops,
        output size, operand shapes and ranks, permutation), and a
        ``Counter`` of the kinds ``"preprocess"``, ``"mul"``, ``"mm"`` and
        ``"bmm"``.
    """
    contract_mod = importlib.import_module("cotengra.contract")
    contractions = contract_mod.extract_contractions(tree)
    sizes = tree.size_dict
    records = []
    tally = Counter()

    for position, step in enumerate(contractions):
        parent, left, right, tdot, arg, perm = step
        if left is None and right is None:
            # Single-tensor step: nothing to multiply.
            tally["preprocess"] += 1
            continue

        left_inds = tree.get_inds(left)
        right_inds = tree.get_inds(right)
        parent_inds = tree.get_inds(parent)
        left_shape = tuple(sizes[ix] for ix in left_inds)
        right_shape = tuple(sizes[ix] for ix in right_inds)

        parser = (
            contract_mod._parse_tensordot_axes_to_matmul
            if tdot
            else contract_mod._parse_eq_to_batch_matmul
        )
        parsed = parser(arg, left_shape, right_shape)
        new_shape_a, new_shape_b = parsed[2], parsed[3]
        pure_multiplication = parsed[6]
        # The parsers are expected to return exactly seven entries.
        (_, _, _, _, _, _, _) = parsed

        if pure_multiplication:
            kind, matmul_shape, matmul_flops = "mul", None, 0
        else:
            a_shape = tuple(new_shape_a or left_shape)
            b_shape = tuple(new_shape_b or right_shape)
            batch = _broadcast_batch(a_shape[:-2], b_shape[:-2])
            m = int(a_shape[-2])
            k = int(a_shape[-1])
            n = int(b_shape[-1])
            kind = "mm" if batch == 1 else "bmm"
            matmul_shape = (batch, m, k, n)
            matmul_flops = batch * m * k * n

        records.append(
            {
                "index": position,
                "kind": kind,
                "matmul_shape": matmul_shape,
                "matmul_flops": matmul_flops,
                "tree_flops": int(tree.get_flops(parent)),
                "out_size": int(tree.get_size(parent)),
                "left_shape": left_shape,
                "right_shape": right_shape,
                "left_rank": len(left_inds),
                "right_rank": len(right_inds),
                "out_rank": len(parent_inds),
                "perm": perm,
            }
        )
        tally[kind] += 1

    return contractions, records, tally
|
||||
|
||||
|
||||
def _format_log(value, base):
|
||||
return "-inf" if value <= 0 else f"{math.log(value, base):.3f}"
|
||||
|
||||
|
||||
def main():
    """Print flop/size statistics and the dominant matmul shapes of a tree."""
    parser = argparse.ArgumentParser()
    parser.add_argument("tree", help="Pickle file containing one tree or {'trees': [...]}.")
    parser.add_argument("--index", type=int, default=0, help="Tree index in the file.")
    parser.add_argument("--top", type=int, default=20, help="Number of top ops to print.")
    parser.add_argument(
        "--dtype-bytes",
        type=int,
        default=8,
        help="Bytes per element for memory estimates, for example 8 for complex64.",
    )
    args = parser.parse_args()

    tree = _load_tree(args.tree, args.index)
    contractions, ops, counts = _analyze_tree(tree)
    nslices = int(getattr(tree, "multiplicity", 1))

    slice_flops = sum(op["tree_flops"] for op in ops)
    slice_write = sum(op["out_size"] for op in ops)
    largest_output = max((op["out_size"] for op in ops), default=0)
    total_flops = slice_flops * nslices
    total_write = slice_write * nslices

    print(f"tree={args.tree} index={args.index}")
    print(
        "summary "
        f"slices={nslices} contractions={len(contractions)} "
        f"counts={dict(counts)}"
    )
    print(
        "per_slice "
        f"log10_flops={_format_log(slice_flops, 10)} "
        f"log10_write={_format_log(slice_write, 10)} "
        f"log2_max_output={_format_log(largest_output, 2)} "
        f"max_output_gib={largest_output * args.dtype_bytes / 1024**3:.6g}"
    )
    print(
        "all_slices "
        f"log10_flops={_format_log(total_flops, 10)} "
        f"log10_write={_format_log(total_write, 10)}"
    )

    print(f"\ntop_{args.top}_ops_by_flops")
    heaviest = sorted(ops, key=lambda item: item["tree_flops"], reverse=True)
    for op in heaviest[: args.top]:
        print(
            f"op={op['index']} kind={op['kind']} "
            f"flops={op['tree_flops']:.6e} out={op['out_size']:.6e} "
            f"matmul={op['matmul_shape']} "
            f"ranks=({op['left_rank']},{op['right_rank']}->{op['out_rank']}) "
            f"lhs={op['left_shape']} rhs={op['right_shape']}"
        )

    # Aggregate per matmul shape: [count, flops, output size].
    by_shape = defaultdict(lambda: [0, 0, 0])
    for op in ops:
        shape = op["matmul_shape"]
        if shape is None:
            continue
        bucket = by_shape[shape]
        bucket[0] += 1
        bucket[1] += op["tree_flops"]
        bucket[2] += op["out_size"]

    # Same table twice, ranked first by accumulated flops and then by count.
    for label, column in (("flops", 1), ("count", 0)):
        print(f"\ntop_{args.top}_matmul_shapes_by_{label}")
        ranked = sorted(
            by_shape.items(),
            key=lambda item, column=column: item[1][column],
            reverse=True,
        )
        for shape, (count, flops, out_size) in ranked[: args.top]:
            print(
                f"shape={shape} count={count} "
                f"flops={flops:.6e} output={out_size:.6e}"
            )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
223
tools/manage_tn_dask_cluster.sh
Executable file
223
tools/manage_tn_dask_cluster.sh
Executable file
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Manage the dask cluster used by TN path search.
|
||||
#
|
||||
# Defaults target two servers:
|
||||
# scheduler: 10.20.1.103:8786
|
||||
# workers: 10.20.1.103, 10.20.6.101
|
||||
#
|
||||
# Usage:
|
||||
# tools/manage_tn_dask_cluster.sh start
|
||||
# tools/manage_tn_dask_cluster.sh status
|
||||
# tools/manage_tn_dask_cluster.sh stop
|
||||
#
|
||||
# Common overrides:
|
||||
# SCHEDULER_HOST=10.20.1.103
|
||||
# WORKER_HOSTS="10.20.1.103 10.20.6.101"
|
||||
# NWORKERS=48
|
||||
# NTHREADS=1
|
||||
# ROOT_DIR=/home/yx/qibotn
|
||||
# PYTHON_BIN=.venv/bin/python
|
||||
|
||||
ROOT_DIR="${ROOT_DIR:-/home/yx/qibotn}"
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
SCHEDULER_HOST="${SCHEDULER_HOST:-10.20.1.103}"
|
||||
SCHEDULER_PORT="${SCHEDULER_PORT:-8786}"
|
||||
DASHBOARD_ADDRESS="${DASHBOARD_ADDRESS:-:8787}"
|
||||
WORKER_HOSTS="${WORKER_HOSTS:-10.20.1.103 10.20.6.101}"
|
||||
NWORKERS="${NWORKERS:-84}"
|
||||
NTHREADS="${NTHREADS:-1}"
|
||||
MEMORY_LIMIT="${MEMORY_LIMIT:-0}"
|
||||
LOCAL_DIRECTORY="${LOCAL_DIRECTORY:-/tmp/qibotn-dask}"
|
||||
LOG_DIR="${LOG_DIR:-$ROOT_DIR/logs/dask}"
|
||||
SSH_BIN="${SSH_BIN:-ssh}"
|
||||
DASK_WORKER_TTL="${DASK_WORKER_TTL:-24 hours}"
|
||||
DASK_TICK_LIMIT="${DASK_TICK_LIMIT:-30 minutes}"
|
||||
DASK_LOST_WORKER_TIMEOUT="${DASK_LOST_WORKER_TIMEOUT:-30 minutes}"
|
||||
|
||||
SCHEDULER_ADDR="tcp://${SCHEDULER_HOST}:${SCHEDULER_PORT}"
|
||||
|
||||
# Succeed when the given host name or address refers to this machine:
# a loopback name, the short or fully qualified hostname, or one of the
# addresses printed by `hostname -I`.
is_local_host() {
  local candidate="$1"
  case "$candidate" in
    localhost|127.0.0.1) return 0 ;;
  esac
  if [[ "$candidate" == "$(hostname)" ]]; then
    return 0
  fi
  if [[ "$candidate" == "$(hostname -f 2>/dev/null || true)" ]]; then
    return 0
  fi
  hostname -I 2>/dev/null | tr ' ' '\n' | grep -qx "$candidate"
}
|
||||
|
||||
# Run a command string in a login shell, locally when the host is this
# machine and through $SSH_BIN otherwise. The remote command is passed
# through printf %q so it reaches the remote shell as one argument.
run_on_host() {
  local target="$1"
  shift
  local script="$*"
  if ! is_local_host "$target"; then
    "$SSH_BIN" "$target" "bash -lc $(printf '%q' "$script")"
  else
    bash -lc "$script"
  fi
}
|
||||
|
||||
# Start the dask scheduler on $SCHEDULER_HOST.
# The script run on that host exits early when the pid file names a live
# process. Otherwise it launches the scheduler detached via setsid, with
# output redirected to the log file, and records the background pid.
# Local variables are expanded here; escaped \$ expands on the target host.
start_scheduler() {
  local host="$SCHEDULER_HOST"
  local log="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.log"
  local pid_file="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.pid"
  run_on_host "$host" "
set -euo pipefail
cd '$ROOT_DIR'
mkdir -p '$LOG_DIR'
if [[ -s '$pid_file' ]]; then
pid=\$(cat '$pid_file')
if kill -0 \"\$pid\" 2>/dev/null; then
echo \"scheduler already running on $host pid=\$pid\"
exit 0
fi
fi
DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL='$DASK_WORKER_TTL' \
DASK_DISTRIBUTED__ADMIN__TICK__LIMIT='$DASK_TICK_LIMIT' \
DASK_DISTRIBUTED__DEPLOY__LOST_WORKER_TIMEOUT='$DASK_LOST_WORKER_TIMEOUT' \
setsid '$PYTHON_BIN' -m distributed.cli.dask_scheduler \
--host '$SCHEDULER_HOST' \
--port '$SCHEDULER_PORT' \
--dashboard-address '$DASHBOARD_ADDRESS' \
> '$log' 2>&1 < /dev/null &
pid=\$!
echo \"\$pid\" > '$pid_file'
echo \"scheduler host=$host pid=\$pid addr=$SCHEDULER_ADDR log=$log\"
"
}
|
||||
|
||||
# Start the dask worker processes on one host ($1) and attach them to
# $SCHEDULER_ADDR. Like start_scheduler, this does nothing when the host's
# pid file names a live process; otherwise it launches dask_worker
# detached via setsid, logs to a per-host file and records the pid.
start_worker() {
  local host="$1"
  local log="$LOG_DIR/worker_${host}.log"
  local pid_file="$LOG_DIR/worker_${host}.pid"
  run_on_host "$host" "
set -euo pipefail
cd '$ROOT_DIR'
mkdir -p '$LOG_DIR' '$LOCAL_DIRECTORY'
if [[ -s '$pid_file' ]]; then
pid=\$(cat '$pid_file')
if kill -0 \"\$pid\" 2>/dev/null; then
echo \"worker already running on $host pid=\$pid\"
exit 0
fi
fi
TCM_ENABLE=1 \
DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL='$DASK_WORKER_TTL' \
DASK_DISTRIBUTED__ADMIN__TICK__LIMIT='$DASK_TICK_LIMIT' \
DASK_DISTRIBUTED__DEPLOY__LOST_WORKER_TIMEOUT='$DASK_LOST_WORKER_TIMEOUT' \
setsid '$PYTHON_BIN' -m distributed.cli.dask_worker \
'$SCHEDULER_ADDR' \
--host '$host' \
--nworkers '$NWORKERS' \
--nthreads '$NTHREADS' \
--memory-limit '$MEMORY_LIMIT' \
--local-directory '$LOCAL_DIRECTORY' \
> '$log' 2>&1 < /dev/null &
pid=\$!
echo \"\$pid\" > '$pid_file'
echo \"worker host=$host pid=\$pid scheduler=$SCHEDULER_ADDR log=$log\"
"
}
|
||||
|
||||
# Stop the dask processes on one host ($1).
# Kills the pids recorded in the worker pid file and, only on the
# scheduler host, the scheduler pid file, removing each file afterwards.
# It then pkills any remaining matching worker/scheduler processes.
# Errors are ignored (set +e, trailing `true`) so stopping is best effort.
stop_host() {
  local host="$1"
  local scheduler_pid_file="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.pid"
  local worker_pid_file="$LOG_DIR/worker_${host}.pid"
  run_on_host "$host" "
set +e
for pid_file in '$worker_pid_file' '$scheduler_pid_file'; do
[[ -f \"\$pid_file\" ]] || continue
if [[ \"\$pid_file\" == '$scheduler_pid_file' && '$host' != '$SCHEDULER_HOST' ]]; then
continue
fi
pid=\$(cat \"\$pid_file\")
kill \"\$pid\" 2>/dev/null || true
rm -f \"\$pid_file\"
done
pkill -f '[d]istributed.cli.dask_worker.*$SCHEDULER_ADDR'
pkill -f '[d]istributed.cli.dask_scheduler.*--port $SCHEDULER_PORT'
true
"
}
|
||||
|
||||
# Print the dask process status of one host ($1).
# For each relevant pid file it shows either the `ps` line of the live
# process or a "stale" notice, then lists every process whose command
# line matches distributed.cli.dask. The scheduler pid file is only
# considered on the scheduler host.
status_host() {
  local host="$1"
  local scheduler_pid_file="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.pid"
  local worker_pid_file="$LOG_DIR/worker_${host}.pid"
  echo "--------------------------------------------------------------------------------"
  echo "host=$host"
  run_on_host "$host" "
set +e
for pid_file in '$worker_pid_file' '$scheduler_pid_file'; do
[[ -f \"\$pid_file\" ]] || continue
if [[ \"\$pid_file\" == '$scheduler_pid_file' && '$host' != '$SCHEDULER_HOST' ]]; then
continue
fi
pid=\$(cat \"\$pid_file\")
if kill -0 \"\$pid\" 2>/dev/null; then
ps -p \"\$pid\" -o pid,ppid,stat,etime,cmd --no-headers
else
echo \"stale pid_file=\$pid_file pid=\$pid\"
fi
done
pgrep -af '[d]istributed.cli.dask' || true
"
}
|
||||
|
||||
# Command dispatch. The first argument selects the action; a missing or
# unknown argument prints the usage text and exits with status 2.
case "${1:-help}" in
  start)
    # Scheduler first, then a short pause before the workers connect.
    start_scheduler
    sleep 2
    for host in $WORKER_HOSTS; do
      start_worker "$host"
    done
    echo
    echo "Dask scheduler: $SCHEDULER_ADDR"
    echo "Dashboard: http://$SCHEDULER_HOST$DASHBOARD_ADDRESS"
    ;;
  stop)
    for host in $WORKER_HOSTS; do
      stop_host "$host"
    done
    stop_host "$SCHEDULER_HOST"
    ;;
  status)
    status_host "$SCHEDULER_HOST"
    for host in $WORKER_HOSTS; do
      # The scheduler host was already reported above.
      [[ "$host" == "$SCHEDULER_HOST" ]] && continue
      status_host "$host"
    done
    ;;
  restart)
    "$0" stop
    sleep 2
    "$0" start
    ;;
  help|*)
    # Heredoc body and terminator must stay in column 0.
    cat <<EOF
Usage: tools/manage_tn_dask_cluster.sh [start|stop|restart|status]

Defaults:
SCHEDULER_HOST=$SCHEDULER_HOST
SCHEDULER_PORT=$SCHEDULER_PORT
WORKER_HOSTS="$WORKER_HOSTS"
NWORKERS=$NWORKERS
NTHREADS=$NTHREADS
ROOT_DIR=$ROOT_DIR
PYTHON_BIN=$PYTHON_BIN
DASK_WORKER_TTL="$DASK_WORKER_TTL"
DASK_TICK_LIMIT=$DASK_TICK_LIMIT
DASK_LOST_WORKER_TIMEOUT=$DASK_LOST_WORKER_TIMEOUT

Search command after start:
TCM_ENABLE=1 python -u tools/tn_contest_runner.py search \\
--case main1 \\
--dask-address $SCHEDULER_ADDR \\
--torch-threads 48 \\
--dtype complex64 \\
--tn-search-repeats 2048 \\
--tn-search-time 300
EOF
    exit 2
    ;;
esac
|
||||
313
tools/mps_contest_runner.py
Normal file
313
tools/mps_contest_runner.py
Normal file
@@ -0,0 +1,313 @@
|
||||
#!/usr/bin/env python
|
||||
"""Contest-style multi-node Vidal/MPS expectation runner."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
# Repository root is one level above the directory holding this script.
ROOT = Path(__file__).resolve().parents[1]
# Put the in-repo ``src`` tree first on the import path (once) so the
# project imports below resolve to this checkout.
SRC = ROOT.joinpath("src")
if str(SRC) not in sys.path:
    sys.path[:0] = [str(SRC)]
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend # noqa: E402
|
||||
from qibotn.expectation_runner import exact_for_observable # noqa: E402
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CaseSpec:
    """Immutable default settings for one named contest case."""

    # Circuit family name handed to ``build_circuit``.
    circuit_kind: str
    # Observable names evaluated when none are selected on the command line.
    observables: tuple[str, ...]
    # Default number of qubits.
    nqubits: int
    # Default number of circuit layers.
    nlayers: int
    # Default maximum bond dimension; ``None`` presumably means "no cap"
    # (NOTE(review): confirm against the backend).
    bond: int | None
    # Default seed for the random circuit parameters.
    seed: int
|
||||
|
||||
|
||||
# Named contest cases selectable with ``--case``. Every observable name listed
# here must be one that ``observable()`` in this module can build.
CASES = {
    "main1": CaseSpec(
        circuit_kind="reversed_cnot",
        observables=("ring_xz",),
        nqubits=128,
        nlayers=24,
        bond=512,
        seed=31001,
    ),
    "main2": CaseSpec(
        circuit_kind="rxx_rzz",
        observables=("open_zz", "range2_xx", "mixed_local"),
        nqubits=128,
        nlayers=32,
        bond=1024,
        seed=31002,
    ),
    "strong": CaseSpec(
        circuit_kind="scramble",
        # This entry used to name "long_z_string", which ``observable()`` does
        # not define, so the default "strong" run aborted with ValueError.
        # "long_Z_5_sites" is the long Z string this module provides.
        # NOTE(review): confirm this is the intended observable.
        observables=("ring_xz", "long_Z_5_sites", "dense3_spread"),
        nqubits=256,
        nlayers=48,
        bond=2048,
        seed=41001,
    ),
}
|
||||
|
||||
|
||||
def optional_int(text):
    """Convert a CLI value to ``int``, mapping "no limit" keywords to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter
    case) yield ``None``; everything else is passed to ``int``.
    """
    unbounded_words = ("none", "null", "inf", "unlimited")
    if isinstance(text, str):
        if text.lower() in unbounded_words:
            return None
    return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Convert a CLI value to ``float``, mapping "no limit" keywords to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter
    case) yield ``None``; everything else is passed to ``float``.
    """
    unbounded_words = ("none", "null", "inf", "unlimited")
    if isinstance(text, str):
        if text.lower() in unbounded_words:
            return None
    return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
    """Format ``value`` with format spec ``fmt``; ``None`` becomes ``"None"``."""
    if value is None:
        return "None"
    return format(value, fmt)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
    """Best-effort: set the torch thread count to ``nthreads``.

    A missing torch installation is tolerated silently. Any other failure
    (for example an invalid thread count) is reported on stderr instead of
    being swallowed, so a mis-threaded benchmark run is visible in the log.
    The function never raises.
    """
    try:
        import torch

        torch.set_num_threads(nthreads)
    except ImportError:
        # torch is optional for this helper; nothing to configure.
        return
    except Exception as exc:  # best-effort by design: report, do not crash
        print(
            f"warning: could not set torch threads to {nthreads!r}: "
            f"{type(exc).__name__}: {exc}",
            file=sys.stderr,
            flush=True,
        )
|
||||
|
||||
|
||||
def add_single_qubit_layer(circuit, nqubits, rng, include_rx=False):
    """Append random single-qubit rotations to ``circuit`` on every qubit.

    Each qubit gets RY then RZ, followed by RX when ``include_rx`` is true.
    Every angle is drawn from ``rng.uniform(-pi, pi)`` in that order.
    """
    rotations = [gates.RY, gates.RZ]
    if include_rx:
        rotations.append(gates.RX)
    for qubit in range(nqubits):
        for rotation in rotations:
            angle = rng.uniform(-math.pi, math.pi)
            circuit.add(rotation(qubit, theta=angle))
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build the random benchmark circuit for one circuit family.

    Args:
        kind: ``"reversed_cnot"``, ``"rxx_rzz"`` or ``"scramble"``.
        nqubits: number of qubits.
        nlayers: number of layers; each layer is a single-qubit rotation
            layer followed by nearest-neighbour two-qubit gates.
        seed: seed for the NumPy generator that draws all angles.

    Returns:
        The assembled ``Circuit``.

    Raises:
        ValueError: if ``kind`` is unknown. The check runs before any layer
            is built, so it also triggers when ``nlayers`` is 0 (it used to
            be reached only inside the layer loop).
    """
    if kind not in ("reversed_cnot", "rxx_rzz", "scramble"):
        raise ValueError(f"Unknown circuit kind {kind!r}.")

    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    for layer in range(nlayers):
        odd_layer = layer % 2 == 1

        if kind == "reversed_cnot":
            add_single_qubit_layer(circuit, nqubits, rng)
            # Even bonds: control on the left qubit in even layers, reversed
            # in odd layers.
            for qubit in range(0, nqubits - 1, 2):
                pair = (qubit + 1, qubit) if odd_layer else (qubit, qubit + 1)
                circuit.add(gates.CNOT(*pair))
            # Odd bonds use the opposite orientation of the even bonds.
            for qubit in range(1, nqubits - 1, 2):
                pair = (qubit, qubit + 1) if odd_layer else (qubit + 1, qubit)
                circuit.add(gates.CNOT(*pair))
            continue

        # "rxx_rzz" and "scramble" share one structure; they differ in the
        # angle range and in the SWAPs that "scramble" adds every fifth layer.
        max_angle = 0.9 if kind == "rxx_rzz" else 0.8
        add_swaps = kind == "scramble" and layer % 5 == 4
        add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
        # The bond offset alternates between layers (brick-wall pattern).
        for qubit in range(layer % 2, nqubits - 1, 2):
            circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-max_angle, max_angle)))
            circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-max_angle, max_angle)))
            if add_swaps:
                circuit.add(gates.SWAP(qubit, qubit + 1))

    return circuit
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
    """Return a random Hermitian matrix observable acting on ``qubits``.

    A complex Gaussian ``dim x dim`` matrix is drawn from a generator seeded
    with ``seed``, symmetrised to be Hermitian, and scaled to unit norm as
    computed by ``np.linalg.norm``. ``nqubits`` is accepted but not used.

    Returns:
        ``{"matrix": ndarray, "qubits": list}``.
    """
    generator = np.random.default_rng(seed)
    shape = (dim, dim)
    # Draw the real part first, then the imaginary part.
    real_part = generator.normal(size=shape)
    imag_part = generator.normal(size=shape)
    sample = real_part + 1j * imag_part
    hermitian = (sample + sample.conj().T) / 2.0
    hermitian = hermitian / np.linalg.norm(hermitian)
    return {"matrix": hermitian, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def observable(kind, nqubits, seed):
    """Build the observable called ``kind`` for an ``nqubits``-qubit register.

    Pauli observables are returned as ``hamiltonians.SymbolicHamiltonian``;
    the ``dense*`` kinds return the dict produced by ``dense_observable``
    (``seed`` is only used for those).

    Raises:
        ValueError: if ``kind`` is not a known observable name.
    """
    # Reference sites at 1/4, 1/2 and 3/4 of the register, plus the last site.
    quarter = nqubits // 4
    half = nqubits // 2
    three_quarters = (3 * nqubits) // 4
    end = nqubits - 1

    # Dense-matrix observables are not symbolic forms; handle them first.
    if kind == "dense2_mid":
        return dense_observable(nqubits, (half - 1, half), seed + 101, 4)
    if kind == "dense3_spread":
        return dense_observable(nqubits, (quarter, half, three_quarters), seed + 202, 8)

    if kind == "boundary_ZZ_q1":
        form = Z(quarter - 1) * Z(quarter)
    elif kind == "boundary_ZZ_q2":
        form = Z(half - 1) * Z(half)
    elif kind == "boundary_ZZ_q3":
        form = Z(three_quarters - 1) * Z(three_quarters)
    elif kind == "long_Z_5_sites":
        form = Z(0) * Z(quarter) * Z(half) * Z(three_quarters) * Z(end)
    elif kind == "mixed_XZYZX":
        form = X(0) * Z(quarter) * Y(half) * Z(three_quarters) * X(end)
    elif kind == "ring_xz":
        # Periodic chain: the last site couples back to site 0.
        form = sum(0.5 * X(site) * Z((site + 1) % nqubits) for site in range(nqubits))
    elif kind == "open_zz":
        # Nearest-neighbour ZZ, weighted by 1 / (number of bonds).
        form = sum(
            (1.0 / max(1, nqubits - 1)) * Z(site) * Z(site + 1)
            for site in range(nqubits - 1)
        )
    elif kind == "range2_xx":
        # Next-nearest-neighbour XX, weighted by 1 / (number of pairs).
        form = sum(
            (1.0 / max(1, nqubits - 2)) * X(site) * X(site + 2)
            for site in range(nqubits - 2)
        )
    elif kind == "mixed_local":
        form = 0.25 * X(0) - 0.5 * Z(end) + 0.125 * X(quarter) * Z(half) * Y(three_quarters)
    elif kind == "complex_iZ0":
        form = 1.0j * Z(0)
    else:
        raise ValueError(f"Unknown observable kind {kind!r}.")

    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def selected_observables(args, case):
    """Pick the observable names to run, as a tuple.

    Precedence: a non-empty ``args.observables`` list, then the
    comma-separated ``args.obs_filter`` string (blank entries dropped), and
    finally the defaults in ``case.observables``.
    """
    explicit = args.observables
    if explicit:
        return tuple(explicit)

    filter_text = args.obs_filter
    if not filter_text:
        return case.observables

    names = []
    for chunk in filter_text.split(","):
        name = chunk.strip()
        if name:
            names.append(name)
    return tuple(names)
|
||||
|
||||
|
||||
def apply_case_defaults(args):
    """Fill unset CLI options on ``args`` from the selected case, in place.

    ``nqubits``, ``nlayers`` and ``seed`` are taken from the case when they
    are ``None``; ``bond`` is taken when it still holds the
    ``"case-default"`` sentinel. ``args.observables`` is always replaced by
    the result of ``selected_observables``.
    """
    spec = CASES[args.case]
    for field in ("nqubits", "nlayers", "seed"):
        if getattr(args, field) is None:
            setattr(args, field, getattr(spec, field))
    if args.bond == "case-default":
        args.bond = spec.bond
    args.observables = selected_observables(args, spec)
|
||||
|
||||
|
||||
def run_case(args):
    """Evaluate every selected observable of one case on the Vidal backend.

    All MPI ranks build the same circuit and call the backend; rank 0 prints
    a header and one result row per observable. With ``args.exact`` set,
    rank 0 also computes the reference value via ``exact_for_observable``.
    A backend exception for one observable is recorded in the ``status``
    column (value ``nan``) and the loop continues.

    Raises:
        ValueError: if ``args.exact`` is set and ``args.nqubits`` exceeds
            ``args.exact_max_qubits``.
    """
    # The limit depends only on CLI values, so check it on every rank before
    # any work starts. Raising on rank 0 alone (as before) left the other
    # ranks blocked in ``comm.Barrier()``.
    if args.exact and args.nqubits > args.exact_max_qubits:
        raise ValueError(
            f"--exact is limited to {args.exact_max_qubits} qubits by default."
        )

    set_torch_threads(args.torch_threads)
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    case = CASES[args.case]
    circuit = build_circuit(case.circuit_kind, args.nqubits, args.nlayers, args.seed)

    if rank == 0:
        print("=" * 88, flush=True)
        print(
            "backend=vidal_mps "
            f"case={args.case} circuit={case.circuit_kind} ranks={size} "
            f"nqubits={args.nqubits} nlayers={args.nlayers} gates={len(circuit.queue)} "
            f"bond={format_optional(args.bond)} cut_ratio={format_optional(args.cut_ratio)} "
            f"torch_threads={args.torch_threads} seed={args.seed} "
            f"observables={','.join(args.observables)}",
            flush=True,
        )
        print("observable exact value abs_error rel_error seconds trunc_sum trunc_max status", flush=True)

    def _metric(backend, name):
        # NOTE(review): assumes the backend may leave these attributes unset
        # or None when the run fails; report nan then instead of crashing
        # while printing the row.
        metric = getattr(backend, name, None)
        return float("nan") if metric is None else metric

    for obs_name in args.observables:
        obs = observable(obs_name, args.nqubits, args.seed)
        exact = None
        if args.exact and rank == 0:
            exact = exact_for_observable(circuit, obs, args.nqubits)

        # A fresh backend per observable.
        backend = VidalBackend()
        backend.configure_tn_simulation(
            max_bond_dimension=args.bond,
            cut_ratio=args.cut_ratio,
            tensor_module="torch",
            mpi_approach="CT",
            mpi_num_procs=size,
            fallback=False,
        )

        # Line the ranks up so the timer covers the expectation call only.
        comm.Barrier()
        start = time.perf_counter()
        try:
            value = backend.expectation(
                circuit,
                obs,
                preprocess=True,
                compile_circuit=False,
            )
            status = "ok"
        except Exception as exc:
            # Keep the sweep going; record the first line of the error.
            value = np.nan
            status = type(exc).__name__ + ":" + str(exc).split("\n", 1)[0]
        seconds = time.perf_counter() - start

        if rank == 0:
            abs_error = float("nan") if exact is None else abs(value - exact)
            rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
            exact_text = "nan" if exact is None else f"{exact:.16e}"
            print(
                f"{obs_name} {exact_text} {value!r} "
                f"{abs_error:.6e} {rel_error:.6e} {seconds:.3f} "
                f"{_metric(backend, 'last_truncation_error'):.6e} "
                f"{_metric(backend, 'last_max_truncation_error'):.6e} {status}",
                flush=True,
            )
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: ``run``, ``validate`` or ``list`` cases.

    ``list`` prints the built-in cases and returns. ``validate`` forces the
    exact reference on and clamps ``nqubits`` to ``--exact-max-qubits``.
    A malformed ``--bond`` value is reported as an argparse usage error
    instead of an uncaught ``ValueError`` traceback.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("mode", choices=("run", "validate", "list"))
    parser.add_argument("--case", choices=sorted(CASES), default="main1")
    parser.add_argument("--observables", nargs="+")
    parser.add_argument("--obs-filter", default="")
    parser.add_argument("--nqubits", type=int)
    parser.add_argument("--nlayers", type=int)
    # No ``type=`` here: the string sentinel must survive parsing so that
    # apply_case_defaults can detect "not given".
    parser.add_argument("--bond", "--bonds", dest="bond", default="case-default")
    parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
    parser.add_argument("--seed", type=int)
    parser.add_argument("--torch-threads", type=int, default=8)
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    args = parser.parse_args()

    if args.mode == "list":
        for name, case in CASES.items():
            print(
                f"{name}: circuit={case.circuit_kind} "
                f"observables={','.join(case.observables)} "
                f"nqubits={case.nqubits} nlayers={case.nlayers} "
                f"bond={case.bond} seed={case.seed}"
            )
        return

    apply_case_defaults(args)
    if isinstance(args.bond, str):
        # Still a string means the user passed --bond explicitly.
        try:
            args.bond = optional_int(args.bond)
        except ValueError:
            parser.error(f"argument --bond/--bonds: invalid value: {args.bond!r}")

    if args.mode == "validate":
        args.exact = True
        args.nqubits = min(args.nqubits, args.exact_max_qubits)

    run_case(args)


if __name__ == "__main__":
    main()
|
||||
72
tools/profile_vidal_chrome.py
Normal file
72
tools/profile_vidal_chrome.py
Normal file
@@ -0,0 +1,72 @@
|
||||
"""Chrome trace profiler for the VidalBackend fast path."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from torch.profiler import ProfilerActivity, profile
|
||||
|
||||
from qibotn.benchmark_cases import build_circuit, terms_to_dict, observable_terms
|
||||
from qibotn.expectation_runner import ExpectationConfig, run_cpu_expectation
|
||||
|
||||
|
||||
def main():
    """Profile one CPU expectation run and save a Chrome trace plus tables.

    Builds a ``brickwall_cnot`` circuit with the ``ring_xz`` observable, runs
    ``run_cpu_expectation`` under the torch CPU profiler, prints the summary
    tables, and writes ``<prefix>.txt`` and ``<prefix>.json`` under
    ``profiles/`` (relative to the current directory).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--nqubits", type=int, default=34)
    cli.add_argument("--nlayers", type=int, default=20)
    cli.add_argument("--bond", type=int, default=512)
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument("--torch-threads", type=int, default=32)
    cli.add_argument("--cut-ratio", type=float, default=1e-12)
    cli.add_argument("--profile-memory", action="store_true")
    cli.add_argument("--rows", type=int, default=60)
    args = cli.parse_args()

    torch.set_num_threads(args.torch_threads)

    # Both output files share one prefix that encodes the run parameters.
    prefix = f"profiles/vidal_n{args.nqubits}_l{args.nlayers}_b{args.bond}_t{args.torch_threads}"
    trace_path = Path(f"{prefix}.json")
    table_path = Path(f"{prefix}.txt")
    trace_path.parent.mkdir(parents=True, exist_ok=True)

    circuit = build_circuit("brickwall_cnot", args.nqubits, args.nlayers, args.seed)
    observable = terms_to_dict(observable_terms("ring_xz", args.nqubits))
    config = ExpectationConfig(
        ansatz="mps",
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        torch_threads=args.torch_threads,
    )

    print(
        f"profile vidal nqubits={args.nqubits} nlayers={args.nlayers} "
        f"bond={args.bond} threads={args.torch_threads}"
    )

    # --profile-memory also switches on shape and stack recording.
    detailed = args.profile_memory
    with profile(
        activities=[ProfilerActivity.CPU],
        record_shapes=detailed,
        profile_memory=detailed,
        with_stack=detailed,
    ) as prof:
        result = run_cpu_expectation(circuit, observable, config)

    by_self_time = prof.key_averages().table(sort_by='self_cpu_time_total', row_limit=args.rows)
    by_total_time = prof.key_averages().table(sort_by='cpu_time_total', row_limit=args.rows)
    table = (
        f"expval={result.value:.16e}\n\n"
        f"# sorted by self_cpu_time_total\n"
        f"{by_self_time}\n\n"
        f"# sorted by cpu_time_total\n"
        f"{by_total_time}\n"
    )

    print(table, end="")
    table_path.write_text(table, encoding="utf-8")
    prof.export_chrome_trace(str(trace_path))
    print(f"trace={trace_path}\ntable={table_path}")


if __name__ == "__main__":
    main()
|
||||
109
tools/qibojit_reference_expectation.py
Normal file
109
tools/qibojit_reference_expectation.py
Normal file
@@ -0,0 +1,109 @@
|
||||
"""Compute and cache a qibojit state-vector reference for the ring-XZ observable."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import qibo
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
    """Build the brick-wall reference circuit.

    Each layer applies RY then RZ with angles from ``rng.uniform(-pi, pi)``
    on every qubit, then CNOTs on the even bonds followed by the odd bonds.
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    # Control qubits of the CNOTs: even bonds first, then odd bonds.
    controls = list(range(0, nqubits - 1, 2)) + list(range(1, nqubits - 1, 2))
    for _ in range(nlayers):
        for qubit in range(nqubits):
            for rotation in (gates.RY, gates.RZ):
                angle = rng.uniform(-math.pi, math.pi)
                circuit.add(rotation(qubit, theta=angle))
        for control in controls:
            circuit.add(gates.CNOT(control, control + 1))
    return circuit
|
||||
|
||||
|
||||
def ring_xz_expectation(state, nqubits, chunk_size):
    """Return ``0.5 * sum_i <X_i Z_((i+1) mod n)>`` for a state vector.

    Args:
        state: 1-D array of ``2**nqubits`` amplitudes. Qubit 0 is the most
            significant bit of the basis index, as fixed by the shifts below.
        nqubits: number of qubits ``n``.
        chunk_size: number of amplitudes processed per step; bounds the size
            of the temporary index arrays.

    Returns:
        The real part of the expectation value as a Python ``float``.

    Raises:
        ValueError: if ``chunk_size`` is not positive (a negative value used
            to skip every chunk and silently return 0.0) or if ``state`` does
            not hold ``2**nqubits`` amplitudes.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}.")
    if state.size != 1 << nqubits:
        raise ValueError(
            f"state has {state.size} amplitudes, expected {1 << nqubits} "
            f"for {nqubits} qubits."
        )

    value = 0.0
    for qubit in range(nqubits):
        next_qubit = (qubit + 1) % nqubits
        # X on `qubit` flips that bit of the basis index.
        x_flip = 1 << (nqubits - 1 - qubit)
        # Z on `next_qubit` contributes a sign given by that bit.
        z_shift = nqubits - 1 - next_qubit
        term = 0.0
        for start in range(0, state.size, chunk_size):
            stop = min(start + chunk_size, state.size)
            indices = np.arange(start, stop, dtype=np.int64)
            z_phase = 1 - 2 * ((indices >> z_shift) & 1)
            # sum_i conj(state[i ^ flip]) * z(i) * state[i] over this chunk.
            term += np.vdot(state[indices ^ x_flip], z_phase * state[start:stop]).real
        value += 0.5 * term
    return float(value)
|
||||
|
||||
|
||||
def default_output_path(nqubits, nlayers, seed):
    """Return the cache file path under ``references/`` for these parameters."""
    filename = f"qibojit_ring_xz_n{nqubits}_l{nlayers}_seed{seed}.json"
    return Path("references", filename)
|
||||
|
||||
|
||||
def main():
    """Compute, or load from cache, the qibojit ring-XZ reference value.

    An existing output file is reused unless ``--force`` is given. Otherwise
    the state vector is simulated with the ``qibojit`` backend, the
    expectation is evaluated in chunks, and the result is written as JSON.

    Raises:
        MemoryError: if the estimated state vector exceeds ``--max-state-gb``
            and ``--allow-large`` was not passed.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--nqubits", type=int, default=32)
    cli.add_argument("--nlayers", type=int, default=3)
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument("--output")
    cli.add_argument("--force", action="store_true")
    cli.add_argument("--allow-large", action="store_true")
    cli.add_argument("--max-state-gb", type=float, default=32.0)
    cli.add_argument("--chunk-size", type=int, default=1 << 20)
    args = cli.parse_args()

    if args.output:
        output = Path(args.output)
    else:
        output = default_output_path(args.nqubits, args.nlayers, args.seed)

    # Reuse a cached result when one exists.
    if output.exists() and not args.force:
        cached = json.loads(output.read_text(encoding="utf-8"))
        print(f"loaded {output}")
        print(f"expectation={float(cached['expectation']):.16e}")
        return

    # Size of a complex128 state vector in GiB; guards against accidental
    # huge allocations.
    bytes_per_amplitude = np.dtype(np.complex128).itemsize
    state_gb = (2**args.nqubits) * bytes_per_amplitude / (1024**3)
    if not args.allow_large and state_gb > args.max_state_gb:
        raise MemoryError(
            f"Estimated state vector alone is {state_gb:.1f} GiB. "
            "Pass --allow-large after confirming the node has enough memory."
        )

    qibo.set_backend("qibojit")
    circuit = build_circuit(args.nqubits, args.nlayers, args.seed)

    # The timer covers simulation and expectation, not circuit construction.
    start = time.perf_counter()
    state = circuit().state(numpy=True).reshape(-1)
    expectation = ring_xz_expectation(state, args.nqubits, args.chunk_size)
    elapsed = time.perf_counter() - start

    data = {
        "backend": "qibojit",
        "observable": "0.5 * sum_i X_i Z_((i+1) mod n)",
        "nqubits": args.nqubits,
        "nlayers": args.nlayers,
        "seed": args.seed,
        "expectation": expectation,
        "seconds": elapsed,
        "state_vector_gib_estimate": state_gb,
    }
    output.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(data, indent=2, sort_keys=True) + "\n"
    output.write_text(payload, encoding="utf-8")

    print(f"saved {output}")
    print(f"expectation={expectation:.16e}")
    print(f"seconds={elapsed:.3f}")


if __name__ == "__main__":
    main()
|
||||
127
tools/run_cpu_large_cases.sh
Executable file
127
tools/run_cpu_large_cases.sh
Executable file
@@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Large CPU expectation benchmarks for two-server runs.
|
||||
#
|
||||
# Defaults assume two Intel Xeon Platinum 8558P servers with about 500 GiB RAM
|
||||
# each. Override HOSTFILE, PYTHON_BIN, MPIEXEC, or the per-case knobs below as
|
||||
# needed.
|
||||
|
||||
# Run from the repository root so the relative paths below resolve.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Launcher settings; each keeps a non-empty value from the environment.
: "${PYTHON_BIN:=.venv/bin/python}"
: "${MPIEXEC:=mpiexec}"
: "${HOSTFILE:=hostfile}"

# MPI rank counts and per-rank torch thread counts for the two backends.
: "${MPS_RANKS:=8}"
: "${MPS_THREADS:=12}"
: "${TN_RANKS:=12}"
: "${TN_THREADS:=8}"

# OpenMP/MKL default to one thread each; torch threads are passed per command.
: "${OMP_NUM_THREADS:=1}"
: "${MKL_NUM_THREADS:=1}"
export OMP_NUM_THREADS MKL_NUM_THREADS
|
||||
|
||||
# run_mpi RANKS ARGS...
# Launch "$PYTHON_BIN ARGS..." on RANKS MPI ranks using $HOSTFILE.
run_mpi() {
  local nranks="$1"
  shift
  local -a launcher=("$MPIEXEC" -hostfile "$HOSTFILE" -n "$nranks")
  "${launcher[@]}" "$PYTHON_BIN" "$@"
}
|
||||
|
||||
# run_case TITLE COMMAND...
# Print a banner with the title, the launcher settings and the command line,
# then run COMMAND.
run_case() {
  local title="$1"
  shift
  local rule="================================================================================"
  printf '%s\n' \
    "" \
    "$rule" \
    "$title" \
    "$rule" \
    "HOSTFILE=$HOSTFILE PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC" \
    "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS" \
    "$*"
  "$@"
}
|
||||
|
||||
# Sub-command dispatch. Each case resolves its scale knobs first so the banner
# title shows the values that are actually run; the titles used to hard-code
# the defaults even when the env overrides changed them.
case "${1:-help}" in
  smoke)
    case_nq="${MPS_SMOKE_NQ:-40}"
    case_layers="${MPS_SMOKE_LAYERS:-30}"
    case_bond="${MPS_SMOKE_BOND:-2048}"
    run_case "MPS MPI smoke: n=$case_nq layers=$case_layers bond=$case_bond" \
      run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
      --mpi --mps \
      --nqubits "$case_nq" \
      --nlayers "$case_layers" \
      --bond "$case_bond" \
      --torch-threads "$MPS_THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz \
      --observables ring_xz open_zz range2_xx

    case_nq="${TN_SMOKE_NQ:-32}"
    case_layers="${TN_SMOKE_LAYERS:-16}"
    case_slices="${TN_SMOKE_SLICES:-12}"
    run_case "TN MPI smoke: n=$case_nq layers=$case_layers target_slices=$case_slices" \
      run_mpi "$TN_RANKS" benchmark_cpu_expectation.py \
      --mpi \
      --nqubits "$case_nq" \
      --nlayers "$case_layers" \
      --torch-threads "$TN_THREADS" \
      --circuits brickwall_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz range2_xx \
      --tn-target-slices "$case_slices"
    ;;

  mps-long)
    case_nq="${MPS_LONG_NQ:-64}"
    case_layers="${MPS_LONG_LAYERS:-48}"
    case_bond="${MPS_LONG_BOND:-4096}"
    run_case "MPS MPI long: n=$case_nq layers=$case_layers bond=$case_bond" \
      run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
      --mpi --mps \
      --nqubits "$case_nq" \
      --nlayers "$case_layers" \
      --bond "$case_bond" \
      --torch-threads "$MPS_THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz mixed_local range2_xx
    ;;

  mps-pressure)
    case_nq="${MPS_PRESSURE_NQ:-80}"
    case_layers="${MPS_PRESSURE_LAYERS:-64}"
    case_bond="${MPS_PRESSURE_BOND:-4096}"
    run_case "MPS MPI pressure: n=$case_nq layers=$case_layers bond=$case_bond" \
      run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
      --mpi --mps \
      --nqubits "$case_nq" \
      --nlayers "$case_layers" \
      --bond "$case_bond" \
      --torch-threads "$MPS_THREADS" \
      --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz swap_scramble \
      --observables ring_xz open_zz mixed_local range2_xx long_z_string
    ;;

  tn-long)
    case_nq="${TN_LONG_NQ:-36}"
    case_layers="${TN_LONG_LAYERS:-20}"
    case_slices="${TN_LONG_SLICES:-24}"
    run_case "TN MPI long: n=$case_nq layers=$case_layers target_slices=$case_slices" \
      run_mpi "$TN_RANKS" benchmark_cpu_expectation.py \
      --mpi \
      --nqubits "$case_nq" \
      --nlayers "$case_layers" \
      --torch-threads "$TN_THREADS" \
      --circuits brickwall_cnot shifted_cz rxx_rzz \
      --observables ring_xz open_zz range2_xx \
      --tn-target-slices "$case_slices"
    ;;

  all)
    # NOTE(review): mps-pressure is not part of "all"; presumably deliberate
    # because of its cost. Confirm.
    "$0" smoke
    "$0" mps-long
    "$0" tn-long
    ;;

  help|*)
    cat >&2 <<'EOF'
Usage: tools/run_cpu_large_cases.sh [smoke|mps-long|mps-pressure|tn-long|all]

Common overrides:
HOSTFILE=hostfile
PYTHON_BIN=.venv/bin/python
MPIEXEC=mpiexec
MPS_RANKS=8 MPS_THREADS=12
TN_RANKS=12 TN_THREADS=8

Scale overrides:
MPS_SMOKE_NQ=40 MPS_SMOKE_LAYERS=30 MPS_SMOKE_BOND=2048
TN_SMOKE_NQ=32 TN_SMOKE_LAYERS=16 TN_SMOKE_SLICES=12
MPS_LONG_NQ=64 MPS_LONG_LAYERS=48 MPS_LONG_BOND=4096
MPS_PRESSURE_NQ=80 MPS_PRESSURE_LAYERS=64 MPS_PRESSURE_BOND=4096
TN_LONG_NQ=36 TN_LONG_LAYERS=20 TN_LONG_SLICES=24
EOF
    exit 2
    ;;
esac
|
||||
148
tools/run_cpu_single_cases.sh
Executable file
148
tools/run_cpu_single_cases.sh
Executable file
@@ -0,0 +1,148 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Single-node CPU scale probes for expectation benchmarks.
|
||||
#
|
||||
# Intended for one 96-core / ~500 GiB RAM node. The default "probe" mode runs
|
||||
# moderate MPS and TN cases first. Larger modes are available after checking
|
||||
# runtime and memory from the probe output.
|
||||
|
||||
# Run from the repository root so the relative paths below resolve.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Tool locations and interpreter flags; non-empty environment values win.
: "${PYTHON_BIN:=.venv/bin/python}"
: "${PYTHON_FLAGS:=-u}"
: "${MPIEXEC:=mpiexec}"
: "${TIME_BIN:=/usr/bin/time}"

# MPI rank counts and per-rank torch thread counts for the two backends.
: "${MPS_RANKS:=8}"
: "${MPS_THREADS:=12}"
: "${TN_RANKS:=8}"
: "${TN_THREADS:=12}"

# OpenMP/MKL default to one thread each; torch threads are passed per command.
: "${OMP_NUM_THREADS:=1}"
: "${MKL_NUM_THREADS:=1}"
export OMP_NUM_THREADS MKL_NUM_THREADS
|
||||
|
||||
# estimate_mps_memory NQUBITS BOND
# Print a rough resident-memory estimate for the MPS tensors, in total and per
# MPI rank, computed as n * 2 * chi^2 * 16 bytes.
estimate_mps_memory() {
  local sites="$1" chi="$2"
  "$PYTHON_BIN" - "$sites" "$chi" "$MPS_RANKS" <<'PY'
import sys

n, chi, ranks = (int(arg) for arg in sys.argv[1:4])
resident = n * 2 * chi * chi * 16
per_rank = resident / ranks
print(
    "MPS rough resident memory: "
    f"total={resident / 1024**3:.1f} GiB "
    f"per_rank={per_rank / 1024**3:.1f} GiB "
    "(temporary eig/SVD workspaces are additional)"
)
PY
}
|
||||
|
||||
# run_timed COMMAND...
# Echo the command between two rules, then run it under "$TIME_BIN -v".
run_timed() {
  local rule="--------------------------------------------------------------------------------"
  printf '%s\n' "" "$rule" "$*" "$rule"
  "$TIME_BIN" -v "$@"
}
|
||||
|
||||
# run_mps_case LABEL NQUBITS NLAYERS BOND [EXTRA_ARGS...]
# Print a banner and a memory estimate, then run the MPS benchmark under MPI
# with timing. EXTRA_ARGS are appended to the benchmark command line.
run_mps_case() {
  local label="$1" nqubits="$2" nlayers="$3" bond="$4"
  shift 4
  local rule="================================================================================"
  printf '%s\n' \
    "" \
    "$rule" \
    "$label" \
    "$rule" \
    "PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC" \
    "MPS_RANKS=$MPS_RANKS MPS_THREADS=$MPS_THREADS" \
    "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS"
  estimate_mps_memory "$nqubits" "$bond"

  local -a benchmark_args=(
    --mpi --mps
    --nqubits "$nqubits"
    --nlayers "$nlayers"
    --bond "$bond"
    --torch-threads "$MPS_THREADS"
  )
  # PYTHON_FLAGS is left unquoted so several flags split into separate words.
  # shellcheck disable=SC2086
  run_timed "$MPIEXEC" -n "$MPS_RANKS" "$PYTHON_BIN" $PYTHON_FLAGS benchmark_cpu_expectation.py \
    "${benchmark_args[@]}" \
    "$@"
}
|
||||
|
||||
# run_tn_case LABEL NQUBITS NLAYERS [EXTRA_ARGS...]
# Print a banner, then run the TN benchmark under MPI with timing.
# EXTRA_ARGS are appended to the benchmark command line.
run_tn_case() {
  local label="$1" nqubits="$2" nlayers="$3"
  shift 3
  local rule="================================================================================"
  printf '%s\n' \
    "" \
    "$rule" \
    "$label" \
    "$rule" \
    "PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC" \
    "TN_RANKS=$TN_RANKS TN_THREADS=$TN_THREADS" \
    "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS" \
    "TN memory is contraction-tree dependent; increase --tn-target-slices if RSS is high."

  local -a benchmark_args=(
    --mpi
    --nqubits "$nqubits"
    --nlayers "$nlayers"
    --torch-threads "$TN_THREADS"
  )
  # PYTHON_FLAGS is left unquoted so several flags split into separate words.
  # shellcheck disable=SC2086
  run_timed "$MPIEXEC" -n "$TN_RANKS" "$PYTHON_BIN" $PYTHON_FLAGS benchmark_cpu_expectation.py \
    "${benchmark_args[@]}" \
    "$@"
}
|
||||
|
||||
# Circuit/observable suites shared by the medium and long cases.
mps_suite=(
  --circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz
  --observables ring_xz open_zz mixed_local range2_xx
)
tn_suite=(
  --circuits brickwall_cnot shifted_cz rxx_rzz
  --observables ring_xz open_zz range2_xx
)

# Sub-command dispatch; a missing argument is treated like "help".
case "${1:-help}" in
  probe)
    # One circuit and one observable per backend.
    run_mps_case "MPS probe: n=40 layers=30 bond=2048" 40 30 2048 \
      --circuits brickwall_cnot \
      --observables ring_xz

    run_tn_case "TN probe: n=28 layers=12 target_slices=8" 28 12 \
      --circuits brickwall_cnot \
      --observables ring_xz \
      --tn-target-slices 8
    ;;

  mps-medium)
    run_mps_case "MPS medium: n=56 layers=40 bond=3072" 56 40 3072 "${mps_suite[@]}"
    ;;

  mps-long)
    run_mps_case "MPS long: n=64 layers=48 bond=4096" 64 48 4096 "${mps_suite[@]}"
    ;;

  tn-medium)
    run_tn_case "TN medium: n=32 layers=16 target_slices=16" 32 16 \
      "${tn_suite[@]}" \
      --tn-target-slices 16
    ;;

  tn-long)
    run_tn_case "TN long: n=36 layers=20 target_slices=32" 36 20 \
      "${tn_suite[@]}" \
      --tn-target-slices 32
    ;;

  help|*)
    cat >&2 <<'EOF'
Usage: tools/run_cpu_single_cases.sh [probe|mps-medium|mps-long|tn-medium|tn-long]

Common overrides:
PYTHON_BIN=.venv/bin/python
MPIEXEC=mpiexec
MPS_RANKS=8 MPS_THREADS=12
TN_RANKS=8 TN_THREADS=12
OMP_NUM_THREADS=1 MKL_NUM_THREADS=1
EOF
    exit 2
    ;;
esac
|
||||
243
tools/run_tn_custom.py
Normal file
243
tools/run_tn_custom.py
Normal file
@@ -0,0 +1,243 @@
|
||||
#!/usr/bin/env python
|
||||
"""Run TN expectation for a user-provided circuit and observable.
|
||||
|
||||
The case module should define:
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed): ...
|
||||
def build_observable(nqubits, seed): ...
|
||||
|
||||
``build_observable`` may return a Qibo SymbolicHamiltonian/form or the qibotn
|
||||
dict form:
|
||||
|
||||
{"terms": [
|
||||
{"coefficient": 1.0, "operators": [("X", 0), ("Z", 1)]},
|
||||
]}
|
||||
|
||||
For a single repeated Pauli string, pass ``--pauli-pattern`` instead of
|
||||
defining ``build_observable``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import importlib.util
|
||||
import inspect
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Repository root is one level above the directory holding this script.
ROOT = Path(__file__).resolve().parents[1]
# Put the in-repo ``src`` tree first on the import path (once) so the
# project imports below resolve to this checkout.
SRC = ROOT.joinpath("src")
if str(SRC) not in sys.path:
    sys.path[:0] = [str(SRC)]
|
||||
|
||||
from qibotn.expectation_runner import ( # noqa: E402
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
def optional_int(text):
    """Parse an integer option; "no limit" keywords give ``None``.

    ``none``, ``null``, ``inf`` and ``unlimited`` are matched
    case-insensitively. Any other input is converted with ``int``.
    """
    is_unbounded = isinstance(text, str) and text.lower() in {
        "none",
        "null",
        "inf",
        "unlimited",
    }
    return None if is_unbounded else int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Parse a float option; "no limit" keywords give ``None``.

    ``none``, ``null``, ``inf`` and ``unlimited`` are matched
    case-insensitively. Any other input is converted with ``float``.
    """
    is_unbounded = isinstance(text, str) and text.lower() in {
        "none",
        "null",
        "inf",
        "unlimited",
    }
    return None if is_unbounded else float(text)
|
||||
|
||||
|
||||
def load_module(path):
    """Import the Python file at ``path`` and return the module object.

    The module is named after the file stem. Following the importlib recipe
    for importing a source file directly, it is registered in ``sys.modules``
    before its code runs, so that lookups by module name (for example
    pickling of objects defined in the case module) can resolve it. An
    already-imported module with the same name is never replaced, and the
    registration is rolled back if executing the file fails.

    Raises:
        RuntimeError: if no import spec or loader can be created for ``path``.
        Exception: whatever the case module raises while executing.
    """
    path = Path(path).resolve()
    name = path.stem
    spec = importlib.util.spec_from_file_location(name, path)
    if spec is None or spec.loader is None:
        raise RuntimeError(f"Cannot import case module from {path}.")
    module = importlib.util.module_from_spec(spec)

    # Do not shadow an existing module (e.g. a case file called "json.py").
    registered = name not in sys.modules
    if registered:
        sys.modules[name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        if registered:
            sys.modules.pop(name, None)
        raise
    return module
|
||||
|
||||
|
||||
def call_builder(fn, **kwargs):
    """Call ``fn`` with the subset of ``kwargs`` that its signature accepts.

    If ``fn`` declares a ``**kwargs`` catch-all, every keyword is forwarded.
    Otherwise only keywords whose names match a parameter are passed.
    """
    parameters = inspect.signature(fn).parameters
    for parameter in parameters.values():
        if parameter.kind == parameter.VAR_KEYWORD:
            return fn(**kwargs)
    return fn(**{name: value for name, value in kwargs.items() if name in parameters})
|
||||
|
||||
|
||||
def load_observable(args, module):
    """Resolve the observable to evaluate, from the CLI or the case module.

    Sources, in order of precedence:
      1. ``args.pauli_pattern``: returned as ``{"pauli_string_pattern": ...}``.
      2. ``args.observable_json``: the parsed JSON file. It is read as UTF-8
         rather than with the locale encoding, matching the other tools.
      3. ``module.build_observable``: called via ``call_builder`` with
         ``nqubits``, ``nlayers`` and ``seed``.
      4. ``module.OBSERVABLE``: returned as is.

    Raises:
        ValueError: if none of the sources is available.
    """
    if args.pauli_pattern:
        return {"pauli_string_pattern": args.pauli_pattern}
    if args.observable_json:
        with Path(args.observable_json).open(encoding="utf-8") as f:
            return json.load(f)
    if hasattr(module, "build_observable"):
        return call_builder(
            module.build_observable,
            nqubits=args.nqubits,
            nlayers=args.nlayers,
            seed=args.seed,
        )
    if hasattr(module, "OBSERVABLE"):
        return module.OBSERVABLE
    raise ValueError(
        "No observable supplied. Define build_observable/OBSERVABLE in the case "
        "module, or pass --pauli-pattern / --observable-json."
    )
|
||||
|
||||
|
||||
def build_parallel_opts(args):
    """Translate the ``--tn-*`` / ``--dask-*`` CLI options into an options dict.

    The five base keys are always present; ``slicing_opts`` is ``None`` when
    no slicing target was given. Optional keys are added only when the
    corresponding argument is set.
    """
    slicing = {}
    for key, setting in (
        ("target_slices", args.tn_target_slices),
        ("target_size", args.tn_target_size),
    ):
        if setting is not None:
            slicing[key] = setting

    # Search workers default to the torch thread count.
    workers = args.tn_search_workers or args.torch_threads
    opts = dict(
        slicing_opts=slicing or None,
        search_workers=workers,
        max_repeats=args.tn_search_repeats,
        max_time=args.tn_search_time,
        print_stats=not args.no_tn_stats,
    )

    for key, setting in (
        ("search_backend", args.tn_search_backend),
        ("dask_address", args.dask_address),
    ):
        if setting is not None:
            opts[key] = setting
    if args.dask_close_workers:
        opts["dask_close_workers"] = True
    for key, setting in (
        ("save_tree_path", args.tn_save_tree),
        ("load_tree_path", args.tn_load_tree),
    ):
        if setting is not None:
            opts[key] = setting
    if args.tn_search_only:
        opts["search_only"] = True
    return opts
|
||||
|
||||
|
||||
def main():
    """Run a CPU tensor-network expectation value for a custom case module.

    The case module must define ``build_circuit``; the observable comes from
    ``load_observable``. Rank 0 prints the run header, the result line
    (value, optional exact reference and errors, wall time) and one
    ``tn_term_summary`` line per entry of ``result.parallel_stats``.

    Raises:
        ValueError: if the case module lacks ``build_circuit``, or if
            ``--exact`` is requested for more than ``--exact-max-qubits``
            qubits.
    """
    parser = argparse.ArgumentParser(
        description="Run CPU TN expectation for a custom qibo circuit module."
    )
    parser.add_argument("case_module", help="Python file defining build_circuit.")
    parser.add_argument("--nqubits", type=int, required=True)
    parser.add_argument("--nlayers", type=int, default=0)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--mpi", action="store_true")
    parser.add_argument("--exact", action="store_true")
    parser.add_argument("--exact-max-qubits", type=int, default=24)
    parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
    parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
    parser.add_argument("--torch-threads", type=int, default=8)
    parser.add_argument("--quimb-backend", choices=("numpy", "torch"), default="torch")
    parser.add_argument("--dtype", choices=("complex128", "complex64"), default="complex128")
    parser.add_argument("--pauli-pattern")
    parser.add_argument("--observable-json")
    parser.add_argument("--tn-target-slices", type=int)
    parser.add_argument("--tn-target-size", type=int, default=2**32)
    parser.add_argument("--tn-search-workers", type=int)
    parser.add_argument("--tn-search-repeats", type=int, default=128)
    parser.add_argument("--tn-search-time", type=float, default=60.0)
    parser.add_argument("--tn-search-backend", choices=("processpool", "dask"))
    parser.add_argument("--dask-address")
    parser.add_argument("--dask-close-workers", action="store_true")
    parser.add_argument("--tn-save-tree")
    parser.add_argument("--tn-load-tree")
    parser.add_argument("--tn-search-only", action="store_true")
    parser.add_argument("--no-tn-stats", action="store_true")
    args = parser.parse_args()

    # Check the --exact size limit on EVERY rank and before any MPI work.
    # If only rank 0 raised, the remaining ranks would still enter
    # run_cpu_expectation and could wait forever on the rank that died.
    if args.exact and args.nqubits > args.exact_max_qubits:
        raise ValueError(
            f"--exact is limited to {args.exact_max_qubits} qubits by default."
        )

    rank = 0
    if args.mpi:
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()

    module = load_module(args.case_module)
    if not hasattr(module, "build_circuit"):
        raise ValueError("case_module must define build_circuit.")

    circuit = call_builder(
        module.build_circuit,
        nqubits=args.nqubits,
        nlayers=args.nlayers,
        seed=args.seed,
    )
    observable = load_observable(args, module)

    config = ExpectationConfig(
        ansatz="tn",
        mpi=args.mpi,
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        quimb_backend=args.quimb_backend,
        dtype=args.dtype,
        torch_threads=args.torch_threads,
        parallel_opts=build_parallel_opts(args),
    )

    if rank == 0:
        mode = "MPI" if args.mpi else "serial"
        print(
            f"backend=cpu ansatz=TN mode={mode} case={Path(args.case_module).name} "
            f"nqubits={args.nqubits} nlayers={args.nlayers} seed={args.seed} "
            f"quimb_backend={args.quimb_backend} dtype={args.dtype} "
            f"torch_threads={args.torch_threads}",
            flush=True,
        )
        print("observable exact value abs_error rel_error seconds", flush=True)

    # The exact reference is computed by rank 0 only; the size limit was
    # already enforced above on all ranks.
    exact = None
    if args.exact and rank == 0:
        exact = exact_for_observable(circuit, observable, args.nqubits)

    result = run_cpu_expectation(circuit, observable, config)
    if args.mpi and result.rank != 0:
        return

    abs_error = float("nan") if exact is None else abs(result.value - exact)
    # Floor the denominator so an exact value of zero does not divide by zero.
    rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
    exact_text = "nan" if exact is None else f"{exact:.16e}"
    print(
        f"custom {exact_text} {result.value:.16e} "
        f"{abs_error:.6e} {rel_error:.6e} {result.seconds:.3f}",
        flush=True,
    )

    for stat in result.parallel_stats or ():
        cost = stat["path_cost"]
        search_stats = stat.get("search_stats", {})
        print(
            "tn_term_summary "
            f"term={stat.get('term_index', 0)} "
            f"search_seconds={stat.get('search_seconds', float('nan')):.3f} "
            f"contract_seconds={stat.get('contract_seconds', float('nan')):.3f} "
            f"completed_trials={search_stats.get('completed_trials', 'na')} "
            f"finite_trials={search_stats.get('finite_trials', 'na')} "
            f"failed_trials={search_stats.get('failed_trials', 'na')} "
            f"requested_trials={search_stats.get('requested_trials', 'na')} "
            f"best_score={search_stats.get('best_score', float('nan')):.6g} "
            f"slices={cost.get('slices')} "
            f"log10_flops={cost.get('log10_flops', float('nan')):.3f} "
            f"log10_write={cost.get('log10_write', float('nan')):.3f} "
            f"log2_size={cost.get('log2_size', float('nan')):.3f} "
            f"peak_memory_gib={cost.get('peak_memory_gib', float('nan')):.3g} "
            f"rank_slices={stat.get('rank_slices')}",
            flush=True,
        )


if __name__ == "__main__":
    main()
|
||||
93
tools/run_tn_dask_mpi_all.sh
Executable file
93
tools/run_tn_dask_mpi_all.sh
Executable file
@@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Run from the repository root so the relative tools/ and tree paths resolve.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Every setting below keeps a non-empty value from the environment and
# otherwise takes the default shown.
: "${CASE:=main1}"
: "${OBSERVABLES:=long_z_string}"
: "${NQUBITS:=34}"
: "${NLAYERS:=20}"
: "${TORCH_THREADS:=48}"
: "${SEARCH_REPEATS:=2048}"
: "${SEARCH_TIME:=300}"
: "${TN_TARGET_SIZE:=8589934592}"
: "${TN_TARGET_SLICES:=}"

: "${PYTHON_BIN:=.venv/bin/python}"
: "${DTYPE:=complex64}"
: "${TREE_DIR:=trees/contest_tn}"
: "${DASK_ADDRESS:=tcp://10.20.1.103:8786}"
: "${MPIEXEC_FULL:=mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2}"
: "${SYNC_TREES:=1}"
# SYNC_HOSTS falls back to WORKER_HOSTS when it is not set itself.
: "${SYNC_HOSTS:=${WORKER_HOSTS:-}}"
: "${SSH_BIN:=ssh}"

export TCM_ENABLE="${TCM_ENABLE:-1}"

# The target size is always passed; the slice count only when one was given.
tn_slice_args=(--tn-target-size "$TN_TARGET_SIZE")
[[ -z "$TN_TARGET_SLICES" ]] || tn_slice_args+=(--tn-target-slices "$TN_TARGET_SLICES")
|
||||
|
||||
# Return 0 when HOST ($1) refers to this machine: a loopback name, the short
# or fully-qualified hostname, or one of the addresses from `hostname -I`.
# Return 1 otherwise.
is_local_host() {
  local host="$1" addr
  [[ "$host" == "localhost" || "$host" == "127.0.0.1" ]] && return 0
  [[ "$host" == "$(hostname)" ]] && return 0
  [[ "$host" == "$(hostname -f 2>/dev/null || true)" ]] && return 0
  # Compare each local address literally. A regex match would let the dots in
  # an IP match any character, and a `... | grep -q` pipeline can report
  # failure under `pipefail` when grep exits before the writer has finished.
  for addr in $(hostname -I 2>/dev/null || true); do
    [[ "$addr" == "$host" ]] && return 0
  done
  return 1
}
|
||||
|
||||
# Copy the saved tree directory to every remote host listed in SYNC_HOSTS.
# Does nothing unless SYNC_TREES is "1" and SYNC_HOSTS is non-empty. A relative
# TREE_DIR is resolved against ROOT_DIR on both the local and the remote side.
sync_trees_to_hosts() {
  [[ "$SYNC_TREES" == "1" ]] || return 0
  [[ -n "$SYNC_HOSTS" ]] || return 0

  local src_dir="$TREE_DIR"
  local dst_dir="$TREE_DIR"
  if [[ "$TREE_DIR" != /* ]]; then
    src_dir="$ROOT_DIR/$TREE_DIR"
    dst_dir="$ROOT_DIR/$TREE_DIR"
  fi

  # Keep the loop variables out of the global scope.
  local host
  local -a pkl_files
  # SYNC_HOSTS is a whitespace-separated list, so it is split on purpose.
  for host in $SYNC_HOSTS; do
    is_local_host "$host" && continue
    echo "Sync tree dir to $host:$dst_dir"
    "$SSH_BIN" "$host" "mkdir -p $(printf '%q' "$dst_dir")"
    if command -v rsync >/dev/null 2>&1; then
      rsync -a "$src_dir/" "$host:$dst_dir/"
    else
      # An unmatched glob stays literal; check before handing it to scp so an
      # empty directory is a no-op here just as it is on the rsync path.
      pkl_files=("$src_dir"/*.pkl)
      if [[ -e "${pkl_files[0]}" ]]; then
        scp -q "${pkl_files[@]}" "$host:$dst_dir/"
      else
        echo "No .pkl trees in $src_dir; nothing to copy to $host" >&2
      fi
    fi
  done
}
|
||||
|
||||
tools/manage_tn_dask_cluster.sh start

# OBSERVABLES is a space-separated list. Split it into an array once, the same
# way MPIEXEC_FULL is split below, so each name is passed as its own argument
# without being subject to glob expansion.
read -r -a observables <<< "$OBSERVABLES"

# Stage 1: contraction-path search on the dask cluster; trees go to TREE_DIR.
echo "Search with dask: $DASK_ADDRESS"
"$PYTHON_BIN" -u tools/tn_contest_runner.py search \
  --case "$CASE" \
  --nqubits "$NQUBITS" \
  --nlayers "$NLAYERS" \
  --observables "${observables[@]}" \
  --tree-dir "$TREE_DIR" \
  --dask-address "$DASK_ADDRESS" \
  --torch-threads "$TORCH_THREADS" \
  --dtype "$DTYPE" \
  --tn-search-repeats "$SEARCH_REPEATS" \
  --tn-search-time "$SEARCH_TIME" \
  "${tn_slice_args[@]}"

# Stage 2: copy the saved trees to the other hosts.
sync_trees_to_hosts

# Stage 3: contraction under MPI using the trees from stage 1.
echo "Contract with MPI: $MPIEXEC_FULL"
read -r -a mpi_prefix <<< "$MPIEXEC_FULL"
"${mpi_prefix[@]}" "$PYTHON_BIN" -u tools/tn_contest_runner.py contract \
  --mpi \
  --case "$CASE" \
  --nqubits "$NQUBITS" \
  --nlayers "$NLAYERS" \
  --observables "${observables[@]}" \
  --tree-dir "$TREE_DIR" \
  --torch-threads "$TORCH_THREADS" \
  --dtype "$DTYPE" \
  "${tn_slice_args[@]}"
|
||||
340
tools/run_vidal_mpi_contest_cases.sh
Executable file
340
tools/run_vidal_mpi_contest_cases.sh
Executable file
@@ -0,0 +1,340 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Contest-style Vidal/MPI MPS cases.
|
||||
#
|
||||
# Usage:
|
||||
# tools/run_vidal_mpi_contest_cases.sh main1
|
||||
# tools/run_vidal_mpi_contest_cases.sh main2
|
||||
# tools/run_vidal_mpi_contest_cases.sh strong
|
||||
# tools/run_vidal_mpi_contest_cases.sh all
|
||||
#
|
||||
# Common overrides:
|
||||
# PYTHON_BIN=.venv/bin/python
|
||||
# MPIEXEC=mpiexec
|
||||
# MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2"
|
||||
# HOSTFILE=hostfile # optional; used only if the file exists
|
||||
# RANKS=8
|
||||
# TORCH_THREADS=8
|
||||
# CUT_RATIO=1e-12
|
||||
# OBS_FILTER="boundary_ZZ_q2 ring_xz dense3_spread complex_iZ0"
|
||||
#
|
||||
# Per-case overrides:
|
||||
# MAIN1_NQ=128 MAIN1_LAYERS=50 MAIN1_BOND=1024 MAIN1_SEED=31001
|
||||
# MAIN2_NQ=128 MAIN2_LAYERS=64 MAIN2_BOND=2048 MAIN2_SEED=31002
|
||||
# STRONG_NQ=256 STRONG_LAYERS=64 STRONG_BOND=2048 STRONG_SEED=41001
|
||||
|
||||
# Run from the repository root.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Common overrides: keep a non-empty value from the environment, otherwise
# take the default shown.
: "${PYTHON_BIN:=.venv/bin/python}"
: "${MPIEXEC:=mpiexec}"
: "${HOSTFILE:=}"
: "${RANKS:=4}"
: "${TORCH_THREADS:=1}"
: "${CUT_RATIO:=1e-12}"
: "${OBS_FILTER:=}"

# The generated Python runner is written to a unique file under
# $ROOT_DIR/.tmp and removed again when this script exits.
RUNNER_DIR="$ROOT_DIR/.tmp"
mkdir -p "$RUNNER_DIR"
RUNNER="$(mktemp "$RUNNER_DIR/qibotn_vidal_contest.XXXXXX.py")"

cleanup() { rm -f "$RUNNER"; }
trap cleanup EXIT
|
||||
|
||||
cat > "$RUNNER" <<'PY'
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
    """Best-effort request that torch use ``nthreads`` threads.

    Any failure (torch cannot be imported, the value is rejected, ...) is
    deliberately ignored; the function always returns ``None``.
    """
    try:
        from torch import set_num_threads

        set_num_threads(nthreads)
    except Exception:
        return
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build the seeded random layered circuit for one contest case.

    Every layer applies RY and RZ (plus RX for ``rxx_rzz`` / ``scramble``) to
    each qubit, followed by a kind-specific nearest-neighbour pattern:

    * ``reversed_cnot``: CNOTs on even pairs then odd pairs, with the
      control/target orientation alternating by layer parity.
    * ``rxx_rzz``: RXX + RZZ on pairs starting at ``layer % 2``.
    * ``scramble``: as ``rxx_rzz`` with a smaller angle range, plus a SWAP on
      each pair in every fifth layer.

    Raises:
        ValueError: for any other ``kind`` (raised while building the first
            layer, so not when ``nlayers`` is 0).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)
    extra_rx = kind in ("rxx_rzz", "scramble")

    def angle(bound=math.pi):
        # One uniform draw in [-bound, bound); call order fixes the circuit.
        return rng.uniform(-bound, bound)

    for layer in range(nlayers):
        for q in range(nqubits):
            circuit.add(gates.RY(q, theta=angle()))
            circuit.add(gates.RZ(q, theta=angle()))
            if extra_rx:
                circuit.add(gates.RX(q, theta=angle()))

        if kind == "reversed_cnot":
            odd_layer = bool(layer % 2)
            # Even pairs are flipped on odd layers, odd pairs on even layers.
            for first, flipped in ((0, odd_layer), (1, not odd_layer)):
                for q in range(first, nqubits - 1, 2):
                    control, target = (q + 1, q) if flipped else (q, q + 1)
                    circuit.add(gates.CNOT(control, target))
        elif kind in ("rxx_rzz", "scramble"):
            bound = 0.9 if kind == "rxx_rzz" else 0.8
            swap_layer = kind == "scramble" and layer % 5 == 4
            for q in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.RXX(q, q + 1, theta=angle(bound)))
                circuit.add(gates.RZZ(q, q + 1, theta=angle(bound)))
                if swap_layer:
                    circuit.add(gates.SWAP(q, q + 1))
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")

    return circuit
|
||||
|
||||
|
||||
def ring_xz(nqubits):
    """Return the Hamiltonian sum_q 0.5 * X(q) * Z(q + 1) on a closed ring."""
    terms = (0.5 * X(q) * Z((q + 1) % nqubits) for q in range(nqubits))
    return hamiltonians.SymbolicHamiltonian(form=sum(terms))
|
||||
|
||||
|
||||
def open_zz(nqubits):
    """Return the mean of Z(q) * Z(q + 1) over the open chain's bonds."""
    # The weight is evaluated per term, so no division happens when the chain
    # has no bonds.
    terms = (
        (1.0 / (nqubits - 1)) * Z(q) * Z(q + 1)
        for q in range(nqubits - 1)
    )
    return hamiltonians.SymbolicHamiltonian(form=sum(terms))
|
||||
|
||||
|
||||
def range2_xx(nqubits):
    """Return the mean of X(q) * X(q + 2) over all next-nearest pairs."""
    # The weight is evaluated per term, so no division happens when there are
    # no such pairs.
    terms = (
        (1.0 / (nqubits - 2)) * X(q) * X(q + 2)
        for q in range(nqubits - 2)
    )
    return hamiltonians.SymbolicHamiltonian(form=sum(terms))
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
    """Return a seeded random Hermitian ``dim x dim`` observable.

    The matrix is the Hermitian part of a complex Gaussian matrix, scaled to
    unit norm as measured by ``np.linalg.norm``. ``nqubits`` is accepted but
    not used.

    Returns:
        dict with ``"matrix"`` (the array) and ``"qubits"`` (``qubits`` as a
        list).
    """
    generator = np.random.default_rng(seed)
    shape = (dim, dim)
    # Draw the real part first, then the imaginary part, from one stream.
    real_part = generator.normal(size=shape)
    imag_part = generator.normal(size=shape)
    gaussian = real_part + 1j * imag_part
    hermitian = (gaussian + gaussian.conj().T) / 2.0
    normalised = hermitian / np.linalg.norm(hermitian)
    return {"matrix": normalised, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def observables_for_case(nqubits, seed):
    """Return the ordered ``(name, observable)`` list evaluated for a case.

    Entries are symbolic Pauli Hamiltonians, except the two ``dense*`` entries,
    which are the dicts produced by ``dense_observable``.
    """
    quarter = nqubits // 4
    half = nqubits // 2
    three_quarter = (3 * nqubits) // 4
    last = nqubits - 1
    symbolic = hamiltonians.SymbolicHamiltonian

    def neighbour_zz(site):
        # ZZ across the bond just left of ``site``.
        return symbolic(form=Z(site - 1) * Z(site))

    return [
        ("boundary_ZZ_q1", neighbour_zz(quarter)),
        ("boundary_ZZ_q2", neighbour_zz(half)),
        ("boundary_ZZ_q3", neighbour_zz(three_quarter)),
        (
            "long_Z_5_sites",
            symbolic(form=Z(0) * Z(quarter) * Z(half) * Z(three_quarter) * Z(last)),
        ),
        (
            "mixed_XZYZX",
            symbolic(form=X(0) * Z(quarter) * Y(half) * Z(three_quarter) * X(last)),
        ),
        ("ring_xz", ring_xz(nqubits)),
        ("open_zz", open_zz(nqubits)),
        ("range2_xx", range2_xx(nqubits)),
        ("complex_iZ0", symbolic(form=1.0j * Z(0))),
        ("dense2_mid", dense_observable(nqubits, (half - 1, half), seed + 101, 4)),
        (
            "dense3_spread",
            dense_observable(nqubits, (quarter, half, three_quarter), seed + 202, 8),
        ),
    ]
|
||||
|
||||
|
||||
def run_case(args):
    """Evaluate the selected observables for one case; rank 0 reports.

    For each observable a fresh ``VidalBackend`` is configured, all ranks
    synchronise on a barrier, and the expectation call is timed. A failing
    observable is reported with a NaN value and a status built from the
    exception type and the first line of its message; it does not stop the
    remaining observables.

    Raises:
        ValueError: if ``args.obs_filter`` is set but matches no observable.
    """
    set_torch_threads(args.torch_threads)
    world = MPI.COMM_WORLD
    rank = world.Get_rank()
    size = world.Get_size()

    circuit = build_circuit(args.kind, args.nqubits, args.nlayers, args.seed)
    selected = observables_for_case(args.nqubits, args.seed)
    if args.obs_filter:
        wanted = set(args.obs_filter.split(","))
        selected = [entry for entry in selected if entry[0] in wanted]
        if not selected:
            raise ValueError(f"OBS_FILTER matched no observables: {args.obs_filter!r}")

    is_root = rank == 0
    if is_root:
        print("=" * 88, flush=True)
        header_fields = (
            "case",
            f"label={args.label}",
            f"kind={args.kind}",
            f"ranks={size}",
            f"nqubits={args.nqubits}",
            f"nlayers={args.nlayers}",
            f"gates={len(circuit.queue)}",
            f"bond={args.bond}",
            f"cut_ratio={args.cut_ratio:g}",
            f"torch_threads={args.torch_threads}",
            f"seed={args.seed}",
            f"obs_filter={args.obs_filter or 'all'}",
        )
        print(" ".join(header_fields), flush=True)
        print("observable value seconds trunc_sum trunc_max status", flush=True)

    for obs_name, observable in selected:
        backend = VidalBackend()
        backend.configure_tn_simulation(
            max_bond_dimension=args.bond,
            cut_ratio=args.cut_ratio,
            tensor_module="torch",
            mpi_approach="CT",
            mpi_num_procs=size,
            fallback=False,
        )

        # Line the ranks up before starting the clock.
        world.Barrier()
        started = time.perf_counter()
        try:
            value = backend.expectation(
                circuit,
                observable,
                preprocess=True,
                compile_circuit=False,
            )
        except Exception as exc:  # pragma: no cover - printed for manual runs
            value = np.nan
            first_line = str(exc).split("\n", 1)[0]
            status = type(exc).__name__ + ":" + first_line
        else:
            status = "ok"
        seconds = time.perf_counter() - started

        if is_root:
            print(
                f"{obs_name} {value!r} {seconds:.3f} "
                f"{backend.last_truncation_error:.6e} "
                f"{backend.last_max_truncation_error:.6e} {status}",
                flush=True,
            )
|
||||
|
||||
|
||||
def main():
    """Parse the per-case command line and hand it to ``run_case``."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--label", required=True)
    parser.add_argument(
        "--kind",
        choices=("reversed_cnot", "rxx_rzz", "scramble"),
        required=True,
    )
    # All remaining mandatory options are plain typed scalars.
    for flag, converter in (
        ("--nqubits", int),
        ("--nlayers", int),
        ("--bond", int),
        ("--cut-ratio", float),
        ("--seed", int),
        ("--torch-threads", int),
    ):
        parser.add_argument(flag, type=converter, required=True)
    parser.add_argument("--obs-filter", default="")
    args = parser.parse_args()
    run_case(args)


if __name__ == "__main__":
    main()
|
||||
PY
|
||||
|
||||
# Build the MPI launcher command. A non-empty MPIEXEC_FULL is used verbatim
# (split on whitespace); otherwise it is assembled from MPIEXEC, the optional
# HOSTFILE (only when that file exists) and RANKS.
if [[ -z "${MPIEXEC_FULL:-}" ]]; then
  mpi_prefix=("$MPIEXEC")
  [[ -z "$HOSTFILE" || ! -f "$HOSTFILE" ]] || mpi_prefix+=("-hostfile" "$HOSTFILE")
  mpi_prefix+=("-n" "$RANKS")
else
  read -r -a mpi_prefix <<< "$MPIEXEC_FULL"
fi
|
||||
|
||||
# Launch one contest case under MPI.
# Arguments: label kind nqubits layers bond seed
run_case() {
  local label="$1"
  local kind="$2"
  local nq="$3"
  local layers="$4"
  local bond="$5"
  local seed="$6"

  # Prefer the checked-in runner. If it is absent, fall back to the copy this
  # script generated into $RUNNER, which was otherwise written and never used.
  local runner="$ROOT_DIR/tools/vidal_mpi_contest_runner.py"
  [[ -f "$runner" ]] || runner="$RUNNER"

  echo
  echo "Running $label: kind=$kind nqubits=$nq layers=$layers bond=$bond seed=$seed"
  echo "MPI: ${mpi_prefix[*]}"
  # OBS_FILTER is space-separated; the runner expects a comma-separated list.
  "${mpi_prefix[@]}" "$PYTHON_BIN" -u "$runner" \
    --label "$label" \
    --kind "$kind" \
    --nqubits "$nq" \
    --nlayers "$layers" \
    --bond "$bond" \
    --cut-ratio "$CUT_RATIO" \
    --seed "$seed" \
    --torch-threads "$TORCH_THREADS" \
    --obs-filter "${OBS_FILTER// /,}"
}
|
||||
|
||||
# Dispatch on the requested case name (defaults to "help").
case "${1:-help}" in
  main1)
    run_case \
      "main1-reversed-cnot" \
      "reversed_cnot" \
      "${MAIN1_NQ:-128}" \
      "${MAIN1_LAYERS:-50}" \
      "${MAIN1_BOND:-1024}" \
      "${MAIN1_SEED:-31001}"
    ;;
  main2)
    run_case \
      "main2-rxx-rzz" \
      "rxx_rzz" \
      "${MAIN2_NQ:-128}" \
      "${MAIN2_LAYERS:-64}" \
      "${MAIN2_BOND:-2048}" \
      "${MAIN2_SEED:-31002}"
    ;;
  strong)
    run_case \
      "strong-scramble" \
      "scramble" \
      "${STRONG_NQ:-256}" \
      "${STRONG_LAYERS:-64}" \
      "${STRONG_BOND:-2048}" \
      "${STRONG_SEED:-41001}"
    ;;
  all)
    # Each case runs in its own invocation of this script.
    "$0" main1
    "$0" main2
    "$0" strong
    ;;
  smoke)
    # Shrink main1 unless the caller already chose sizes.
    MAIN1_NQ="${MAIN1_NQ:-32}" \
    MAIN1_LAYERS="${MAIN1_LAYERS:-6}" \
    MAIN1_BOND="${MAIN1_BOND:-128}" \
      "$0" main1
    ;;
  help|-h|--help|*)
    cat >&2 <<'EOF'
Usage: tools/run_vidal_mpi_contest_cases.sh [main1|main2|strong|all|smoke]

Cases:
  main1 128 qubits, 50 layers, reversed-CNOT brickwall, chi=1024
  main2 128 qubits, 64 layers, RXX/RZZ brickwall, chi=2048
  strong 256 qubits, 64 layers, RXX/RZZ + periodic SWAP scramble, chi=2048
  smoke Small syntax/runtime check of main1

Common overrides:
  PYTHON_BIN=.venv/bin/python
  MPIEXEC=mpiexec
  MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2"
  HOSTFILE=hostfile
  RANKS=8
  TORCH_THREADS=8
  CUT_RATIO=1e-12
  OBS_FILTER="boundary_ZZ_q2 ring_xz dense3_spread complex_iZ0"

Per-case overrides:
  MAIN1_NQ=128 MAIN1_LAYERS=50 MAIN1_BOND=1024 MAIN1_SEED=31001
  MAIN2_NQ=128 MAIN2_LAYERS=64 MAIN2_BOND=2048 MAIN2_SEED=31002
  STRONG_NQ=256 STRONG_LAYERS=64 STRONG_BOND=2048 STRONG_SEED=41001
EOF
    # Asking for help is not an error; only an unknown case name is.
    case "${1:-help}" in
      help|-h|--help) exit 0 ;;
    esac
    exit 2
    ;;
esac
|
||||
70
tools/run_vidal_segment_mpi_scan.sh
Executable file
70
tools/run_vidal_segment_mpi_scan.sh
Executable file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env bash
set -euo pipefail

# Scan settings: keep a non-empty value from the environment, otherwise take
# the default shown. RANKS and THREADS are parallel space-separated lists.
: "${NQ:=34}"
: "${LAYERS:=20}"
: "${BOND:=512}"
: "${SEED:=42}"
: "${RANKS:=1 2 4}"
: "${THREADS:=32 32 16}"
: "${PYTHON_BIN:=.venv/bin/python}"
: "${MPIEXEC:=mpiexec}"
: "${CIRCUIT:=brickwall_cnot}"
: "${OBSERVABLE:=ring_xz}"
: "${EXACT:=0}"

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Anything other than "run" prints the usage; only "help" (also the default
# when no argument is given) counts as success.
action="${1:-help}"
case "$action" in
  run)
    ;;
  *)
    cat >&2 <<'EOF'
Usage: tools/run_vidal_segment_mpi_scan.sh run

Overrides:
  NQ=34 LAYERS=20 BOND=512 SEED=42
  RANKS="1 2 4" THREADS="32 32 16"
  CIRCUIT=brickwall_cnot OBSERVABLE=ring_xz
  EXACT=1
  PYTHON_BIN=.venv/bin/python MPIEXEC=mpiexec
EOF
    [[ "$action" == "help" ]] && exit 0
    exit 2
    ;;
esac

read -r -a ranks <<< "$RANKS"
read -r -a threads <<< "$THREADS"

if (( ${#ranks[@]} != ${#threads[@]} )); then
  echo "RANKS and THREADS must have the same number of entries." >&2
  exit 2
fi

# Arguments shared by every run of the scan.
common=(
  --nqubits "$NQ"
  --nlayers "$LAYERS"
  --bond "$BOND"
  --seed "$SEED"
  --mps
  --circuits "$CIRCUIT"
  --observables "$OBSERVABLE"
)
[[ "$EXACT" != "1" ]] || common+=(--exact)

# One run per (rank count, thread count) pair; a rank count of 1 runs without
# the MPI launcher.
for idx in "${!ranks[@]}"; do
  nrank="${ranks[$idx]}"
  nthr="${threads[$idx]}"
  case "$nrank" in
    1)
      echo "== Vidal serial ranks=1 torch_threads=$nthr =="
      "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
        "${common[@]}" --torch-threads "$nthr"
      ;;
    *)
      echo "== Vidal segmented MPI ranks=$nrank torch_threads=$nthr =="
      "$MPIEXEC" -n "$nrank" "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
        "${common[@]}" --torch-threads "$nthr" --mpi
      ;;
  esac
done
|
||||
59
tools/slice_existing_tree.py
Normal file
59
tools/slice_existing_tree.py
Normal file
@@ -0,0 +1,59 @@
|
||||
"""Slice an existing saved cotengra tree without re-running path search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
from qibotn.parallel import contraction_tree_costs
|
||||
|
||||
|
||||
def main():
    """Slice one tree from a saved tree pickle and write the result.

    The input may be a dict with a ``"trees"`` entry, a list/tuple of trees,
    or a single tree. The tree selected by ``--term`` is sliced with
    ``tree.slice`` and replaces the original in the output. For dict payloads
    the ``"costs"`` and ``"nterms"`` entries are recomputed as well. Costs
    before and after slicing are printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="Input pickle saved by --tn-save-tree.")
    parser.add_argument("output", help="Output pickle path.")
    parser.add_argument("--term", type=int, default=0)
    parser.add_argument("--target-slices", type=int, default=2)
    parser.add_argument("--max-repeats", type=int, default=64)
    parser.add_argument("--seed", type=int, default=42)
    args = parser.parse_args()

    input_path = Path(args.input)
    output_path = Path(args.output)
    # SECURITY: unpickling can execute arbitrary code. Only pass files that
    # were produced by a trusted run.
    with input_path.open("rb") as f:
        payload = pickle.load(f)

    trees = payload["trees"] if isinstance(payload, dict) else payload
    if not isinstance(trees, (list, tuple)):
        trees = [trees]

    # Report a bad --term as a usage error instead of a bare IndexError.
    # Negative indices remain valid, as with plain sequence indexing.
    if not -len(trees) <= args.term < len(trees):
        parser.error(
            f"--term {args.term} is out of range: "
            f"{input_path} holds {len(trees)} tree(s)."
        )
    tree = trees[args.term]

    print("original", contraction_tree_costs(tree), flush=True)
    sliced = tree.slice(
        target_slices=args.target_slices,
        max_repeats=args.max_repeats,
        seed=args.seed,
    )
    print("sliced", contraction_tree_costs(sliced), flush=True)
    print(f"sliced_inds={sliced.sliced_inds}", flush=True)

    new_trees = list(trees)
    new_trees[args.term] = sliced

    if isinstance(payload, dict):
        # Keep the derived metadata consistent with the replaced tree.
        out_payload = dict(payload)
        out_payload["trees"] = new_trees
        out_payload["costs"] = [contraction_tree_costs(t) for t in new_trees]
        out_payload["nterms"] = len(new_trees)
    else:
        out_payload = new_trees

    output_path.parent.mkdir(parents=True, exist_ok=True)
    with output_path.open("wb") as f:
        pickle.dump(out_payload, f)
    print(f"saved {output_path}", flush=True)


if __name__ == "__main__":
    main()
|
||||
440
tools/tn_contest_runner.py
Normal file
440
tools/tn_contest_runner.py
Normal file
@@ -0,0 +1,440 @@
|
||||
#!/usr/bin/env python
|
||||
"""Contest-style CPU TN path search and contraction runner.
|
||||
|
||||
This file is intentionally self-contained: define contest circuits and
|
||||
observables here, run path search once, then load the saved trees for repeated
|
||||
MPI contractions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC = ROOT / "src"
|
||||
if str(SRC) not in sys.path:
|
||||
sys.path.insert(0, str(SRC))
|
||||
|
||||
from qibotn.expectation_runner import ( # noqa: E402
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class CaseSpec:
    """Immutable description of one named contest case."""

    circuit_kind: str  # circuit family name passed to build_circuit
    observables: tuple[str, ...]  # observable names for this case
    nqubits: int
    nlayers: int
    seed: int
    target_slices: int | None = None  # None means no fixed slice count


# Registry of the built-in cases; none of them pins ``target_slices``.
CASES = {
    name: CaseSpec(circuit_kind, observables, nqubits, nlayers, seed)
    for name, circuit_kind, observables, nqubits, nlayers, seed in (
        ("main1", "rxx_rzz_chain", ("ring_xz",), 34, 20, 31001),
        ("main2", "scramble_chain", ("open_zz", "range2_xx"), 36, 18, 31002),
        ("strong", "reversed_cnot", ("ring_xz", "long_z_string"), 40, 24, 41001),
    )
}
|
||||
|
||||
|
||||
def optional_int(text):
    """Parse an integer option whose value may also mean "no limit".

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter
    case) yield ``None``; anything else is passed to ``int``.
    """
    means_unset = isinstance(text, str) and text.lower() in {
        "none",
        "null",
        "inf",
        "unlimited",
    }
    return None if means_unset else int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Parse a float option whose value may also mean "no limit".

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any letter
    case) yield ``None`` (so ``"inf"`` is not turned into infinity); anything
    else is passed to ``float``.
    """
    means_unset = isinstance(text, str) and text.lower() in {
        "none",
        "null",
        "inf",
        "unlimited",
    }
    return None if means_unset else float(text)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
    """Best-effort request that torch use ``nthreads`` threads.

    This never raises. A missing torch is skipped silently, as before. Any
    other failure is reported on stderr rather than swallowed, because a
    thread count that was silently not applied makes timing results
    misleading.
    """
    try:
        import torch

        torch.set_num_threads(nthreads)
    except ImportError:
        return
    except Exception as exc:  # still best-effort: warn, do not abort the run
        print(
            f"warning: could not set torch threads to {nthreads!r}: "
            f"{type(exc).__name__}: {exc}",
            file=sys.stderr,
            flush=True,
        )
|
||||
|
||||
|
||||
def add_single_qubit_layer(circuit, nqubits, rng, include_rx=False):
    """Append one layer of random single-qubit rotations to ``circuit``.

    Each qubit gets RY then RZ (then RX when ``include_rx`` is true), each
    with its own angle drawn from ``rng.uniform(-pi, pi)`` in that order.
    """
    if include_rx:
        rotations = (gates.RY, gates.RZ, gates.RX)
    else:
        rotations = (gates.RY, gates.RZ)
    for qubit in range(nqubits):
        for rotation in rotations:
            circuit.add(rotation(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Define contest circuits here.

    Builds ``nlayers`` layers of seeded random single-qubit rotations followed
    by a kind-specific nearest-neighbour pattern:

    * ``rxx_rzz_chain``: RXX + RZZ on pairs starting at ``layer % 2``.
    * ``scramble_chain``: as above with a smaller angle range, plus a SWAP on
      each pair in every fifth layer.
    * ``reversed_cnot``: CNOTs on even pairs then odd pairs, with the
      control/target orientation alternating by layer parity.

    Raises:
        ValueError: if ``kind`` is not one of the names above. This is checked
            before any gate is built, so it also fires when ``nlayers`` is 0.
    """
    known_kinds = ("rxx_rzz_chain", "scramble_chain", "reversed_cnot")
    if kind not in known_kinds:
        raise ValueError(f"Unknown circuit kind {kind!r}.")

    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    for layer in range(nlayers):
        if kind == "rxx_rzz_chain":
            add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
            for qubit in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.9, 0.9)))
                circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.9, 0.9)))

        elif kind == "scramble_chain":
            add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
            for qubit in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.8, 0.8)))
                circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.8, 0.8)))
                if layer % 5 == 4:
                    circuit.add(gates.SWAP(qubit, qubit + 1))

        else:  # "reversed_cnot" (guaranteed by the check above)
            add_single_qubit_layer(circuit, nqubits, rng)
            # Even pairs are reversed on odd layers ...
            for qubit in range(0, nqubits - 1, 2):
                gate = gates.CNOT(qubit + 1, qubit) if layer % 2 else gates.CNOT(qubit, qubit + 1)
                circuit.add(gate)
            # ... and odd pairs on even layers.
            for qubit in range(1, nqubits - 1, 2):
                gate = gates.CNOT(qubit + 1, qubit) if layer % 2 == 0 else gates.CNOT(qubit, qubit + 1)
                circuit.add(gate)

    return circuit
|
||||
|
||||
|
||||
def pauli_sum_observable(kind, nqubits, seed):
    """Define contest observables here.

    The TN path currently expects Pauli products / SymbolicHamiltonian terms.
    Production contest observables should stay Hermitian unless the scoring
    rule explicitly requires complex output.

    ``seed`` is accepted but unused.

    Raises:
        ValueError: if ``kind`` is not a known observable name.
    """
    del seed

    def ring_xz():
        return sum(0.5 * X(q) * Z((q + 1) % nqubits) for q in range(nqubits))

    def open_zz():
        return sum(
            (1.0 / max(1, nqubits - 1)) * Z(q) * Z(q + 1)
            for q in range(nqubits - 1)
        )

    def range2_xx():
        return sum(
            (1.0 / max(1, nqubits - 2)) * X(q) * X(q + 2)
            for q in range(nqubits - 2)
        )

    def long_z_string():
        # Product of Z on every ``stride``-th qubit; None when there are none.
        stride = max(1, nqubits // 16)
        sites = iter(range(0, nqubits, stride))
        first = next(sites, None)
        product = None if first is None else Z(first)
        for q in sites:
            product = product * Z(q)
        return product

    def mixed_local():
        q1 = nqubits // 4
        q2 = nqubits // 2
        q3 = (3 * nqubits) // 4
        return 0.25 * X(0) - 0.5 * Z(nqubits - 1) + 0.125 * X(q1) * Z(q2) * Y(q3)

    for name, make_form in (
        ("ring_xz", ring_xz),
        ("open_zz", open_zz),
        ("range2_xx", range2_xx),
        ("long_z_string", long_z_string),
        ("mixed_local", mixed_local),
    ):
        if kind == name:
            return hamiltonians.SymbolicHamiltonian(form=make_form())

    raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def tree_path(tree_dir, case_name, obs_name, nqubits, nlayers, target_slices):
    """Return the pickle path used for one (case, observable, size) tree.

    The file name is ``<case>_<obs>_<nqubits>q<nlayers>l_<slices>.pkl``, where
    ``<slices>`` is ``auto`` when ``target_slices`` is ``None`` and
    ``s<target_slices>`` otherwise.
    """
    if target_slices is None:
        slice_label = "auto"
    else:
        slice_label = f"s{target_slices}"
    stem = "_".join(
        (f"{case_name}", f"{obs_name}", f"{nqubits}q{nlayers}l", slice_label)
    )
    return Path(tree_dir) / (stem + ".pkl")
|
||||
|
||||
|
||||
def build_parallel_opts(args, tree_file=None, search_only=False):
    """Build the ``parallel_opts`` dict for one search or contraction run.

    Args:
        args: parsed command line providing the ``tn_*`` / ``dask_*`` options
            and ``torch_threads``.
        tree_file: path of the saved tree. In search-only mode the tree is
            saved there; otherwise it is loaded from there.
        search_only: when true, request a path search without contraction.

    Returns:
        dict of options. ``slicing_opts`` is ``None`` when no slicing target
        is set, and optional keys are only present when requested.
    """
    slicing_opts = {}
    if args.tn_target_slices is not None:
        slicing_opts["target_slices"] = args.tn_target_slices
    if args.tn_target_size is not None:
        slicing_opts["target_size"] = args.tn_target_size

    opts = {
        "slicing_opts": slicing_opts or None,
        # Fall back to the torch thread count when no worker count is given.
        "search_workers": args.tn_search_workers or args.torch_threads,
        "max_repeats": args.tn_search_repeats,
        "max_time": args.tn_search_time,
        "print_stats": False,
    }
    if args.tn_search_backend is not None:
        opts["search_backend"] = args.tn_search_backend
    if args.dask_address is not None:
        opts["dask_address"] = args.dask_address
    if args.dask_close_workers:
        opts["dask_close_workers"] = True
    if args.tn_debug_trials:
        opts["debug_trials"] = True
    if search_only:
        opts["search_only"] = True
        # Only record a save path when one was given; str(None) would
        # otherwise name a file literally called "None".
        if tree_file is not None:
            opts["save_tree_path"] = str(tree_file)
    elif tree_file is not None:
        opts["load_tree_path"] = str(tree_file)
    return opts
|
||||
|
||||
|
||||
def run_one(args, case_name, obs_name, mode):
    """Run the search or contraction step for one (case, observable) pair.

    Args:
        args: Parsed CLI namespace produced by ``main``.
        case_name: Key into ``CASES``.
        obs_name: Observable kind passed to ``pauli_sum_observable``.
        mode: ``"search"`` requests a search-only run that saves the tree at
            the ``tree_path`` location; any other mode loads that tree.

    Raises:
        FileNotFoundError: ``mode == "contract"`` and the tree file is absent.
        ValueError: ``--exact`` requested above ``--exact-max-qubits``.

    Only rank 0 prints. The per-term ``tn_term_summary`` lines are skipped
    when ``--no-tn-stats`` was given (previously the flag was parsed but had
    no effect).
    """
    case = CASES[case_name]
    circuit = build_circuit(case.circuit_kind, args.nqubits, args.nlayers, args.seed)
    observable = pauli_sum_observable(obs_name, args.nqubits, args.seed)
    path = tree_path(
        args.tree_dir,
        case_name,
        obs_name,
        args.nqubits,
        args.nlayers,
        args.tn_target_slices,
    )
    path.parent.mkdir(parents=True, exist_ok=True)

    rank = 0
    if args.mpi:
        from mpi4py import MPI

        rank = MPI.COMM_WORLD.Get_rank()

    if rank == 0:
        print("=" * 88, flush=True)
        print(
            f"mode={mode} case={case_name} circuit={case.circuit_kind} "
            f"observable={obs_name} nqubits={args.nqubits} nlayers={args.nlayers} "
            f"seed={args.seed} gates={len(circuit.queue)} tree={path}",
            flush=True,
        )

    if mode == "contract" and not path.exists():
        raise FileNotFoundError(f"Missing tree file: {path}. Run search first.")

    # The exact reference value is computed on rank 0 only and never while
    # searching.
    exact = None
    if args.exact and rank == 0 and mode != "search":
        if args.nqubits > args.exact_max_qubits:
            raise ValueError(
                f"--exact is limited to {args.exact_max_qubits} qubits by default."
            )
        exact = exact_for_observable(circuit, observable, args.nqubits)

    config = ExpectationConfig(
        ansatz="tn",
        mpi=args.mpi,
        bond=args.bond,
        cut_ratio=args.cut_ratio,
        tensor_module="torch",
        quimb_backend=args.quimb_backend,
        dtype=args.dtype,
        torch_threads=args.torch_threads,
        parallel_opts=build_parallel_opts(
            args,
            tree_file=path,
            search_only=(mode == "search"),
        ),
    )
    result = run_cpu_expectation(circuit, observable, config)
    if args.mpi and result.rank != 0:
        return

    if mode == "search":
        print(f"searched observable={obs_name} tree={path}", flush=True)
    else:
        abs_error = float("nan") if exact is None else abs(result.value - exact)
        rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
        exact_text = "nan" if exact is None else f"{exact:.16e}"
        print(
            f"result observable={obs_name} exact={exact_text} "
            f"value={result.value:.16e} abs_error={abs_error:.6e} "
            f"rel_error={rel_error:.6e} seconds={result.seconds:.3f}",
            flush=True,
        )

    # getattr keeps callers that build ``args`` without this flag working.
    if not getattr(args, "no_tn_stats", False):
        _print_term_summaries(obs_name, result.parallel_stats)


def _print_term_summaries(obs_name, parallel_stats):
    """Print one ``tn_term_summary`` line per entry of ``parallel_stats``."""
    for stat in parallel_stats or ():
        cost = stat["path_cost"]
        search_stats = stat.get("search_stats", {})
        print(
            "tn_term_summary "
            f"observable={obs_name} "
            f"term={stat.get('term_index', 0)} "
            f"search_seconds={stat.get('search_seconds', float('nan')):.3f} "
            f"contract_seconds={stat.get('contract_seconds', float('nan')):.3f} "
            f"completed_trials={search_stats.get('completed_trials', 'na')} "
            f"finite_trials={search_stats.get('finite_trials', 'na')} "
            f"failed_trials={search_stats.get('failed_trials', 'na')} "
            f"requested_trials={search_stats.get('requested_trials', 'na')} "
            f"best_score={search_stats.get('best_score', float('nan')):.6g} "
            f"slices={cost.get('nslices')} "
            f"log10_flops={cost.get('log10_flops', float('nan')):.3f} "
            f"log10_write={cost.get('log10_write', float('nan')):.3f} "
            f"log2_size={cost.get('log2_size', float('nan')):.3f} "
            f"peak_memory_gib={cost.get('peak_memory_gib', float('nan')):.3g} "
            f"rank_slices={stat.get('rank_slices')}",
            flush=True,
        )
|
||||
|
||||
|
||||
def selected_observables(args, case):
    """Pick the observable names to run.

    Precedence: an explicit ``--observables`` list, then the comma-separated
    ``--obs-filter`` string (blank entries dropped, names stripped), then the
    case's own ``observables``.
    """
    explicit = args.observables
    if explicit:
        return tuple(explicit)
    if not args.obs_filter:
        return case.observables
    names = []
    for piece in args.obs_filter.split(","):
        piece = piece.strip()
        if piece:
            names.append(piece)
    return tuple(names)
|
||||
|
||||
|
||||
def apply_case_defaults(args):
    """Fill options the user left unset from the selected case, in place.

    ``nqubits``, ``nlayers``, ``seed`` and ``tn_target_slices`` are taken from
    ``CASES[args.case]`` when they are ``None``; ``args.observables`` is then
    replaced by the result of ``selected_observables``.
    """
    case = CASES[args.case]
    fallbacks = (
        ("nqubits", "nqubits"),
        ("nlayers", "nlayers"),
        ("seed", "seed"),
        ("tn_target_slices", "target_slices"),
    )
    for arg_name, case_attr in fallbacks:
        if getattr(args, arg_name) is None:
            setattr(args, arg_name, getattr(case, case_attr))
    args.observables = selected_observables(args, case)
|
||||
|
||||
|
||||
def stop_dask_cluster(args):
    """Run ``tools/manage_tn_dask_cluster.sh stop`` for an external dask cluster.

    Does nothing when ``--keep-dask`` is set, the search backend is not
    ``dask``, no ``--dask-address`` was given, or (under MPI) on any rank
    other than 0. The scheduler host and port parsed from the address are
    exported as ``SCHEDULER_HOST`` / ``SCHEDULER_PORT`` unless the environment
    already defines them. The script's exit status is not checked.
    """
    wants_stop = (
        not args.keep_dask
        and args.tn_search_backend == "dask"
        and bool(args.dask_address)
    )
    if not wants_stop:
        return

    if args.mpi:
        from mpi4py import MPI

        if MPI.COMM_WORLD.Get_rank() != 0:
            return

    script = ROOT / "tools" / "manage_tn_dask_cluster.sh"
    if not script.exists():
        print(f"dask_stop_skipped reason=missing_script path={script}", flush=True)
        return

    env = os.environ.copy()
    address = urlparse(args.dask_address)
    if address.hostname:
        env.setdefault("SCHEDULER_HOST", address.hostname)
    if address.port:
        env.setdefault("SCHEDULER_PORT", str(address.port))

    print("dask_stop_after_search start", flush=True)
    command = [str(script), "stop"]
    subprocess.run(command, cwd=str(ROOT), env=env, check=False)
    print("dask_stop_after_search done", flush=True)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: list cases or run search/contract steps.

    ``all`` and ``validate`` both run ``search`` followed by ``contract``;
    ``validate`` additionally forces ``--exact`` and caps the qubit count at
    ``--exact-max-qubits``. After the search pass ``stop_dask_cluster`` is
    called.
    """
    args = _build_parser().parse_args()

    if args.mode == "list":
        for name, case in CASES.items():
            summary = (
                f"{name}: circuit={case.circuit_kind} "
                f"observables={','.join(case.observables)} "
                f"nqubits={case.nqubits} nlayers={case.nlayers} "
                f"seed={case.seed} target_slices={case.target_slices}"
            )
            print(summary)
        return

    apply_case_defaults(args)
    set_torch_threads(args.torch_threads)

    if args.mode == "validate":
        args.exact = True
        args.nqubits = min(args.nqubits, args.exact_max_qubits)
        modes = ("search", "contract")
    elif args.mode == "all":
        modes = ("search", "contract")
    else:
        modes = (args.mode,)

    for mode in modes:
        for obs_name in args.observables:
            run_one(args, args.case, obs_name, mode)
        if mode == "search":
            stop_dask_cluster(args)


def _build_parser():
    """Create the argument parser used by ``main``."""
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add("mode", choices=("search", "contract", "all", "validate", "list"))
    add("--case", choices=sorted(CASES), default="main1")
    add("--observables", nargs="+")
    add("--obs-filter", default="")
    add("--tree-dir", default="trees/contest_tn")
    add("--nqubits", type=int)
    add("--nlayers", type=int)
    add("--seed", type=int)
    add("--mpi", action="store_true")
    add("--exact", action="store_true")
    add("--exact-max-qubits", type=int, default=24)
    add("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
    add("--cut-ratio", type=optional_float, default=1e-12)
    add("--torch-threads", type=int, default=8)
    add("--quimb-backend", choices=("numpy", "torch"), default="torch")
    add("--dtype", choices=("complex128", "complex64"), default="complex64")
    add("--tn-target-slices", type=int)
    add("--tn-target-size", type=int, default=2**32)
    add("--tn-search-workers", type=int)
    add("--tn-search-repeats", type=int, default=2048)
    add("--tn-search-time", type=float, default=300.0)
    add(
        "--tn-search-backend",
        choices=("processpool", "dask"),
        default="dask",
        help=(
            "Path-search backend. Defaults to dask. Without --dask-address, "
            "non-MPI search starts a local dask cluster."
        ),
    )
    add("--dask-address")
    add("--dask-close-workers", action="store_true")
    add(
        "--keep-dask",
        action="store_true",
        help=(
            "Keep an external dask cluster running after search. By default, "
            "tools/manage_tn_dask_cluster.sh stop is called after search when "
            "--dask-address is used."
        ),
    )
    add(
        "--tn-debug-trials",
        action="store_true",
        help="Print dask worker summary and per-trial start/done logs.",
    )
    add("--no-tn-stats", action="store_true")
    return parser


if __name__ == "__main__":
    main()
|
||||
114
tools/torch_profile_tn_complex64.py
Normal file
114
tools/torch_profile_tn_complex64.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""Run the 34q/20L TN complex64 benchmark under torch.profiler briefly."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
|
||||
def main():
    """Profile a time-boxed benchmark run with ``torch.profiler`` on each rank.

    The benchmark's ``main`` is invoked in-process with a fixed argument list
    and interrupted by ``SIGALRM`` after ``--seconds``. Each MPI rank writes
    its own chrome trace, stack export and summary table into ``--out-dir``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--seconds", type=float, default=30.0)
    cli.add_argument("--out-dir", default="torch_profiles/tn_complex64")
    cli.add_argument("--torch-threads", type=int, default=48)
    args = cli.parse_args()

    # Work from the repository root (parent of this file's directory) and put
    # it on sys.path before importing the benchmark module below.
    repo_root = Path(__file__).resolve().parents[1]
    os.chdir(repo_root)
    sys.path.insert(0, str(repo_root))

    import torch
    from torch.profiler import ProfilerActivity, profile

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()
    out_dir = Path(args.out_dir)
    if rank == 0:
        out_dir.mkdir(parents=True, exist_ok=True)
    # Every rank waits here until rank 0 has created the output directory.
    comm.Barrier()

    torch.set_num_threads(args.torch_threads)

    def run_benchmark():
        import benchmark_cpu_expectation

        benchmark_argv = ["benchmark_cpu_expectation.py", "--mpi"]
        benchmark_argv += ["--ansatz", "tn"]
        benchmark_argv += ["--nqubits", "34"]
        benchmark_argv += ["--nlayers", "20"]
        benchmark_argv += ["--circuits", "rxx_rzz"]
        benchmark_argv += ["--pauli-pattern", "XZ"]
        benchmark_argv += ["--tn-load-tree", "trees/rxx_rzz_34q20l_s4.pkl"]
        benchmark_argv += ["--quimb-backend", "torch"]
        benchmark_argv += ["--torch-threads", str(args.torch_threads)]
        benchmark_argv += ["--dtype", "complex64"]
        sys.argv = benchmark_argv
        benchmark_cpu_expectation.main()

    trace_path = out_dir / f"rank{rank}_trace.json"
    stacks_path = out_dir / f"rank{rank}_stacks.txt"
    summary_path = out_dir / f"rank{rank}_summary.txt"

    prof = profile(
        activities=[ProfilerActivity.CPU],
        record_shapes=True,
        profile_memory=True,
        with_stack=True,
    )

    class ProfileTimeout(Exception):
        """Raised by the SIGALRM handler to end the profiled run."""

    def alarm_handler(signum, frame):
        raise ProfileTimeout()

    previous_handler = signal.signal(signal.SIGALRM, alarm_handler)
    signal.setitimer(signal.ITIMER_REAL, args.seconds)
    try:
        with prof:
            try:
                run_benchmark()
            except ProfileTimeout:
                # Expected way out: the time budget elapsed mid-benchmark.
                pass
    finally:
        # Disarm the timer before restoring the previous handler.
        signal.setitimer(signal.ITIMER_REAL, 0)
        signal.signal(signal.SIGALRM, previous_handler)

    prof.export_chrome_trace(str(trace_path))
    try:
        prof.export_stacks(str(stacks_path), "self_cpu_time_total")
    except Exception as exc:  # pragma: no cover - diagnostic only
        stacks_path.write_text(f"export_stacks failed: {exc}\n", encoding="utf-8")

    averages = prof.key_averages(group_by_stack_n=5)
    summary = averages.table(sort_by="self_cpu_time_total", row_limit=40)
    summary_path.write_text(summary, encoding="utf-8")

    print(
        f"torch_profile_done rank={rank}/{size} "
        f"trace={trace_path} summary={summary_path}",
        flush=True,
    )


if __name__ == "__main__":
    main()
|
||||
202
tools/validate_vidal_mpi_correctness.py
Normal file
202
tools/validate_vidal_mpi_correctness.py
Normal file
@@ -0,0 +1,202 @@
|
||||
"""Correctness checks for the Vidal/TEBD MPS fast path.
|
||||
|
||||
The cases here intentionally cover more than the benchmark ring-XZ observable:
|
||||
different nearest-neighbor gate orientations and several Pauli-sum observables.
|
||||
Run serially to compare qibojit/statevector vs Vidal, or under MPI to compare
|
||||
the segmented Vidal executor.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from qibo import Circuit, gates
|
||||
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build a seeded layered test circuit of the requested ``kind``.

    Every layer applies random RY and RZ rotations to each qubit (plus RX for
    ``"rx_ry_cz"``), followed by nearest-neighbour two-qubit gates:

    * ``"brickwall"``: CNOT(q, q + 1) on even pairs, then on odd pairs.
    * ``"reversed_cnot"``: same pairs, with control and target swapped on the
      even pairs in odd layers and on the odd pairs in even layers.
    * ``"rx_ry_cz"``: CZ on pairs starting at ``layer % 2``.

    Raises:
        ValueError: ``kind`` is none of the above (raised inside the first
            layer, so never when ``nlayers`` is 0).
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    def angle():
        return rng.uniform(-math.pi, math.pi)

    for layer in range(nlayers):
        odd_layer = bool(layer % 2)

        for q in range(nqubits):
            circuit.add(gates.RY(q, theta=angle()))
            circuit.add(gates.RZ(q, theta=angle()))
            if kind == "rx_ry_cz":
                circuit.add(gates.RX(q, theta=angle()))

        if kind == "rx_ry_cz":
            for q in range(layer % 2, nqubits - 1, 2):
                circuit.add(gates.CZ(q, q + 1))
        elif kind in ("brickwall", "reversed_cnot"):
            # (first qubit of the sublattice, layer parity on which it flips)
            for first, flips_on_odd in ((0, True), (1, False)):
                reverse = kind == "reversed_cnot" and odd_layer == flips_on_odd
                for q in range(first, nqubits - 1, 2):
                    control, target = (q + 1, q) if reverse else (q, q + 1)
                    circuit.add(gates.CNOT(control, target))
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")
    return circuit
|
||||
|
||||
|
||||
def observable_terms(kind, nqubits):
    """Return the Pauli-sum terms of a named test observable.

    Each term is ``(coefficient, ((pauli, site), ...))``.

    * ``"ring_xz"``: ``0.5 * X(i) Z(i + 1)`` around a periodic ring.
    * ``"open_zz"``: ``Z(i) Z(i + 1)`` on an open chain, each weighted by
      ``1 / (nqubits - 1)``.
    * ``"mixed_local"``: ``0.25 X(0) - 0.5 Z(last)`` plus ``0.125 Y(i) Y(i + 1)``
      on every third bond.

    Raises:
        ValueError: ``kind`` is not one of the names above.
    """
    if kind == "ring_xz":
        terms = []
        for site in range(nqubits):
            neighbour = (site + 1) % nqubits
            terms.append((0.5, (("X", site), ("Z", neighbour))))
        return terms

    if kind == "open_zz":
        terms = []
        for site in range(nqubits - 1):
            # Weight computed per bond so an empty chain never divides by zero.
            weight = 1.0 / (nqubits - 1)
            terms.append((weight, (("Z", site), ("Z", site + 1))))
        return terms

    if kind == "mixed_local":
        terms = [
            (0.25, (("X", 0),)),
            (-0.5, (("Z", nqubits - 1),)),
        ]
        for site in range(0, nqubits - 1, 3):
            terms.append((0.125, (("Y", site), ("Y", site + 1))))
        return terms

    raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def exact_pauli_sum(circuit, terms, nqubits):
    """Evaluate a Pauli-sum expectation value from the dense state vector.

    Args:
        circuit: Callable whose result exposes ``state(numpy=True)``.
        terms: Iterable of ``(coefficient, ((pauli, site), ...))``.
        nqubits: Number of qubits; site ``s`` maps to bit ``nqubits - 1 - s``
            of the basis index.

    Returns:
        float: Real part of the accumulated expectation value.

    Raises:
        ValueError: A Pauli label is not one of ``I``, ``X``, ``Y``, ``Z``.
    """
    state = circuit().state(numpy=True).reshape(-1)
    basis = np.arange(state.size, dtype=np.int64)
    total = 0.0 + 0.0j

    for coeff, ops in terms:
        # A Pauli string maps basis state i to phase[i] * |partner[i]>.
        partner = basis.copy()
        phase = np.ones(state.size, dtype=np.complex128)
        for name, site in ops:
            shift = nqubits - 1 - site
            sign = 1 - 2 * ((basis >> shift) & 1)
            name = name.upper()
            if name == "I":
                continue
            if name == "Z":
                phase *= sign
                continue
            if name not in ("X", "Y"):
                raise ValueError(f"Unsupported Pauli {name!r}.")
            partner ^= 1 << shift
            if name == "Y":
                phase *= 1j * sign
        total += coeff * np.vdot(state[partner], phase * state)

    return float(total.real)
|
||||
|
||||
|
||||
def run_vidal(circuit, terms, nqubits, bond, tensor_module):
    """Run ``circuit`` on a ``VidalTEBDExecutor`` and return the Pauli-sum value.

    The executor is built with ``max_bond=bond`` and a fixed ``cut_ratio`` of
    ``1e-12``; the result is converted to ``float``.
    """
    settings = {
        "nqubits": nqubits,
        "max_bond": bond,
        "cut_ratio": 1e-12,
        "tensor_module": tensor_module,
    }
    executor = VidalTEBDExecutor(**settings)
    executor.run_circuit(circuit)
    expectation = executor.expectation_pauli_sum(terms)
    return float(expectation)
|
||||
|
||||
|
||||
def run_segment_mpi(circuit, terms, nqubits, bond, tensor_module, comm):
    """Run ``circuit`` on a ``SegmentVidalMPIExecutor`` over ``comm``.

    Returns whatever ``expectation_pauli_sum_root`` yields, unconverted.
    NOTE(review): the method name suggests the value is only meaningful on
    the root rank - confirm against the executor.
    """
    settings = {
        "nqubits": nqubits,
        "max_bond": bond,
        "cut_ratio": 1e-12,
        "tensor_module": tensor_module,
        "comm": comm,
    }
    executor = SegmentVidalMPIExecutor(**settings)
    executor.run_circuit(circuit)
    return executor.expectation_pauli_sum_root(terms)
|
||||
|
||||
|
||||
def main():
    """Compare Vidal expectation values with exact state-vector references.

    For every requested circuit kind and observable, the exact value is
    computed on rank 0 (and broadcast under MPI), the Vidal or segmented MPI
    executor is run, and rank 0 prints one row with the absolute error and
    elapsed seconds.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--nqubits", type=int, default=16)
    cli.add_argument("--nlayers", type=int, default=6)
    cli.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument("--tensor-module", choices=("torch", "numpy"), default="torch")
    cli.add_argument("--torch-threads", type=int, default=32)
    cli.add_argument("--mpi", action="store_true")
    cli.add_argument(
        "--circuits",
        nargs="+",
        default=("brickwall", "reversed_cnot", "rx_ry_cz"),
    )
    cli.add_argument(
        "--observables",
        nargs="+",
        default=("ring_xz", "open_zz", "mixed_local"),
    )
    args = cli.parse_args()

    torch.set_num_threads(args.torch_threads)

    comm, rank, size = None, 0, 1
    if args.mpi:
        from mpi4py import MPI

        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()
        size = comm.Get_size()

    is_root = rank == 0
    if is_root:
        mode = f"vidal-segment-mpi/{size}" if args.mpi else "vidal"
        print(
            f"mode={mode} nqubits={args.nqubits} nlayers={args.nlayers} "
            f"bond={args.bond} tensor_module={args.tensor_module}"
        )
        print("circuit observable exact value abs_error seconds")

    for circuit_kind in args.circuits:
        circuit = build_circuit(circuit_kind, args.nqubits, args.nlayers, args.seed)

        # Only rank 0 computes the dense reference values.
        exact_values = None
        if is_root:
            exact_values = {}
            for obs in args.observables:
                reference_terms = observable_terms(obs, args.nqubits)
                exact_values[obs] = exact_pauli_sum(
                    circuit, reference_terms, args.nqubits
                )
        if comm is not None:
            exact_values = comm.bcast(exact_values, root=0)

        for obs_kind in args.observables:
            terms = observable_terms(obs_kind, args.nqubits)
            start = time.perf_counter()
            if args.mpi:
                value = run_segment_mpi(
                    circuit,
                    terms,
                    args.nqubits,
                    args.bond,
                    args.tensor_module,
                    comm,
                )
            else:
                value = run_vidal(
                    circuit, terms, args.nqubits, args.bond, args.tensor_module
                )
            if not is_root:
                continue
            elapsed = time.perf_counter() - start
            exact = exact_values[obs_kind]
            print(
                f"{circuit_kind} {obs_kind} {exact:.16e} {value:.16e} "
                f"{abs(value - exact):.6e} {elapsed:.3f}"
            )


if __name__ == "__main__":
    main()
|
||||
209
tools/vidal_mpi_contest_runner.py
Normal file
209
tools/vidal_mpi_contest_runner.py
Normal file
@@ -0,0 +1,209 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def optional_int(text):
    """Parse an integer option, mapping "no limit" spellings to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any case) give
    ``None``; anything else is passed to ``int``.
    """
    unlimited = isinstance(text, str) and text.lower() in {
        "none",
        "null",
        "inf",
        "unlimited",
    }
    return None if unlimited else int(text)
|
||||
|
||||
|
||||
def optional_float(text):
    """Parse a float option, mapping "no limit" spellings to ``None``.

    The strings ``none``, ``null``, ``inf`` and ``unlimited`` (any case) give
    ``None``; anything else is passed to ``float``.
    """
    unlimited = isinstance(text, str) and text.lower() in {
        "none",
        "null",
        "inf",
        "unlimited",
    }
    return None if unlimited else float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
    """Format ``value`` with ``fmt``, or return the text ``"None"`` for ``None``."""
    if value is None:
        return "None"
    return format(value, fmt)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
    """Best-effort request that torch use ``nthreads`` threads.

    A missing torch installation is ignored silently. Any other failure
    (for example torch rejecting ``nthreads``) no longer disappears without a
    trace: it is reported as a ``RuntimeWarning`` and the run continues.
    """
    try:
        import torch

        torch.set_num_threads(nthreads)
    except ImportError:
        # torch is optional here; nothing to configure without it.
        return
    except Exception as exc:
        import warnings

        warnings.warn(
            f"could not set torch thread count to {nthreads!r}: {exc}",
            RuntimeWarning,
            stacklevel=2,
        )
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
    """Build a seeded layered circuit of the requested ``kind``.

    Every layer applies random RY and RZ rotations to each qubit (plus RX for
    ``"rxx_rzz"`` and ``"scramble"``), followed by nearest-neighbour gates:

    * ``"reversed_cnot"``: CNOTs on even then odd pairs; control and target
      are swapped on even pairs in odd layers and odd pairs in even layers.
    * ``"rxx_rzz"``: RXX and RZZ with angles in ``[-0.9, 0.9)`` on pairs
      starting at ``layer % 2``.
    * ``"scramble"``: same with angles in ``[-0.8, 0.8)``, plus a SWAP on
      each pair when ``layer % 5 == 4``.

    Raises:
        ValueError: ``kind`` is none of the above.
    """
    rng = np.random.default_rng(seed)
    circuit = Circuit(nqubits)

    if kind in ("rxx_rzz", "scramble"):
        rotations = (gates.RY, gates.RZ, gates.RX)
    else:
        rotations = (gates.RY, gates.RZ)

    for layer in range(nlayers):
        for q in range(nqubits):
            for rotation in rotations:
                circuit.add(rotation(q, theta=rng.uniform(-math.pi, math.pi)))

        parity = layer % 2
        if kind == "reversed_cnot":
            for q in range(0, nqubits - 1, 2):
                control, target = (q + 1, q) if parity else (q, q + 1)
                circuit.add(gates.CNOT(control, target))
            for q in range(1, nqubits - 1, 2):
                control, target = (q + 1, q) if parity == 0 else (q, q + 1)
                circuit.add(gates.CNOT(control, target))
        elif kind in ("rxx_rzz", "scramble"):
            scrambling = kind == "scramble"
            spread = 0.8 if scrambling else 0.9
            swap_layer = scrambling and layer % 5 == 4
            for q in range(parity, nqubits - 1, 2):
                circuit.add(gates.RXX(q, q + 1, theta=rng.uniform(-spread, spread)))
                circuit.add(gates.RZZ(q, q + 1, theta=rng.uniform(-spread, spread)))
                # NOTE(review): the source indentation was lost; the SWAP is
                # assumed to belong inside this pair loop - confirm.
                if swap_layer:
                    circuit.add(gates.SWAP(q, q + 1))
        else:
            raise ValueError(f"Unknown circuit kind {kind!r}.")

    return circuit
|
||||
|
||||
|
||||
def ring_xz(nqubits):
    """Return the periodic ``sum_q 0.5 * X(q) Z(q + 1)`` symbolic Hamiltonian."""
    form = sum(0.5 * X(q) * Z((q + 1) % nqubits) for q in range(nqubits))
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def open_zz(nqubits):
    """Return the open-chain ``Z(q) Z(q + 1)`` sum, each bond weighted ``1 / (nqubits - 1)``."""
    # The weight stays inside the generator so an empty chain never divides
    # by zero.
    form = sum(
        (1.0 / (nqubits - 1)) * Z(q) * Z(q + 1) for q in range(nqubits - 1)
    )
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def range2_xx(nqubits):
    """Return the ``X(q) X(q + 2)`` sum, each term weighted ``1 / (nqubits - 2)``."""
    # The weight stays inside the generator so an empty sum never divides by
    # zero.
    form = sum(
        (1.0 / (nqubits - 2)) * X(q) * X(q + 2) for q in range(nqubits - 2)
    )
    return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
    """Return a seeded random Hermitian observable acting on ``qubits``.

    The ``dim x dim`` matrix is the Hermitian part of a complex Gaussian
    matrix, scaled to unit ``np.linalg.norm``. ``nqubits`` is accepted but not
    used.

    Returns:
        dict: ``{"matrix": ndarray, "qubits": list}``.
    """
    rng = np.random.default_rng(seed)
    shape = (dim, dim)
    # Real part is drawn before the imaginary part.
    real_part = rng.normal(size=shape)
    imag_part = rng.normal(size=shape)
    raw = real_part + 1j * imag_part
    hermitian = (raw + raw.conj().T) / 2.0
    normalised = hermitian / np.linalg.norm(hermitian)
    return {"matrix": normalised, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def observables_for_case(nqubits, seed):
    """Return the ordered ``(name, observable)`` pairs used by ``run_case``.

    Sites are placed at the quarter, half and three-quarter points of the
    chain. Most entries are ``SymbolicHamiltonian`` objects; the two
    ``dense*`` entries are the dicts produced by ``dense_observable``.
    """
    q1 = nqubits // 4
    q2 = nqubits // 2
    q3 = (3 * nqubits) // 4
    last = nqubits - 1

    def symbolic(form):
        return hamiltonians.SymbolicHamiltonian(form=form)

    observables = []
    observables.append(("boundary_ZZ_q1", symbolic(Z(q1 - 1) * Z(q1))))
    observables.append(("boundary_ZZ_q2", symbolic(Z(q2 - 1) * Z(q2))))
    observables.append(("boundary_ZZ_q3", symbolic(Z(q3 - 1) * Z(q3))))
    observables.append(
        ("long_Z_5_sites", symbolic(Z(0) * Z(q1) * Z(q2) * Z(q3) * Z(last)))
    )
    observables.append(
        ("mixed_XZYZX", symbolic(X(0) * Z(q1) * Y(q2) * Z(q3) * X(last)))
    )
    observables.append(("ring_xz", ring_xz(nqubits)))
    observables.append(("open_zz", open_zz(nqubits)))
    observables.append(("range2_xx", range2_xx(nqubits)))
    observables.append(("complex_iZ0", symbolic(1.0j * Z(0))))
    # Distinct seed offsets give the two dense observables different matrices.
    observables.append(
        ("dense2_mid", dense_observable(nqubits, (q2 - 1, q2), seed + 101, 4))
    )
    observables.append(
        ("dense3_spread", dense_observable(nqubits, (q1, q2, q3), seed + 202, 8))
    )
    return observables
|
||||
|
||||
|
||||
def run_case(args):
    """Evaluate every selected observable of one circuit with ``VidalBackend``.

    Args:
        args: Parsed CLI namespace; reads ``torch_threads``, ``kind``,
            ``nqubits``, ``nlayers``, ``seed``, ``obs_filter``, ``label``,
            ``bond`` and ``cut_ratio``.

    Raises:
        ValueError: ``--obs-filter`` was given but matched no observable.

    A fresh backend is configured per observable. Failures of
    ``backend.expectation`` are caught and reported in the ``status`` column
    (value ``nan``) so the remaining observables still run. Only rank 0
    prints.
    """
    set_torch_threads(args.torch_threads)
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    circuit = build_circuit(args.kind, args.nqubits, args.nlayers, args.seed)
    observables = observables_for_case(args.nqubits, args.seed)
    if args.obs_filter:
        # Strip each name and drop blanks so "a, b" or a trailing comma still
        # selects the intended observables.
        wanted = {
            name.strip() for name in args.obs_filter.split(",") if name.strip()
        }
        observables = [(name, obs) for name, obs in observables if name in wanted]
        if not observables:
            raise ValueError(f"OBS_FILTER matched no observables: {args.obs_filter!r}")

    if rank == 0:
        print("=" * 88, flush=True)
        print(
            "case "
            f"label={args.label} kind={args.kind} ranks={size} "
            f"nqubits={args.nqubits} nlayers={args.nlayers} gates={len(circuit.queue)} "
            f"bond={format_optional(args.bond)} "
            f"cut_ratio={format_optional(args.cut_ratio)} "
            f"torch_threads={args.torch_threads} seed={args.seed} "
            f"obs_filter={args.obs_filter or 'all'}",
            flush=True,
        )
        print(
            "observable value seconds trunc_sum trunc_max status",
            flush=True,
        )

    for obs_name, observable in observables:
        backend = VidalBackend()
        backend.configure_tn_simulation(
            max_bond_dimension=args.bond,
            cut_ratio=args.cut_ratio,
            tensor_module="torch",
            mpi_approach="CT",
            mpi_num_procs=size,
            fallback=False,
        )

        # All ranks start the timed region together.
        comm.Barrier()
        start = time.perf_counter()
        try:
            value = backend.expectation(
                circuit,
                observable,
                preprocess=True,
                compile_circuit=False,
            )
            status = "ok"
        except Exception as exc:  # pragma: no cover - printed for manual runs
            value = np.nan
            # Keep the row on one line: exception type plus first message line.
            status = type(exc).__name__ + ":" + str(exc).split("\n", 1)[0]
        seconds = time.perf_counter() - start

        if rank == 0:
            print(
                f"{obs_name} {value!r} {seconds:.3f} "
                f"{backend.last_truncation_error:.6e} "
                f"{backend.last_max_truncation_error:.6e} {status}",
                flush=True,
            )
|
||||
|
||||
|
||||
def main():
    """Parse the (all-required) case options and hand them to ``run_case``."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--label", required=True)
    parser.add_argument(
        "--kind",
        choices=("reversed_cnot", "rxx_rzz", "scramble"),
        required=True,
    )
    typed_options = (
        ("--nqubits", int),
        ("--nlayers", int),
        ("--bond", optional_int),
        ("--cut-ratio", optional_float),
        ("--seed", int),
        ("--torch-threads", int),
    )
    for flag, parse in typed_options:
        parser.add_argument(flag, type=parse, required=True)
    parser.add_argument("--obs-filter", default="")

    args = parser.parse_args()
    run_case(args)


if __name__ == "__main__":
    main()
|
||||
BIN
trees/contest_tn/main1_long_z_string_34q20l_auto.pkl
Normal file
BIN
trees/contest_tn/main1_long_z_string_34q20l_auto.pkl
Normal file
Binary file not shown.
BIN
trees/contest_tn/main1_ring_xz_8q2l_s1.pkl
Normal file
BIN
trees/contest_tn/main1_ring_xz_8q2l_s1.pkl
Normal file
Binary file not shown.
BIN
trees/contest_tn/smoke_rxx_rzz_34q20l_xz_auto.pkl
Normal file
BIN
trees/contest_tn/smoke_rxx_rzz_34q20l_xz_auto.pkl
Normal file
Binary file not shown.
BIN
trees/contest_tn/smoke_rxx_rzz_34q20l_xz_repeat192.pkl
Normal file
BIN
trees/contest_tn/smoke_rxx_rzz_34q20l_xz_repeat192.pkl
Normal file
Binary file not shown.
BIN
trees/contest_tn/smoke_rxx_rzz_34q20l_xz_timeout_stop.pkl
Normal file
BIN
trees/contest_tn/smoke_rxx_rzz_34q20l_xz_timeout_stop.pkl
Normal file
Binary file not shown.
BIN
trees/rxx_rzz_30q20l.pkl
Normal file
BIN
trees/rxx_rzz_30q20l.pkl
Normal file
Binary file not shown.
BIN
trees/rxx_rzz_30q20l_from_existing_s2_check.pkl
Normal file
BIN
trees/rxx_rzz_30q20l_from_existing_s2_check.pkl
Normal file
Binary file not shown.
BIN
trees/rxx_rzz_30q20l_from_existing_s4.pkl
Normal file
BIN
trees/rxx_rzz_30q20l_from_existing_s4.pkl
Normal file
Binary file not shown.
BIN
trees/rxx_rzz_34q20l_s4.pkl
Normal file
BIN
trees/rxx_rzz_34q20l_s4.pkl
Normal file
Binary file not shown.
Reference in New Issue
Block a user