Compare commits
5 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| edc063f95d | |||
| e38fd02cf3 | |||
| a96b71a8bc | |||
| 4b7fc931ba | |||
| bcad2882fa |
13
.gitignore
vendored
13
.gitignore
vendored
@@ -2,9 +2,10 @@
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
data/
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
@@ -159,13 +160,3 @@ cython_debug/
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
.devenv
|
||||
|
||||
|
||||
# yx
|
||||
bak/
|
||||
path/
|
||||
profiles/
|
||||
vtune_expval/
|
||||
perf*
|
||||
experiments/
|
||||
references/
|
||||
14
README.md
14
README.md
@@ -12,7 +12,7 @@ Tensor Network Types:
|
||||
Tensor Network contractions to:
|
||||
|
||||
- dense vectors
|
||||
- expectation values of given Pauli strings or Pauli-sum observables
|
||||
- expecation values of given Pauli string
|
||||
|
||||
The supported HPC configurations are:
|
||||
|
||||
@@ -26,18 +26,6 @@ Currently, the supported tensor network libraries are:
|
||||
- [cuQuantum](https://github.com/NVIDIA/cuQuantum), an NVIDIA SDK of optimized libraries and tools for accelerating quantum computing workflows.
|
||||
- [quimb](https://quimb.readthedocs.io/en/latest/), an easy but fast python library for ‘quantum information many-body’ calculations, focusing primarily on tensor networks.
|
||||
|
||||
## CPU expectation benchmarks
|
||||
|
||||
The current CPU expectation entrypoint is:
|
||||
|
||||
```sh
|
||||
python -u benchmark_cpu_expectation.py --ansatz mps --nqubits 40 --nlayers 10 --bond 2048 --circuits brickwall_cnot --observables ring_xz
|
||||
```
|
||||
|
||||
Use `--ansatz tn` for the generic TN path and `--mpi` under `mpiexec` for MPI runs.
|
||||
Reusable circuit and observable builders live in `src/qibotn/benchmark_cases.py`; execution logic lives in `src/qibotn/expectation_runner.py`.
|
||||
For Vidal/MPS 1D-chain scale tests, use `run_vidal_mps_cases.sh`.
|
||||
|
||||
## Installation
|
||||
|
||||
To get started:
|
||||
|
||||
@@ -1,285 +0,0 @@
|
||||
"""CLI for CPU TN/MPS expectation benchmarks."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from qibotn.benchmark_cases import (
|
||||
CIRCUITS,
|
||||
OBSERVABLES,
|
||||
build_circuit,
|
||||
observable_terms,
|
||||
parse_names,
|
||||
terms_to_dict,
|
||||
)
|
||||
from qibotn.expectation_runner import (
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
def optional_int(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
|
||||
return "None" if value is None else format(value, fmt)
|
||||
|
||||
|
||||
def should_stop_dask(args):
|
||||
return (
|
||||
not args.keep_dask
|
||||
and args.tn_search_backend == "dask"
|
||||
and args.dask_address is not None
|
||||
and args.tn_load_tree is None
|
||||
)
|
||||
|
||||
|
||||
def stop_dask_cluster(args, rank):
|
||||
if rank != 0 or not should_stop_dask(args):
|
||||
return
|
||||
script = Path(__file__).resolve().parent / "tools" / "manage_tn_dask_cluster.sh"
|
||||
if not script.exists():
|
||||
print(f"dask_stop_skipped reason=missing_script path={script}", flush=True)
|
||||
return
|
||||
|
||||
env = os.environ.copy()
|
||||
parsed = urlparse(args.dask_address)
|
||||
if parsed.hostname:
|
||||
env.setdefault("SCHEDULER_HOST", parsed.hostname)
|
||||
if parsed.port:
|
||||
env.setdefault("SCHEDULER_PORT", str(parsed.port))
|
||||
|
||||
print("dask_stop_after_search start", flush=True)
|
||||
subprocess.run([str(script), "stop"], cwd=str(script.parent.parent), env=env, check=False)
|
||||
print("dask_stop_after_search done", flush=True)
|
||||
|
||||
|
||||
def build_parallel_opts(args):
|
||||
slicing_opts = {}
|
||||
if args.tn_target_slices is not None:
|
||||
slicing_opts["target_slices"] = args.tn_target_slices
|
||||
if args.tn_target_size is not None:
|
||||
slicing_opts["target_size"] = args.tn_target_size
|
||||
|
||||
opts = {
|
||||
"slicing_opts": slicing_opts or None,
|
||||
"search_workers": args.tn_search_workers or args.torch_threads,
|
||||
"max_repeats": args.tn_search_repeats,
|
||||
"max_time": args.tn_search_time,
|
||||
"print_stats": not args.no_tn_stats,
|
||||
}
|
||||
if args.tn_search_backend is not None:
|
||||
opts["search_backend"] = args.tn_search_backend
|
||||
if args.dask_address is not None:
|
||||
opts["dask_address"] = args.dask_address
|
||||
if args.tn_save_tree is not None:
|
||||
opts["save_tree_path"] = args.tn_save_tree
|
||||
if args.tn_load_tree is not None:
|
||||
opts["load_tree_path"] = args.tn_load_tree
|
||||
if args.tn_search_only:
|
||||
opts["search_only"] = True
|
||||
if args.tn_debug_trials:
|
||||
opts["debug_trials"] = True
|
||||
if args.tn_contract_implementation is not None:
|
||||
opts["contract_implementation"] = args.tn_contract_implementation
|
||||
if args.dask_close_workers:
|
||||
opts["dask_close_workers"] = True
|
||||
return opts
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--nqubits", type=int, default=40)
|
||||
parser.add_argument("--nlayers", type=int, default=30)
|
||||
parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
|
||||
parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--torch-threads", type=int, default=8)
|
||||
parser.add_argument("--quimb-backend", choices=("numpy", "torch"), default="torch")
|
||||
parser.add_argument(
|
||||
"--dtype",
|
||||
choices=("complex128", "complex64"),
|
||||
default="complex128",
|
||||
)
|
||||
parser.add_argument("--ansatz", choices=("tn", "mps"), default=None)
|
||||
parser.add_argument("--mps", action="store_true")
|
||||
parser.add_argument("--mpi", action="store_true")
|
||||
parser.add_argument("--exact", action="store_true")
|
||||
parser.add_argument("--exact-max-qubits", type=int, default=24)
|
||||
parser.add_argument("--circuits", nargs="+", default=["brickwall_cnot"])
|
||||
parser.add_argument("--observables", nargs="+", default=["ring_xz"])
|
||||
parser.add_argument("--pauli-pattern")
|
||||
parser.add_argument("--tn-target-slices", type=int)
|
||||
parser.add_argument("--tn-target-size", type=int,default=2**32)
|
||||
parser.add_argument("--tn-search-workers", type=int)
|
||||
parser.add_argument("--tn-search-repeats", type=int, default=128)
|
||||
parser.add_argument("--tn-search-time", type=float, default=60.0)
|
||||
parser.add_argument(
|
||||
"--no-tn-stats",
|
||||
action="store_true",
|
||||
help="Do not print per-term TN search/contraction diagnostics.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tn-search-backend",
|
||||
choices=("processpool", "dask"),
|
||||
default="dask",
|
||||
help="Path-search backend. In MPI mode, dask search runs only on rank 0 and broadcasts the tree.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dask-address",
|
||||
help="Dask scheduler address, for example tcp://host:8786. If omitted with dask search, a local cluster is created.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dask-close-workers",
|
||||
action="store_true",
|
||||
help="After dask path search, ask the scheduler to close all currently connected workers.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--keep-dask",
|
||||
action="store_true",
|
||||
help=(
|
||||
"Keep an external dask cluster running after search. By default, "
|
||||
"tools/manage_tn_dask_cluster.sh stop is called after search when "
|
||||
"--dask-address is used."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tn-save-tree",
|
||||
help="Save searched cotengra contraction tree(s) to this pickle file.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tn-load-tree",
|
||||
help="Load cotengra contraction tree(s) from this pickle file and skip path search.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tn-search-only",
|
||||
action="store_true",
|
||||
help="Only run path search and optional --tn-save-tree; skip contraction.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tn-debug-trials",
|
||||
action="store_true",
|
||||
help="Print dask worker summary and per-trial worker start/done logs.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tn-contract-implementation",
|
||||
choices=("auto", "cotengra", "autoray", "cpp"),
|
||||
help="cotengra contraction implementation for TN contraction.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
ansatz = "mps" if args.mps else (args.ansatz or "tn")
|
||||
circuits = parse_names(args.circuits, CIRCUITS, "circuits")
|
||||
observables = [] if args.pauli_pattern else parse_names(
|
||||
args.observables, OBSERVABLES, "observables"
|
||||
)
|
||||
|
||||
rank = 0
|
||||
if args.mpi:
|
||||
from mpi4py import MPI
|
||||
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
|
||||
config = ExpectationConfig(
|
||||
ansatz=ansatz,
|
||||
mpi=args.mpi,
|
||||
bond=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module="torch",
|
||||
quimb_backend=args.quimb_backend,
|
||||
dtype=args.dtype,
|
||||
torch_threads=args.torch_threads,
|
||||
parallel_opts=build_parallel_opts(args),
|
||||
)
|
||||
|
||||
if rank == 0:
|
||||
mode = "MPI" if args.mpi else "serial"
|
||||
print(
|
||||
f"backend=cpu ansatz={ansatz.upper()} mode={mode} "
|
||||
f"nqubits={args.nqubits} nlayers={args.nlayers} "
|
||||
f"bond={format_optional(args.bond)} "
|
||||
f"cut_ratio={format_optional(args.cut_ratio)} seed={args.seed} "
|
||||
f"quimb_backend={args.quimb_backend} dtype={args.dtype} "
|
||||
f"torch_threads={args.torch_threads} "
|
||||
f"tn_search_backend={args.tn_search_backend}"
|
||||
)
|
||||
print("circuit observable exact value abs_error rel_error seconds")
|
||||
|
||||
try:
|
||||
for circuit_kind in circuits:
|
||||
circuit = build_circuit(circuit_kind, args.nqubits, args.nlayers, args.seed)
|
||||
named_observables = (
|
||||
[(f"pattern:{args.pauli_pattern}", {"pauli_string_pattern": args.pauli_pattern})]
|
||||
if args.pauli_pattern
|
||||
else [
|
||||
(obs_kind, terms_to_dict(observable_terms(obs_kind, args.nqubits)))
|
||||
for obs_kind in observables
|
||||
]
|
||||
)
|
||||
|
||||
for obs_name, observable in named_observables:
|
||||
exact = None
|
||||
if args.exact and rank == 0:
|
||||
if args.nqubits > args.exact_max_qubits:
|
||||
raise ValueError(
|
||||
f"--exact is limited to {args.exact_max_qubits} qubits by default."
|
||||
)
|
||||
exact = exact_for_observable(circuit, observable, args.nqubits)
|
||||
|
||||
result = run_cpu_expectation(circuit, observable, config)
|
||||
if args.mpi and result.rank != 0:
|
||||
continue
|
||||
|
||||
abs_error = float("nan") if exact is None else abs(result.value - exact)
|
||||
rel_error = (
|
||||
float("nan")
|
||||
if exact is None
|
||||
else abs_error / max(abs(exact), 1e-15)
|
||||
)
|
||||
exact_text = "nan" if exact is None else f"{exact:.16e}"
|
||||
print(
|
||||
f"{circuit_kind} {obs_name} {exact_text} {result.value:.16e} "
|
||||
f"{abs_error:.6e} {rel_error:.6e} {result.seconds:.3f}"
|
||||
)
|
||||
for stat in result.parallel_stats or ():
|
||||
cost = stat["path_cost"]
|
||||
search_stats = stat.get("search_stats", {})
|
||||
print(
|
||||
"tn_term_summary "
|
||||
f"term={stat.get('term_index', 0)} "
|
||||
f"search_seconds={stat.get('search_seconds', float('nan')):.3f} "
|
||||
f"contract_seconds={stat.get('contract_seconds', float('nan')):.3f} "
|
||||
f"completed_trials={search_stats.get('completed_trials', 'na')} "
|
||||
f"finite_trials={search_stats.get('finite_trials', 'na')} "
|
||||
f"failed_trials={search_stats.get('failed_trials', 'na')} "
|
||||
f"requested_trials={search_stats.get('requested_trials', 'na')} "
|
||||
f"best_score={search_stats.get('best_score', float('nan')):.6g} "
|
||||
f"slices={cost['nslices']} "
|
||||
f"log10_flops={cost['log10_flops']:.3f} "
|
||||
f"log10_write={cost['log10_write']:.3f} "
|
||||
f"log2_size={cost['log2_size']:.3f} "
|
||||
f"log10_combo={cost['log10_combo']:.3f} "
|
||||
f"peak_memory_gib={cost['peak_memory_gib']:.6g} "
|
||||
f"slicing_overhead={cost['slicing_overhead']:.6g} "
|
||||
f"rank_slices={stat.get('rank_slices', 'na')}"
|
||||
)
|
||||
finally:
|
||||
stop_dask_cluster(args, rank)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
70
doc/make.bat
70
doc/make.bat
@@ -1,35 +1,35 @@
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
||||
@ECHO OFF
|
||||
|
||||
pushd %~dp0
|
||||
|
||||
REM Command file for Sphinx documentation
|
||||
|
||||
if "%SPHINXBUILD%" == "" (
|
||||
set SPHINXBUILD=sphinx-build
|
||||
)
|
||||
set SOURCEDIR=source
|
||||
set BUILDDIR=build
|
||||
|
||||
%SPHINXBUILD% >NUL 2>NUL
|
||||
if errorlevel 9009 (
|
||||
echo.
|
||||
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
|
||||
echo.installed, then set the SPHINXBUILD environment variable to point
|
||||
echo.to the full path of the 'sphinx-build' executable. Alternatively you
|
||||
echo.may add the Sphinx directory to PATH.
|
||||
echo.
|
||||
echo.If you don't have Sphinx installed, grab it from
|
||||
echo.https://www.sphinx-doc.org/
|
||||
exit /b 1
|
||||
)
|
||||
|
||||
if "%1" == "" goto help
|
||||
|
||||
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
goto end
|
||||
|
||||
:help
|
||||
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
|
||||
|
||||
:end
|
||||
popd
|
||||
|
||||
@@ -1,53 +0,0 @@
|
||||
# TN
|
||||
```bash
|
||||
# qibotn目录下
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
CASE=main1 \
|
||||
OBSERVABLES=long_z_string \
|
||||
NQUBITS=34 \
|
||||
NLAYERS=20 \
|
||||
TORCH_THREADS=48 \
|
||||
SEARCH_REPEATS=2048 \
|
||||
SEARCH_TIME=300 \
|
||||
SCHEDULER_HOST=10.20.1.103 \
|
||||
WORKER_HOSTS="10.20.1.103 10.20.6.101" \
|
||||
DASK_ADDRESS="tcp://10.20.1.103:8786" \
|
||||
NWORKERS=84 \
|
||||
NTHREADS=1 \
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2" \
|
||||
tools/run_tn_dask_mpi_all.sh
|
||||
|
||||
# 单独缩并contract计算
|
||||
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2 \
|
||||
.venv/bin/python -u tools/tn_contest_runner.py contract \
|
||||
--mpi \
|
||||
--case main1 \
|
||||
--nqubits 34 \
|
||||
--nlayers 20 \
|
||||
--observables long_z_string \
|
||||
--tree-dir trees/contest_tn \
|
||||
--torch-threads 48 \
|
||||
--dtype complex64
|
||||
```
|
||||
|
||||
# MPS
|
||||
```
|
||||
cd /home/yx/qibotn
|
||||
|
||||
I_MPI_FABRICS=shm:ofi \
|
||||
I_MPI_OFI_PROVIDER=tcp \
|
||||
FI_PROVIDER=tcp \
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2" \
|
||||
TORCH_THREADS=48 \
|
||||
OBS_FILTER=ring_xz \
|
||||
MAIN1_NQ=128 \
|
||||
MAIN1_LAYERS=24 \
|
||||
MAIN1_BOND=1024 \
|
||||
tools/run_vidal_mpi_contest_cases.sh main1
|
||||
```
|
||||
6
poetry.lock
generated
6
poetry.lock
generated
@@ -1733,14 +1733,14 @@ files = [
|
||||
|
||||
[[package]]
|
||||
name = "mako"
|
||||
version = "1.3.11"
|
||||
version = "1.3.10"
|
||||
description = "A super-fast templating language that borrows the best ideas from the existing templating languages."
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
groups = ["main"]
|
||||
files = [
|
||||
{file = "mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77"},
|
||||
{file = "mako-1.3.11.tar.gz", hash = "sha256:071eb4ab4c5010443152255d77db7faa6ce5916f35226eb02dc34479b6858069"},
|
||||
{file = "mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59"},
|
||||
{file = "mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
||||
@@ -31,13 +31,11 @@ cuquantum-python-cu12 = { version = "^25.9.1", optional = true }
|
||||
qmatchatea = { version = "^1.4.3", optional = true }
|
||||
qiskit = { version = "^1.4.0", optional = true }
|
||||
qtealeaves = { version = "^1.5.20", optional = true }
|
||||
distributed = { version = ">=2024", optional = true }
|
||||
|
||||
|
||||
[tool.poetry.extras]
|
||||
cuda = ["cupy-cuda12x", "cuda-toolkit", "nvidia-nccl-cu12", "cuquantum-python-cu12", "mpi4py"]
|
||||
qmatchatea = ["qmatchatea"]
|
||||
dask = ["distributed"]
|
||||
|
||||
[tool.poetry.group.docs]
|
||||
optional = true
|
||||
|
||||
@@ -1,134 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Focused Vidal/MPS expectation test cases for 1D chain circuits.
|
||||
#
|
||||
# These cases intentionally avoid qmatchatea and generic TN paths. They target
|
||||
# the current supported scope: one-qubit gates, adjacent two-qubit gates, and
|
||||
# Pauli-sum expectation values on a 1D chain.
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
MPIEXEC="${MPIEXEC:-mpiexec}"
|
||||
HOSTFILE="${HOSTFILE:-hostfile}"
|
||||
|
||||
THREADS="${THREADS:-32}"
|
||||
MPI_RANKS="${MPI_RANKS:-16}"
|
||||
MPI_THREADS="${MPI_THREADS:-12}"
|
||||
|
||||
export OMP_NUM_THREADS="${OMP_NUM_THREADS:-1}"
|
||||
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-1}"
|
||||
|
||||
run() {
|
||||
echo
|
||||
echo "--------------------------------------------------------------------------------"
|
||||
echo "$*"
|
||||
echo "--------------------------------------------------------------------------------"
|
||||
"$@"
|
||||
}
|
||||
|
||||
case "${1:-help}" in
|
||||
smoke)
|
||||
# Short correctness-oriented run. Useful before starting long jobs.
|
||||
run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
--mps \
|
||||
--nqubits 40 \
|
||||
--nlayers 10 \
|
||||
--bond 2048 \
|
||||
--torch-threads "$THREADS" \
|
||||
--circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz range2_xx long_z_string
|
||||
;;
|
||||
|
||||
convergence)
|
||||
# Same circuit/observable, increasing bond. Check value convergence.
|
||||
for bond in ${BONDS:-4096 16384 65536}; do
|
||||
run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
--mps \
|
||||
--nqubits "${NQ:-80}" \
|
||||
--nlayers "${LAYERS:-16}" \
|
||||
--bond "$bond" \
|
||||
--torch-threads "$THREADS" \
|
||||
--circuits "${CIRCUIT:-brickwall_cnot}" \
|
||||
--observables "${OBSERVABLE:-ring_xz}"
|
||||
done
|
||||
;;
|
||||
|
||||
single-long)
|
||||
# Single long Vidal run. On node-3, a similar n=40,l=30,bond=2048 case
|
||||
# took about 9 minutes for one expectation. This one is meant to be longer.
|
||||
run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
--mps \
|
||||
--nqubits "${NQ:-80}" \
|
||||
--nlayers "${LAYERS:-16}" \
|
||||
--bond "${BOND:-65536}" \
|
||||
--torch-threads "$THREADS" \
|
||||
--circuits "${CIRCUIT:-brickwall_cnot}" \
|
||||
--observables "${OBSERVABLE:-ring_xz}"
|
||||
;;
|
||||
|
||||
suite-long)
|
||||
# Application-style multi-circuit, multi-observable MPS run.
|
||||
# This is intentionally multi-term and should run much longer than single-long.
|
||||
run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
--mps \
|
||||
--nqubits "${NQ:-80}" \
|
||||
--nlayers "${LAYERS:-16}" \
|
||||
--bond "${BOND:-65536}" \
|
||||
--torch-threads "$THREADS" \
|
||||
--circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz mixed_local range2_xx long_z_string
|
||||
;;
|
||||
|
||||
mpi-long)
|
||||
# Multi-node Vidal segmented MPS run. Uses HOSTFILE.
|
||||
run "$MPIEXEC" -hostfile "$HOSTFILE" -n "$MPI_RANKS" "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
--mpi --mps \
|
||||
--nqubits "${NQ:-80}" \
|
||||
--nlayers "${LAYERS:-16}" \
|
||||
--bond "${BOND:-65536}" \
|
||||
--torch-threads "$MPI_THREADS" \
|
||||
--circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz mixed_local range2_xx long_z_string
|
||||
;;
|
||||
|
||||
stress)
|
||||
# Heavier entanglement. Start only after single-long is stable.
|
||||
run "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
--mps \
|
||||
--nqubits "${NQ:-80}" \
|
||||
--nlayers "${LAYERS:-18}" \
|
||||
--bond "${BOND:-262144}" \
|
||||
--torch-threads "${THREADS:-48}" \
|
||||
--circuits "${CIRCUIT:-rxx_rzz}" \
|
||||
--observables ring_xz open_zz range2_xx
|
||||
;;
|
||||
|
||||
help|*)
|
||||
cat <<'EOF'
|
||||
Usage: ./run_vidal_mps_cases.sh [smoke|convergence|single-long|suite-long|mpi-long|stress]
|
||||
|
||||
Common overrides:
|
||||
PYTHON_BIN=.venv/bin/python
|
||||
THREADS=32
|
||||
OMP_NUM_THREADS=1 MKL_NUM_THREADS=1
|
||||
|
||||
Single-node scale overrides:
|
||||
NQ=80 LAYERS=16 BOND=65536
|
||||
CIRCUIT=brickwall_cnot
|
||||
OBSERVABLE=ring_xz
|
||||
BONDS="4096 16384 65536" # for convergence mode
|
||||
|
||||
Multi-node overrides:
|
||||
HOSTFILE=hostfile
|
||||
MPI_RANKS=16 MPI_THREADS=12
|
||||
|
||||
Recommended first runs:
|
||||
./run_vidal_mps_cases.sh smoke
|
||||
./run_vidal_mps_cases.sh convergence
|
||||
./run_vidal_mps_cases.sh single-long
|
||||
EOF
|
||||
;;
|
||||
esac
|
||||
@@ -3,10 +3,9 @@ from typing import Union
|
||||
from qibo.config import raise_error
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.backends.cpu import CpuTensorNet
|
||||
from qibotn.backends.cutensornet import CuTensorNet # pylint: disable=E0401
|
||||
|
||||
PLATFORMS = ("cutensornet", "cpu", "quimb", "qmatchatea", "vidal")
|
||||
PLATFORMS = ("cutensornet", "quimb", "qmatchatea")
|
||||
|
||||
|
||||
class MetaBackend:
|
||||
@@ -25,12 +24,10 @@ class MetaBackend:
|
||||
|
||||
if platform == "cutensornet": # pragma: no cover
|
||||
return CuTensorNet(runcard)
|
||||
elif platform == "cpu":
|
||||
return CpuTensorNet(runcard)
|
||||
elif platform == "quimb": # pragma: no cover
|
||||
import qibotn.backends.quimb as qmb
|
||||
|
||||
quimb_backend = kwargs.get("quimb_backend", "torch")
|
||||
quimb_backend = kwargs.get("quimb_backend", "numpy")
|
||||
contraction_optimizer = kwargs.get("contraction_optimizer", "auto-hq")
|
||||
return qmb.BACKENDS[quimb_backend](
|
||||
quimb_backend=quimb_backend, contraction_optimizer=contraction_optimizer
|
||||
@@ -39,10 +36,6 @@ class MetaBackend:
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
|
||||
return QMatchaTeaBackend()
|
||||
elif platform == "vidal":
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
return VidalBackend()
|
||||
else:
|
||||
raise_error(
|
||||
NotImplementedError,
|
||||
|
||||
@@ -1,752 +0,0 @@
|
||||
"""CPU tensor-network backend with cutensornet-style runcard support."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import pickle
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from qibo import hamiltonians
|
||||
from qibo.backends import NumpyBackend
|
||||
from qibo.config import raise_error
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.backends.vidal import (
|
||||
_observable_mpo_tensors,
|
||||
_operator_terms_to_mpo,
|
||||
_symbolic_hamiltonian_to_operator_terms,
|
||||
_unsupported_reason,
|
||||
)
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor
|
||||
from qibotn.observables import check_observable
|
||||
from qibotn.result import TensorNetworkResult
|
||||
|
||||
|
||||
def _as_bool_or_dict(value, name):
|
||||
if isinstance(value, (bool, dict)):
|
||||
return value
|
||||
raise TypeError(f"{name} has an unexpected type")
|
||||
|
||||
|
||||
def _bind_numa_node(rank):
|
||||
"""Bind the calling process (or thread) to the NUMA node for *rank*.
|
||||
|
||||
The MPI rank is converted to a local (per-node) rank through the
|
||||
environment variables commonly set by Open MPI, MVAPICH, and Slurm.
|
||||
The process CPU affinity and NUMA memory policy are set accordingly.
|
||||
|
||||
Returns the NUMA domain that was selected, or ``None`` if the binding
|
||||
could not be determined.
|
||||
"""
|
||||
current_affinity = os.sched_getaffinity(0)
|
||||
online_cpus = set(range(os.cpu_count() or 1))
|
||||
if current_affinity and current_affinity != online_cpus:
|
||||
# MPI launchers such as Intel MPI often pin local ranks correctly
|
||||
# before Python starts. Do not narrow that placement further.
|
||||
return None
|
||||
|
||||
local_rank = rank
|
||||
for name in (
|
||||
"OMPI_COMM_WORLD_LOCAL_RANK",
|
||||
"MV2_COMM_WORLD_LOCAL_RANK",
|
||||
"MPI_LOCALRANKID",
|
||||
"I_MPI_LOCAL_RANK",
|
||||
"SLURM_LOCALID",
|
||||
):
|
||||
try:
|
||||
local_rank = int(os.environ[name])
|
||||
break
|
||||
except (KeyError, ValueError):
|
||||
pass
|
||||
|
||||
domains = _available_numa_domains()
|
||||
if not domains:
|
||||
return None
|
||||
|
||||
local_size = _local_world_size()
|
||||
assigned_domains = domains[local_rank::local_size]
|
||||
if not assigned_domains:
|
||||
assigned_domains = [domains[local_rank % len(domains)]]
|
||||
|
||||
domain = assigned_domains[0]
|
||||
cpus = set()
|
||||
for selected in assigned_domains:
|
||||
cpulist = f"/sys/devices/system/node/node{selected}/cpulist"
|
||||
try:
|
||||
cpus.update(_parse_cpu_list(open(cpulist, encoding="utf-8").read().strip()))
|
||||
except (FileNotFoundError, OSError):
|
||||
pass
|
||||
try:
|
||||
if cpus:
|
||||
os.sched_setaffinity(0, cpus)
|
||||
except OSError:
|
||||
pass
|
||||
|
||||
try:
|
||||
import ctypes
|
||||
|
||||
libnuma = ctypes.CDLL("libnuma.so.1")
|
||||
if libnuma.numa_available() >= 0:
|
||||
libnuma.numa_run_on_node(ctypes.c_int(domain))
|
||||
libnuma.numa_set_preferred(ctypes.c_int(domain))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return domain
|
||||
|
||||
|
||||
def _available_numa_domains():
|
||||
nodes = []
|
||||
base = Path("/sys/devices/system/node")
|
||||
try:
|
||||
for path in base.glob("node[0-9]*"):
|
||||
try:
|
||||
nodes.append(int(path.name[4:]))
|
||||
except ValueError:
|
||||
pass
|
||||
except OSError:
|
||||
return []
|
||||
return sorted(nodes)
|
||||
|
||||
|
||||
def _local_world_size():
|
||||
for name in (
|
||||
"OMPI_COMM_WORLD_LOCAL_SIZE",
|
||||
"MV2_COMM_WORLD_LOCAL_SIZE",
|
||||
"MPI_LOCALNRANKS",
|
||||
"I_MPI_LOCAL_SIZE",
|
||||
"SLURM_NTASKS_PER_NODE",
|
||||
):
|
||||
value = os.environ.get(name)
|
||||
if not value:
|
||||
continue
|
||||
try:
|
||||
return max(1, int(str(value).split("(", 1)[0]))
|
||||
except ValueError:
|
||||
pass
|
||||
return 1
|
||||
|
||||
|
||||
def _parse_cpu_list(text):
|
||||
cpus = set()
|
||||
for item in text.split(","):
|
||||
item = item.strip()
|
||||
if not item:
|
||||
continue
|
||||
if "-" in item:
|
||||
start, stop = item.split("-", 1)
|
||||
cpus.update(range(int(start), int(stop) + 1))
|
||||
else:
|
||||
cpus.add(int(item))
|
||||
return cpus
|
||||
|
||||
|
||||
class CpuTensorNet(QibotnBackend, NumpyBackend):
|
||||
"""CPU replacement for the cutensornet runcard execution surface.
|
||||
|
||||
The backend preserves the high-level runcard knobs used by the GPU backend:
|
||||
``MPI_enabled``, ``MPS_enabled`` and ``expectation_enabled``. Generic TN
|
||||
work is delegated to quimb on CPU; MPS expectation uses the Vidal fast path
|
||||
when the circuit is nearest-neighbor and falls back to quimb otherwise.
|
||||
"""
|
||||
|
||||
def __init__(self, runcard=None):
|
||||
super().__init__()
|
||||
self.name = "qibotn"
|
||||
self.platform = "cpu"
|
||||
self.precision = "double"
|
||||
self.configure_tn_simulation(runcard)
|
||||
|
||||
def configure_tn_simulation(self, runcard=None):
|
||||
runcard = {} if runcard is None else runcard
|
||||
self.rank = 0
|
||||
self.MPI_enabled = bool(runcard.get("MPI_enabled", False))
|
||||
self.NCCL_enabled = bool(runcard.get("NCCL_enabled", False))
|
||||
if self.NCCL_enabled:
|
||||
raise_error(NotImplementedError, "NCCL is only available for GPU backends.")
|
||||
|
||||
expectation = runcard.get("expectation_enabled", False)
|
||||
if expectation is True:
|
||||
self.expectation_enabled = True
|
||||
self.observable = None
|
||||
elif expectation is False:
|
||||
self.expectation_enabled = False
|
||||
self.observable = None
|
||||
elif isinstance(expectation, (dict, hamiltonians.SymbolicHamiltonian)):
|
||||
self.expectation_enabled = True
|
||||
self.observable = expectation
|
||||
else:
|
||||
raise TypeError("expectation_enabled has an unexpected type")
|
||||
|
||||
mps = _as_bool_or_dict(runcard.get("MPS_enabled", False), "MPS_enabled")
|
||||
self.MPS_enabled = bool(mps)
|
||||
self.mps_options = mps if isinstance(mps, dict) else {}
|
||||
|
||||
self.max_bond_dimension = runcard.get(
|
||||
"max_bond_dimension",
|
||||
self.mps_options.get("max_bond_dimension", 512),
|
||||
)
|
||||
self.cut_ratio = runcard.get(
|
||||
"cut_ratio",
|
||||
self.mps_options.get(
|
||||
"cut_ratio",
|
||||
self.mps_options.get("svd_method", {}).get("abs_cutoff", 1e-12),
|
||||
),
|
||||
)
|
||||
self.tensor_module = runcard.get("tensor_module", "torch")
|
||||
self.dtype = runcard.get("dtype", "complex128")
|
||||
self.compile_circuit = bool(runcard.get("compile_circuit", False))
|
||||
self.preprocess = bool(runcard.get("preprocess", False))
|
||||
self.mpi_term_batch_size = runcard.get(
|
||||
"mpi_term_batch_size",
|
||||
runcard.get("term_batch_size", None),
|
||||
)
|
||||
self.torch_threads = runcard.get("torch_threads", None)
|
||||
self.quimb_backend = runcard.get("quimb_backend", "torch")
|
||||
self.contraction_optimizer = runcard.get("contraction_optimizer", "auto-hq")
|
||||
self.parallel_opts = runcard.get("parallel_opts", {})
|
||||
self.parallel_stats = []
|
||||
|
||||
def execute_circuit(
|
||||
self,
|
||||
circuit,
|
||||
initial_state=None,
|
||||
nshots=None,
|
||||
prob_type=None,
|
||||
return_array=False,
|
||||
**prob_kwargs,
|
||||
):
|
||||
if initial_state is not None:
|
||||
raise_error(NotImplementedError, "QiboTN CPU backend does not support initial state.")
|
||||
|
||||
if self.torch_threads is not None and self.tensor_module == "torch":
|
||||
import torch
|
||||
|
||||
torch.set_num_threads(self.torch_threads)
|
||||
|
||||
if self.expectation_enabled:
|
||||
value = self.expectation(circuit, self.observable)
|
||||
if self.MPI_enabled and self.rank > 0:
|
||||
return np.asarray([0], dtype=np.int64)
|
||||
dtype = np.complex128 if np.iscomplexobj(value) else np.float64
|
||||
return np.asarray([value], dtype=dtype)
|
||||
|
||||
backend = self._quimb_backend()
|
||||
backend.configure_tn_simulation(
|
||||
ansatz="mps" if self.MPS_enabled else None,
|
||||
max_bond_dimension=self.max_bond_dimension if self.MPS_enabled else None,
|
||||
svd_cutoff=self.cut_ratio,
|
||||
)
|
||||
return backend.execute_circuit(
|
||||
circuit=circuit,
|
||||
nshots=nshots,
|
||||
return_array=return_array,
|
||||
)
|
||||
|
||||
def expectation(self, circuit, observable=None, preprocess=None, compile_circuit=None):
|
||||
mpo_tensors = _observable_mpo_tensors(observable, circuit.nqubits)
|
||||
if mpo_tensors is None:
|
||||
observable = check_observable(observable, circuit.nqubits)
|
||||
use_preprocess = self.preprocess if preprocess is None else preprocess
|
||||
if mpo_tensors is not None and not self.MPS_enabled:
|
||||
raise_error(
|
||||
NotImplementedError,
|
||||
"MPO expectation is currently supported only by the Vidal MPS path.",
|
||||
)
|
||||
|
||||
if self.MPS_enabled:
|
||||
reason = _unsupported_reason(circuit)
|
||||
if reason is None or self.compile_circuit or use_preprocess:
|
||||
return self._vidal_expectation(
|
||||
circuit,
|
||||
observable,
|
||||
preprocess=use_preprocess,
|
||||
compile_circuit=compile_circuit,
|
||||
)
|
||||
|
||||
backend = self._quimb_backend()
|
||||
backend.configure_tn_simulation(
|
||||
ansatz="mps" if self.MPS_enabled else None,
|
||||
max_bond_dimension=self.max_bond_dimension if self.MPS_enabled else None,
|
||||
svd_cutoff=self.cut_ratio,
|
||||
)
|
||||
if self.MPI_enabled:
|
||||
return self._quimb_expectation_mpi(backend, circuit, observable)
|
||||
return self._quimb_expectation_processpool(backend, circuit, observable)
|
||||
|
||||
def _vidal_expectation(
|
||||
self, circuit, observable, preprocess=False, compile_circuit=None
|
||||
):
|
||||
if compile_circuit is None:
|
||||
compile_circuit = self.compile_circuit
|
||||
if preprocess:
|
||||
if self.MPI_enabled:
|
||||
from mpi4py import MPI
|
||||
|
||||
self.rank = MPI.COMM_WORLD.Get_rank()
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
tensor_module=self.tensor_module,
|
||||
compile_circuit=compile_circuit,
|
||||
mpi_approach="CT" if self.MPI_enabled else "SR",
|
||||
mpi_term_batch_size=self.mpi_term_batch_size,
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(
|
||||
circuit,
|
||||
observable,
|
||||
preprocess=True,
|
||||
compile_circuit=compile_circuit,
|
||||
)
|
||||
self.rank = getattr(backend, "rank", self.rank)
|
||||
self.last_truncation_error = getattr(
|
||||
backend, "last_truncation_error", np.nan
|
||||
)
|
||||
self.last_max_truncation_error = getattr(
|
||||
backend, "last_max_truncation_error", np.nan
|
||||
)
|
||||
return value
|
||||
|
||||
mpo_tensors = _observable_mpo_tensors(observable, circuit.nqubits)
|
||||
if self.MPI_enabled:
|
||||
from mpi4py import MPI
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
self.rank = comm.Get_rank()
|
||||
executor = SegmentVidalMPIExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
tensor_module=self.tensor_module,
|
||||
comm=comm,
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
self.last_truncation_error = float(executor.global_truncation_error())
|
||||
self.last_max_truncation_error = float(
|
||||
executor.global_max_truncation_error()
|
||||
)
|
||||
if mpo_tensors is not None:
|
||||
value = executor.expectation_mpo_root(mpo_tensors)
|
||||
else:
|
||||
terms = _symbolic_hamiltonian_to_operator_terms(observable)
|
||||
value = executor.expectation_mpo_root(
|
||||
_operator_terms_to_mpo(terms, circuit.nqubits)
|
||||
)
|
||||
return np.nan if self.rank != 0 else value
|
||||
|
||||
executor = VidalTEBDExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
tensor_module=self.tensor_module,
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
self.last_truncation_error = float(executor.truncation_error)
|
||||
self.last_max_truncation_error = float(executor.max_truncation_error)
|
||||
if mpo_tensors is not None:
|
||||
return executor.expectation_mpo(mpo_tensors)
|
||||
terms = _symbolic_hamiltonian_to_operator_terms(observable)
|
||||
return executor.expectation_mpo(_operator_terms_to_mpo(terms, circuit.nqubits))
|
||||
|
||||
def _quimb_backend(self):
|
||||
import qibotn.backends.quimb as qmb
|
||||
|
||||
return qmb.BACKENDS[self.quimb_backend](
|
||||
quimb_backend=self.quimb_backend,
|
||||
contraction_optimizer=self.contraction_optimizer,
|
||||
)
|
||||
|
||||
def _bind_rank_to_numa_domain(self, rank):
|
||||
self.numa_domain = _bind_numa_node(rank)
|
||||
|
||||
def _default_search_workers(self, nranks=1):
|
||||
if self.torch_threads:
|
||||
return max(1, int(self.torch_threads))
|
||||
return max(1, (os.cpu_count() or 1) // max(1, nranks))
|
||||
|
||||
def _quimb_expectation_processpool(self, backend, circuit, observable):
|
||||
return self._quimb_expectation_search(
|
||||
backend,
|
||||
circuit,
|
||||
observable,
|
||||
method="processpool",
|
||||
comm=None,
|
||||
)
|
||||
|
||||
def _quimb_expectation_mpi(self, backend, circuit, observable):
|
||||
from mpi4py import MPI
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
self.rank = comm.Get_rank()
|
||||
self._bind_rank_to_numa_domain(self.rank)
|
||||
|
||||
return self._quimb_expectation_search(
|
||||
backend,
|
||||
circuit,
|
||||
observable,
|
||||
method="mpi",
|
||||
comm=comm,
|
||||
)
|
||||
|
||||
def _quimb_expectation_search(self, backend, circuit, observable, method, comm=None):
|
||||
rank = comm.Get_rank() if comm is not None else 0
|
||||
size = comm.Get_size() if comm is not None else 1
|
||||
self.rank = rank
|
||||
|
||||
from qibotn.observables import extract_gates_and_qubits
|
||||
from qibotn.parallel import (
|
||||
contraction_tree_costs,
|
||||
parallel_contract,
|
||||
parallel_path_search,
|
||||
)
|
||||
from qibotn.backends.quimb import (
|
||||
PAULI_DENSE_MAX_QUBITS,
|
||||
_pauli_term_to_dense_operator,
|
||||
pauli_product_expectation_tn,
|
||||
)
|
||||
|
||||
opts = dict(self.parallel_opts)
|
||||
user_slicing_opts = opts.get("slicing_opts")
|
||||
search_workers = opts.get("search_workers", self._default_search_workers(size))
|
||||
search_repeats = opts.get("max_repeats", 128)
|
||||
search_time = opts.get("max_time", 60)
|
||||
search_backend = opts.get("search_backend")
|
||||
dask_address = opts.get("dask_address")
|
||||
dask_close_workers = bool(opts.get("dask_close_workers", False))
|
||||
print_stats = bool(opts.get("print_stats", False))
|
||||
debug_trials = bool(opts.get("debug_trials", False))
|
||||
search_only = bool(opts.get("search_only", False))
|
||||
save_tree_path = opts.get("save_tree_path")
|
||||
load_tree_path = opts.get("load_tree_path")
|
||||
loaded_trees = None
|
||||
saved_trees = []
|
||||
saved_costs = []
|
||||
|
||||
if load_tree_path:
|
||||
with Path(load_tree_path).open("rb") as f:
|
||||
payload = pickle.load(f)
|
||||
loaded_trees = payload["trees"] if isinstance(payload, dict) else payload
|
||||
if not isinstance(loaded_trees, (list, tuple)):
|
||||
loaded_trees = [loaded_trees]
|
||||
|
||||
qc = backend._qibo_circuit_to_quimb(
|
||||
circuit,
|
||||
quimb_circuit_type=backend.circuit_ansatz,
|
||||
gate_opts={
|
||||
"max_bond": self.max_bond_dimension,
|
||||
"cutoff": self.cut_ratio,
|
||||
},
|
||||
)
|
||||
|
||||
total_value = 0.0 + 0.0j
|
||||
terms = extract_gates_and_qubits(observable)
|
||||
for term_index, (coeff, factors) in enumerate(terms):
|
||||
if not factors:
|
||||
if self.rank == 0:
|
||||
total_value += coeff
|
||||
continue
|
||||
|
||||
if len(factors) > PAULI_DENSE_MAX_QUBITS:
|
||||
tn = pauli_product_expectation_tn(
|
||||
qc,
|
||||
factors,
|
||||
simplify_sequence="ADCRS",
|
||||
simplify_atol=1e-12,
|
||||
)
|
||||
else:
|
||||
op, where = _pauli_term_to_dense_operator(factors)
|
||||
tn = qc.local_expectation(
|
||||
op,
|
||||
where,
|
||||
rehearse="tn",
|
||||
simplify_sequence="ADCRS",
|
||||
simplify_atol=1e-12,
|
||||
)
|
||||
slicing_opts = self._mpi_slicing_opts(
|
||||
user_slicing_opts,
|
||||
)
|
||||
|
||||
if loaded_trees is not None:
|
||||
if term_index >= len(loaded_trees):
|
||||
raise ValueError(
|
||||
f"Loaded tree file has {len(loaded_trees)} tree(s), "
|
||||
f"but term {term_index} was requested."
|
||||
)
|
||||
tree = loaded_trees[term_index]
|
||||
search_seconds = 0.0
|
||||
if self.rank == 0 and print_stats:
|
||||
print(
|
||||
f"tn_tree_loaded term={term_index} path={load_tree_path}",
|
||||
flush=True,
|
||||
)
|
||||
else:
|
||||
search_start = time.perf_counter()
|
||||
tree = parallel_path_search(
|
||||
tn,
|
||||
tn.outer_inds(),
|
||||
method="dask" if method != "mpi" and search_backend == "dask" else method,
|
||||
total_repeats=search_repeats,
|
||||
max_time=search_time,
|
||||
n_workers=search_workers,
|
||||
slicing_opts=slicing_opts,
|
||||
trial_timeout=opts.get("trial_timeout"),
|
||||
search_backend=search_backend,
|
||||
dask_address=dask_address,
|
||||
debug_trials=debug_trials,
|
||||
dask_close_workers=dask_close_workers,
|
||||
)
|
||||
search_seconds = time.perf_counter() - search_start
|
||||
if tree is None:
|
||||
raise RuntimeError("Failed to find a contraction tree for CPU TN MPI.")
|
||||
if self.parallel_opts.get("contract_implementation") == "cpp":
|
||||
from qibotn.torch_contractor import prepare_torch_cpp_contractor
|
||||
|
||||
prepare_torch_cpp_contractor(tree)
|
||||
|
||||
path_cost = contraction_tree_costs(tree)
|
||||
search_stats = getattr(tree, "qibotn_search_stats", {})
|
||||
if save_tree_path and loaded_trees is None:
|
||||
saved_trees.append(tree)
|
||||
saved_costs.append(path_cost)
|
||||
if self.rank == 0 and print_stats:
|
||||
print(
|
||||
"tn_search_done "
|
||||
f"term={term_index} "
|
||||
f"search_seconds={search_seconds:.3f} "
|
||||
f"completed_trials={search_stats.get('completed_trials', 'na')} "
|
||||
f"finite_trials={search_stats.get('finite_trials', 'na')} "
|
||||
f"failed_trials={search_stats.get('failed_trials', 'na')} "
|
||||
f"requested_trials={search_stats.get('requested_trials', search_repeats)} "
|
||||
f"best_score={search_stats.get('best_score', float('nan')):.6g} "
|
||||
f"slices={path_cost['nslices']} "
|
||||
f"log10_flops={path_cost['log10_flops']:.3f} "
|
||||
f"log10_write={path_cost['log10_write']:.3f} "
|
||||
f"log2_size={path_cost['log2_size']:.3f} "
|
||||
f"log10_combo={path_cost['log10_combo']:.3f} "
|
||||
f"peak_memory_gib={path_cost['peak_memory_gib']:.6g}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
if search_only:
|
||||
self.parallel_stats.append(
|
||||
{
|
||||
"term_index": term_index,
|
||||
"term_factors": tuple(factors),
|
||||
"path_cost": path_cost,
|
||||
"search_stats": search_stats,
|
||||
"tree_slices": int(getattr(tree, "multiplicity", 1)),
|
||||
"slice_assignment": "search_only",
|
||||
"rank_slices": [],
|
||||
"search_seconds": search_seconds,
|
||||
"contract_seconds": 0.0,
|
||||
"search_workers": search_workers,
|
||||
"search_repeats": search_repeats,
|
||||
"search_time": search_time,
|
||||
"search_backend": search_backend or method,
|
||||
"dask_address": dask_address,
|
||||
"numa_domain": getattr(self, "numa_domain", None),
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
if comm is None and int(getattr(tree, "multiplicity", 1)) <= 1:
|
||||
if self.rank == 0:
|
||||
contract_start = time.perf_counter()
|
||||
value = self._contract_term_unsliced(tn, tree, backend)
|
||||
contract_seconds = time.perf_counter() - contract_start
|
||||
if print_stats:
|
||||
print(
|
||||
"tn_contract_done "
|
||||
f"term={term_index} "
|
||||
f"contract_seconds={contract_seconds:.3f}",
|
||||
flush=True,
|
||||
)
|
||||
self.parallel_stats.append(
|
||||
{
|
||||
"term_index": term_index,
|
||||
"term_factors": tuple(factors),
|
||||
"path_cost": path_cost,
|
||||
"search_stats": search_stats,
|
||||
"tree_slices": 1,
|
||||
"slice_assignment": "root",
|
||||
"rank_slices": [1] + [0] * (size - 1),
|
||||
"search_seconds": search_seconds,
|
||||
"contract_seconds": contract_seconds,
|
||||
"search_workers": search_workers,
|
||||
"search_repeats": search_repeats,
|
||||
"search_time": search_time,
|
||||
"search_backend": search_backend or method,
|
||||
"dask_address": dask_address,
|
||||
"numa_domain": getattr(self, "numa_domain", None),
|
||||
}
|
||||
)
|
||||
total_value += coeff * complex(value)
|
||||
continue
|
||||
|
||||
if comm is None:
|
||||
contract_start = time.perf_counter()
|
||||
value = self._contract_term_unsliced(tn, tree, backend)
|
||||
contract_seconds = time.perf_counter() - contract_start
|
||||
if print_stats:
|
||||
print(
|
||||
"tn_contract_done "
|
||||
f"term={term_index} "
|
||||
f"contract_seconds={contract_seconds:.3f}",
|
||||
flush=True,
|
||||
)
|
||||
self.parallel_stats.append(
|
||||
{
|
||||
"term_index": term_index,
|
||||
"term_factors": tuple(factors),
|
||||
"path_cost": path_cost,
|
||||
"search_stats": search_stats,
|
||||
"tree_slices": int(getattr(tree, "multiplicity", 1)),
|
||||
"slice_assignment": "local",
|
||||
"rank_slices": [int(getattr(tree, "multiplicity", 1))],
|
||||
"search_seconds": search_seconds,
|
||||
"contract_seconds": contract_seconds,
|
||||
"search_workers": search_workers,
|
||||
"search_repeats": search_repeats,
|
||||
"search_time": search_time,
|
||||
"search_backend": search_backend or method,
|
||||
"dask_address": dask_address,
|
||||
"numa_domain": getattr(self, "numa_domain", None),
|
||||
}
|
||||
)
|
||||
total_value += coeff * complex(np.asarray(value).reshape(-1)[0])
|
||||
continue
|
||||
|
||||
contract_start = time.perf_counter()
|
||||
arrays = self._term_arrays(tn, backend)
|
||||
value, stats = parallel_contract(
|
||||
tree,
|
||||
arrays,
|
||||
method="mpi",
|
||||
comm=comm,
|
||||
return_stats=True,
|
||||
implementation=self.parallel_opts.get("contract_implementation"),
|
||||
)
|
||||
contract_seconds = time.perf_counter() - contract_start
|
||||
gathered_stats = comm.gather(stats, root=0)
|
||||
if rank == 0:
|
||||
if print_stats:
|
||||
print(
|
||||
"tn_contract_done "
|
||||
f"term={term_index} "
|
||||
f"contract_seconds={contract_seconds:.3f}",
|
||||
flush=True,
|
||||
)
|
||||
self.parallel_stats.append(
|
||||
{
|
||||
"term_index": term_index,
|
||||
"term_factors": tuple(factors),
|
||||
"path_cost": path_cost,
|
||||
"search_stats": search_stats,
|
||||
"tree_slices": stats.nslices,
|
||||
"slice_assignment": stats.assignment,
|
||||
"rank_slices": [
|
||||
item.local_slices for item in gathered_stats
|
||||
],
|
||||
"search_seconds": search_seconds,
|
||||
"contract_seconds": contract_seconds,
|
||||
"search_workers": search_workers,
|
||||
"search_repeats": search_repeats,
|
||||
"search_time": search_time,
|
||||
"search_backend": search_backend or method,
|
||||
"dask_address": dask_address,
|
||||
"numa_domain": getattr(self, "numa_domain", None),
|
||||
}
|
||||
)
|
||||
total_value += coeff * complex(np.asarray(value).reshape(-1)[0])
|
||||
|
||||
if self.rank == 0 and save_tree_path and loaded_trees is None:
|
||||
path = Path(save_tree_path)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with path.open("wb") as f:
|
||||
pickle.dump(
|
||||
{
|
||||
"trees": saved_trees,
|
||||
"costs": saved_costs,
|
||||
"nterms": len(saved_trees),
|
||||
},
|
||||
f,
|
||||
protocol=pickle.HIGHEST_PROTOCOL,
|
||||
)
|
||||
if print_stats:
|
||||
print(
|
||||
f"tn_tree_saved path={save_tree_path} nterms={len(saved_trees)}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
if search_only:
|
||||
return np.nan
|
||||
|
||||
return np.nan if rank != 0 else float(np.real(total_value))
|
||||
|
||||
def _contract_term_unsliced(self, tn, tree, backend):
|
||||
contract_implementation = self.parallel_opts.get("contract_implementation")
|
||||
if contract_implementation == "cpp":
|
||||
if backend.backend != "torch":
|
||||
raise ValueError("contract_implementation='cpp' requires torch backend.")
|
||||
from qibotn.backends.quimb import _torch_cpu_array, _torch_dtype
|
||||
from qibotn.torch_contractor import contract_tree_cpp
|
||||
|
||||
arrays = [
|
||||
_torch_cpu_array(array, dtype=_torch_dtype(self.dtype))
|
||||
for array in tn.arrays
|
||||
]
|
||||
nslices = int(getattr(tree, "multiplicity", 1))
|
||||
if nslices > 1:
|
||||
total = None
|
||||
for slice_id in range(nslices):
|
||||
value = contract_tree_cpp(tree, tree.slice_arrays(arrays, slice_id))
|
||||
total = value if total is None else total + value
|
||||
return total
|
||||
return contract_tree_cpp(tree, arrays)
|
||||
|
||||
if backend.backend == "torch":
|
||||
from qibotn.backends.quimb import _torch_cpu_array, _torch_dtype
|
||||
|
||||
for tensor in tn.tensors:
|
||||
tensor._data = _torch_cpu_array(
|
||||
tensor._data,
|
||||
dtype=_torch_dtype(self.dtype),
|
||||
)
|
||||
return tn.contract(
|
||||
all,
|
||||
output_inds=(),
|
||||
optimize=tree,
|
||||
backend="torch",
|
||||
implementation=contract_implementation,
|
||||
)
|
||||
|
||||
return tn.contract(
|
||||
all,
|
||||
output_inds=(),
|
||||
optimize=tree,
|
||||
backend=backend.backend,
|
||||
implementation=contract_implementation,
|
||||
)
|
||||
|
||||
def _mpi_slicing_opts(self, user_slicing_opts):
|
||||
return None if user_slicing_opts is None else dict(user_slicing_opts)
|
||||
|
||||
def _term_arrays(self, tn, backend):
|
||||
if backend.backend == "torch":
|
||||
from qibotn.backends.quimb import _torch_cpu_array, _torch_dtype
|
||||
|
||||
return [
|
||||
_torch_cpu_array(array, dtype=_torch_dtype(self.dtype))
|
||||
for array in tn.arrays
|
||||
]
|
||||
from qibotn.backends.quimb import _numpy_dtype
|
||||
|
||||
return [backend.engine.asarray(array, dtype=_numpy_dtype(self.dtype)) for array in tn.arrays]
|
||||
@@ -9,10 +9,8 @@ import qmatchatea
|
||||
import qtealeaves
|
||||
from qibo.backends import NumpyBackend
|
||||
from qibo.config import raise_error
|
||||
from qmatchatea.utils import MPISettings
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.observables import check_observable
|
||||
from qibotn.result import TensorNetworkResult
|
||||
|
||||
|
||||
@@ -40,14 +38,6 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
trunc_tracking_mode: str = "C",
|
||||
svd_control: str = "A",
|
||||
ini_bond_dimension: int = 1,
|
||||
tensor_module: str = "numpy",
|
||||
compile_circuit: bool = False,
|
||||
cache_gate_tensors: bool = True,
|
||||
track_memory: bool = False,
|
||||
mpi_approach: str = "SR",
|
||||
mpi_num_procs: int = 1,
|
||||
mpi_where_barriers: int = -1,
|
||||
mpi_isometrization: int = -1,
|
||||
):
|
||||
"""Configure TN simulation given Quantum Matcha Tea interface.
|
||||
|
||||
@@ -85,18 +75,6 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
ini_bond_dimension=ini_bond_dimension,
|
||||
)
|
||||
self.ansatz = ansatz
|
||||
self.tensor_module = tensor_module
|
||||
self.compile_circuit = compile_circuit
|
||||
self.cache_gate_tensors = cache_gate_tensors
|
||||
self.track_memory = track_memory
|
||||
self.mpi_settings = MPISettings(
|
||||
mpi_approach=mpi_approach,
|
||||
num_procs=mpi_num_procs,
|
||||
where_barriers=mpi_where_barriers,
|
||||
isometrization=mpi_isometrization,
|
||||
)
|
||||
if hasattr(self, "qmatchatea_backend"):
|
||||
self._setup_backend_specifics()
|
||||
|
||||
def _setup_backend_specifics(self):
|
||||
"""Configure qmatchatea QCBackend object."""
|
||||
@@ -110,15 +88,12 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
else "Z" if self.precision == "double" else "A"
|
||||
)
|
||||
|
||||
# TODO: once MPI is available for Python, integrate it here
|
||||
self.qmatchatea_backend = qmatchatea.QCBackend(
|
||||
precision=qmatchatea_precision,
|
||||
device=qmatchatea_device,
|
||||
ansatz=self.ansatz,
|
||||
tensor_module=self.tensor_module,
|
||||
mpi_settings=self.mpi_settings,
|
||||
)
|
||||
self.qmatchatea_backend.cache_gate_tensors = self.cache_gate_tensors
|
||||
self.qmatchatea_backend.track_memory = self.track_memory
|
||||
|
||||
def execute_circuit(
|
||||
self,
|
||||
@@ -218,7 +193,7 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
statevector=statevector,
|
||||
)
|
||||
|
||||
def expectation(self, circuit, observable, preprocess=True, compile_circuit=None):
|
||||
def expectation(self, circuit, observable):
|
||||
"""Compute the expectation value of a Qibo-friendly ``observable`` on
|
||||
the Tensor Network constructed from a Qibo ``circuit``.
|
||||
|
||||
@@ -241,14 +216,8 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
simulation setup.
|
||||
"""
|
||||
|
||||
observable = check_observable(observable, circuit.nqubits)
|
||||
|
||||
# From Qibo to Qiskit
|
||||
circuit = self._qibocirc_to_qiskitcirc(
|
||||
circuit,
|
||||
preprocess=preprocess,
|
||||
compile_circuit=compile_circuit,
|
||||
)
|
||||
circuit = self._qibocirc_to_qiskitcirc(circuit)
|
||||
run_qk_params = qmatchatea.preprocessing.qk_transpilation_params(False)
|
||||
|
||||
operators = qmatchatea.QCOperators()
|
||||
@@ -265,37 +234,19 @@ class QMatchaTeaBackend(QibotnBackend, NumpyBackend):
|
||||
operators=operators,
|
||||
)
|
||||
|
||||
if self.qmatchatea_backend.mpi_approach != "SR":
|
||||
from qtealeaves.tooling.mpisupport import MPI
|
||||
|
||||
if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
|
||||
return np.nan
|
||||
|
||||
return np.real(results.observables["custom_hamiltonian"])
|
||||
|
||||
def _qibocirc_to_qiskitcirc(
|
||||
self, qibo_circuit, preprocess=True, compile_circuit=None
|
||||
) -> qiskit.QuantumCircuit:
|
||||
def _qibocirc_to_qiskitcirc(self, qibo_circuit) -> qiskit.QuantumCircuit:
|
||||
"""Convert a Qibo Circuit into a Qiskit Circuit."""
|
||||
# Convert the circuit to QASM 2.0 to qiskit
|
||||
qasm_circuit = qibo_circuit.to_qasm()
|
||||
qiskit_circuit = qiskit.QuantumCircuit.from_qasm_str(qasm_circuit)
|
||||
|
||||
if compile_circuit is None:
|
||||
compile_circuit = self.compile_circuit
|
||||
|
||||
if not preprocess:
|
||||
if compile_circuit:
|
||||
qiskit_circuit = qmatchatea.tensor_compiler(qiskit_circuit)
|
||||
return qiskit_circuit
|
||||
|
||||
# Transpile the circuit to adapt it to the linear structure of the MPS,
|
||||
# with the constraint of having only the gates basis_gates
|
||||
qiskit_circuit = qmatchatea.preprocessing.preprocess(
|
||||
qiskit_circuit,
|
||||
qk_params=qmatchatea.preprocessing.qk_transpilation_params(
|
||||
tensor_compiler=compile_circuit
|
||||
),
|
||||
qk_params=qmatchatea.preprocessing.qk_transpilation_params(),
|
||||
)
|
||||
return qiskit_circuit
|
||||
|
||||
|
||||
@@ -37,129 +37,8 @@ GATE_MAP = {
|
||||
"measure": "measure",
|
||||
}
|
||||
|
||||
PAULI_DENSE_MAX_QUBITS = 8
|
||||
|
||||
|
||||
def _torch_cpu_array(data, dtype=None):
|
||||
"""Convert array-like data to a contiguous CPU torch tensor."""
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
if isinstance(data, torch.Tensor):
|
||||
x = data
|
||||
else:
|
||||
array = np.asarray(data)
|
||||
if any(stride < 0 for stride in array.strides):
|
||||
array = np.ascontiguousarray(array)
|
||||
x = torch.from_numpy(array)
|
||||
|
||||
if x.device.type != "cpu":
|
||||
x = x.cpu()
|
||||
if dtype is not None and x.dtype != dtype:
|
||||
x = x.to(dtype)
|
||||
if not x.is_contiguous():
|
||||
x = x.contiguous()
|
||||
return x
|
||||
|
||||
|
||||
def _torch_dtype(dtype):
|
||||
import torch
|
||||
|
||||
if dtype in ("complex64", "single"):
|
||||
return torch.complex64
|
||||
return torch.complex128
|
||||
|
||||
|
||||
def _numpy_dtype(dtype):
|
||||
import numpy as np
|
||||
|
||||
if dtype in ("complex64", "single"):
|
||||
return np.complex64
|
||||
return np.complex128
|
||||
|
||||
|
||||
def _arrays_to_backend(arrays, backend, engine, dtype="complex128"):
|
||||
if backend == "torch":
|
||||
return [_torch_cpu_array(array, dtype=_torch_dtype(dtype)) for array in arrays]
|
||||
return [engine.asarray(array, dtype=_numpy_dtype(dtype)) for array in arrays]
|
||||
|
||||
|
||||
def _pauli_term_to_dense_operator(factors):
|
||||
op = None
|
||||
where = []
|
||||
for qubit, gate_name in factors:
|
||||
pauli = qu.pauli(gate_name.lower())
|
||||
op = pauli if op is None else op & pauli
|
||||
where.append(qubit)
|
||||
return op, tuple(where)
|
||||
|
||||
|
||||
def pauli_product_expectation_tn(
|
||||
quimb_circuit,
|
||||
factors,
|
||||
simplify_sequence="ADCRS",
|
||||
simplify_atol=1e-12,
|
||||
simplify_equalize_norms=True,
|
||||
):
|
||||
"""Build the scalar TN for ``<psi|P|psi>`` without dense Pauli strings."""
|
||||
import numpy as np
|
||||
|
||||
op_by_site = {
|
||||
int(qubit): qu.pauli(str(gate_name).lower())
|
||||
for qubit, gate_name in factors
|
||||
if str(gate_name).upper() != "I"
|
||||
}
|
||||
ket = quimb_circuit.get_psi_simplified(
|
||||
seq=simplify_sequence,
|
||||
atol=simplify_atol,
|
||||
equalize_norms=simplify_equalize_norms,
|
||||
)
|
||||
bra = ket.conj().reindex(
|
||||
{
|
||||
quimb_circuit.ket_site_ind(qubit): quimb_circuit.bra_site_ind(qubit)
|
||||
for qubit in range(quimb_circuit.N)
|
||||
}
|
||||
)
|
||||
|
||||
tn = bra | ket
|
||||
identity = np.eye(2, dtype=complex)
|
||||
for qubit in range(quimb_circuit.N):
|
||||
data = op_by_site.get(qubit, identity)
|
||||
tn |= qtn.Tensor(
|
||||
data=data,
|
||||
inds=(
|
||||
quimb_circuit.bra_site_ind(qubit),
|
||||
quimb_circuit.ket_site_ind(qubit),
|
||||
),
|
||||
)
|
||||
|
||||
tn.full_simplify_(
|
||||
output_inds=(),
|
||||
seq=simplify_sequence,
|
||||
atol=simplify_atol,
|
||||
equalize_norms=simplify_equalize_norms,
|
||||
)
|
||||
return tn
|
||||
|
||||
|
||||
def pauli_product_expectation(
|
||||
quimb_circuit,
|
||||
factors,
|
||||
backend,
|
||||
optimize,
|
||||
simplify_sequence="ADCRS",
|
||||
simplify_atol=1e-12,
|
||||
):
|
||||
tn = pauli_product_expectation_tn(
|
||||
quimb_circuit,
|
||||
factors,
|
||||
simplify_sequence=simplify_sequence,
|
||||
simplify_atol=simplify_atol,
|
||||
)
|
||||
return tn.contract(all, output_inds=(), optimize=optimize, backend=backend)
|
||||
|
||||
|
||||
def __init__(self, quimb_backend="torch", contraction_optimizer="auto-hq"):
|
||||
def __init__(self, quimb_backend="numpy", contraction_optimizer="auto-hq"):
|
||||
super(self.__class__, self).__init__()
|
||||
|
||||
self.name = "qibotn"
|
||||
@@ -212,7 +91,7 @@ def circuit_ansatz(self):
|
||||
|
||||
|
||||
def setup_backend_specifics(
|
||||
self, quimb_backend="torch", contractions_optimizer="auto-hq"
|
||||
self, quimb_backend="numpy", contractions_optimizer="auto-hq"
|
||||
):
|
||||
"""Setup backend specifics.
|
||||
Args:
|
||||
@@ -288,7 +167,7 @@ def execute_circuit(
|
||||
raise_error(ValueError, "Initial state not None supported only for MPS ansatz.")
|
||||
|
||||
circ_quimb = self.circuit_ansatz.from_openqasm2_str(
|
||||
circuit.to_qasm(), psi0=initial_state, gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff}
|
||||
circuit.to_qasm(), psi0=initial_state
|
||||
)
|
||||
|
||||
if nshots:
|
||||
@@ -307,16 +186,7 @@ def execute_circuit(
|
||||
else:
|
||||
frequencies = None
|
||||
measured_probabilities = None
|
||||
'''
|
||||
if return_array:
|
||||
if self.ansatz == "mps":
|
||||
psi = circ_quimb.psi
|
||||
statevector = psi.to_dense().reshape(-1)
|
||||
else:
|
||||
statevector = circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer)
|
||||
else:
|
||||
statevector = None
|
||||
'''
|
||||
|
||||
statevector = (
|
||||
circ_quimb.to_dense(backend=self.backend, optimize=self.contractions_optimizer)
|
||||
if return_array
|
||||
@@ -421,19 +291,7 @@ def _qibo_circuit_to_quimb(
|
||||
quimb_gate_name = GATE_MAP.get(gate_name, None)
|
||||
if quimb_gate_name == "measure":
|
||||
continue
|
||||
if gate_name == "cu1":
|
||||
theta = gate.parameters[0]
|
||||
c, t = gate.qubits
|
||||
circ.apply_gate("RZ", theta / 2, c)
|
||||
circ.apply_gate("RZ", theta / 2, t)
|
||||
circ.apply_gate("CNOT", c, t)
|
||||
circ.apply_gate("RZ", -theta / 2, t)
|
||||
circ.apply_gate("CNOT", c, t)
|
||||
continue
|
||||
if quimb_gate_name is None:
|
||||
if hasattr(gate, "matrix"):
|
||||
circ.apply_gate_raw(gate.matrix(), getattr(gate, "qubits", ()))
|
||||
continue
|
||||
raise_error(ValueError, f"Gate {gate_name} not supported in Quimb backend.")
|
||||
|
||||
params = getattr(gate, "parameters", ())
|
||||
@@ -476,173 +334,6 @@ def _string_to_quimb_operator(self, op_str):
|
||||
return op
|
||||
|
||||
|
||||
def expectation(self, circuit, observable, parallel=None, parallel_opts=None):
|
||||
"""
|
||||
Compute expectation value with optional parallel acceleration.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
circuit : qibo.models.Circuit
|
||||
The quantum circuit.
|
||||
observable : qibo.hamiltonians.SymbolicHamiltonian or form
|
||||
The observable to measure.
|
||||
parallel : str, optional
|
||||
Parallelization method: 'mpi', 'processpool', or None (default).
|
||||
parallel_opts : dict, optional
|
||||
Options for parallel execution:
|
||||
- max_repeats: int (default 1024)
|
||||
- max_time: int (default 300)
|
||||
- search_workers: int (default 48, processpool only)
|
||||
- mpi_contract: bool (default False, use MPI for contraction)
|
||||
|
||||
Returns
|
||||
-------
|
||||
float
|
||||
The expectation value.
|
||||
"""
|
||||
from qibotn.observables import check_observable, extract_gates_and_qubits
|
||||
|
||||
if parallel_opts is None:
|
||||
parallel_opts = {}
|
||||
|
||||
observable = check_observable(observable, circuit.nqubits)
|
||||
|
||||
if parallel is None:
|
||||
# Use original implementation
|
||||
from qibotn.observables import extract_gates_and_qubits
|
||||
all_terms = extract_gates_and_qubits(observable)
|
||||
|
||||
qc = self._qibo_circuit_to_quimb(
|
||||
circuit,
|
||||
quimb_circuit_type=self.circuit_ansatz,
|
||||
gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff},
|
||||
)
|
||||
|
||||
exp_val = 0.0
|
||||
for coeff, factors in all_terms:
|
||||
if len(factors) > PAULI_DENSE_MAX_QUBITS:
|
||||
val = pauli_product_expectation(
|
||||
qc,
|
||||
factors,
|
||||
backend=self.backend,
|
||||
optimize=self.contractions_optimizer,
|
||||
simplify_sequence="ADCRS",
|
||||
simplify_atol=1e-12,
|
||||
)
|
||||
else:
|
||||
op, where = _pauli_term_to_dense_operator(factors)
|
||||
val = qc.local_expectation(
|
||||
op, where,
|
||||
backend=self.backend,
|
||||
optimize=self.contractions_optimizer,
|
||||
simplify_sequence="ADCRS",
|
||||
simplify_atol=1e-12,
|
||||
)
|
||||
exp_val += coeff * val
|
||||
|
||||
return self.real(exp_val)
|
||||
|
||||
else:
|
||||
# Use parallel implementation
|
||||
return self._expectation_parallel(circuit, observable, parallel, parallel_opts)
|
||||
|
||||
|
||||
def _expectation_parallel(self, circuit, observable, method, opts):
|
||||
"""Parallel expectation value computation."""
|
||||
from qibotn.observables import extract_gates_and_qubits
|
||||
from qibotn.parallel import parallel_path_search, parallel_contract
|
||||
import torch
|
||||
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
comm = MPI.COMM_WORLD if method == 'mpi' else None
|
||||
rank = comm.Get_rank() if comm else 0
|
||||
size = comm.Get_size() if comm else 1
|
||||
except ImportError:
|
||||
comm, rank, size = None, 0, 1
|
||||
|
||||
max_repeats = opts.get('max_repeats', 1024)
|
||||
max_time = opts.get('max_time', 300)
|
||||
search_workers = opts.get('search_workers', 48)
|
||||
mpi_contract = opts.get('mpi_contract', False)
|
||||
torch_threads = opts.get('torch_threads', None)
|
||||
slicing_opts = opts.get('slicing_opts', None)
|
||||
trial_timeout = opts.get('trial_timeout', None)
|
||||
|
||||
qc = self._qibo_circuit_to_quimb(
|
||||
circuit,
|
||||
quimb_circuit_type=self.circuit_ansatz,
|
||||
gate_opts={"max_bond": self.max_bond_dimension, "cutoff": self.svd_cutoff},
|
||||
)
|
||||
|
||||
all_terms = extract_gates_and_qubits(observable)
|
||||
my_terms = all_terms[rank::size]
|
||||
|
||||
if method == 'mpi' and comm:
|
||||
torch.set_num_threads(max(1, 96 // size))
|
||||
elif torch_threads:
|
||||
torch.set_num_threads(torch_threads)
|
||||
|
||||
my_exp = 0.0
|
||||
for coeff, factors in my_terms:
|
||||
if len(factors) > PAULI_DENSE_MAX_QUBITS:
|
||||
tn = pauli_product_expectation_tn(qc, factors)
|
||||
else:
|
||||
op, where = _pauli_term_to_dense_operator(factors)
|
||||
tn = qc.local_expectation(op, where, rehearse='tn')
|
||||
|
||||
tree = parallel_path_search(
|
||||
tn, tn.outer_inds(),
|
||||
method=method,
|
||||
total_repeats=max_repeats,
|
||||
max_time=max_time,
|
||||
n_workers=search_workers,
|
||||
slicing_opts=slicing_opts,
|
||||
trial_timeout=trial_timeout,
|
||||
)
|
||||
|
||||
if tree is None:
|
||||
continue
|
||||
|
||||
if mpi_contract and comm and size > 1:
|
||||
arrays = _arrays_to_backend(tn.arrays, self.backend, self.engine)
|
||||
val = parallel_contract(tree, arrays, method='mpi', comm=comm)
|
||||
else:
|
||||
if self.backend == "torch":
|
||||
for tensor in tn.tensors:
|
||||
tensor._data = _torch_cpu_array(
|
||||
tensor._data, dtype=torch.complex128
|
||||
)
|
||||
val = complex(
|
||||
tn.contract(
|
||||
all,
|
||||
output_inds=(),
|
||||
optimize=tree,
|
||||
backend="torch",
|
||||
)
|
||||
)
|
||||
else:
|
||||
val = complex(
|
||||
tn.contract(
|
||||
all,
|
||||
output_inds=(),
|
||||
optimize=tree,
|
||||
backend=self.backend,
|
||||
)
|
||||
)
|
||||
|
||||
my_exp += coeff * complex(val)
|
||||
|
||||
if comm:
|
||||
all_exp = comm.gather(my_exp, root=0)
|
||||
if rank == 0:
|
||||
total_exp = sum(all_exp)
|
||||
return self.real(total_exp)
|
||||
return 0.0
|
||||
|
||||
return self.real(my_exp)
|
||||
|
||||
|
||||
CLASSES_ROOTS = {"numpy": "Numpy", "torch": "PyTorch", "jax": "Jax"}
|
||||
|
||||
METHODS = {
|
||||
@@ -653,13 +344,11 @@ METHODS = {
|
||||
"exp_value_observable_symbolic": exp_value_observable_symbolic,
|
||||
"_qibo_circuit_to_quimb": _qibo_circuit_to_quimb,
|
||||
"_string_to_quimb_operator": _string_to_quimb_operator,
|
||||
"expectation": expectation,
|
||||
"_expectation_parallel": _expectation_parallel,
|
||||
"circuit_ansatz": circuit_ansatz,
|
||||
}
|
||||
|
||||
|
||||
def _generate_backend(quimb_backend: str = "torch"):
|
||||
def _generate_backend(quimb_backend: str = "numpy"):
|
||||
bases = (QibotnBackend,)
|
||||
|
||||
if quimb_backend == "numpy":
|
||||
@@ -667,14 +356,9 @@ def _generate_backend(quimb_backend: str = "torch"):
|
||||
|
||||
bases += (NumpyBackend,)
|
||||
elif quimb_backend == "torch":
|
||||
try:
|
||||
from qiboml.backends import PyTorchBackend
|
||||
except ImportError:
|
||||
from qibo.backends import NumpyBackend
|
||||
from qiboml.backends import PyTorchBackend
|
||||
|
||||
bases += (NumpyBackend,)
|
||||
else:
|
||||
bases += (PyTorchBackend,)
|
||||
bases += (PyTorchBackend,)
|
||||
elif quimb_backend == "jax":
|
||||
from qiboml.backends import JaxBackend
|
||||
|
||||
|
||||
@@ -1,477 +0,0 @@
|
||||
"""Vidal/TEBD fast-path backend with qmatchatea fallback.
|
||||
|
||||
This backend targets MPS-friendly one-dimensional circuits: one-qubit gates and
|
||||
adjacent two-qubit gates, measured with Pauli-sum expectation values. Unsupported
|
||||
features fall back to the qmatchatea backend so the public behavior remains
|
||||
usable while the fast path is expanded.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
from qibo.backends import NumpyBackend
|
||||
|
||||
from qibotn.backends.abstract import QibotnBackend
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor, _gate_sites
|
||||
from qibotn.observables import check_observable
|
||||
|
||||
|
||||
def _symbolic_hamiltonian_to_pauli_terms(hamiltonian):
|
||||
terms = []
|
||||
factor_pattern = re.compile(r"([^\d]+)(\d+)")
|
||||
for term in hamiltonian.terms:
|
||||
ops = []
|
||||
for factor in term.factors:
|
||||
match = factor_pattern.match(str(factor))
|
||||
if match is None:
|
||||
raise ValueError(f"Unsupported observable factor {factor!r}.")
|
||||
name = match.group(1).upper()
|
||||
if name not in ("I", "X", "Y", "Z"):
|
||||
raise ValueError(f"Unsupported observable operator {name!r}.")
|
||||
if name != "I":
|
||||
ops.append((name, int(match.group(2))))
|
||||
terms.append((complex(term.coefficient), tuple(ops)))
|
||||
return terms
|
||||
|
||||
|
||||
def _symbolic_hamiltonian_to_operator_terms(hamiltonian):
|
||||
terms = []
|
||||
factor_pattern = re.compile(r"([^\d]+)(\d+)")
|
||||
paulis = {
|
||||
"I": np.eye(2, dtype=np.complex128),
|
||||
"X": np.array([[0, 1], [1, 0]], dtype=np.complex128),
|
||||
"Y": np.array([[0, -1j], [1j, 0]], dtype=np.complex128),
|
||||
"Z": np.array([[1, 0], [0, -1]], dtype=np.complex128),
|
||||
}
|
||||
for term in hamiltonian.terms:
|
||||
ops_by_site = {}
|
||||
for factor in term.factors:
|
||||
site = getattr(factor, "target_qubit", None)
|
||||
matrix = getattr(factor, "matrix", None)
|
||||
if site is None or matrix is None:
|
||||
match = factor_pattern.match(str(factor))
|
||||
if match is None:
|
||||
raise ValueError(f"Unsupported observable factor {factor!r}.")
|
||||
name = match.group(1).upper()
|
||||
if name not in paulis:
|
||||
raise ValueError(f"Unsupported observable operator {name!r}.")
|
||||
site = int(match.group(2))
|
||||
matrix = paulis[name]
|
||||
matrix = np.asarray(matrix, dtype=np.complex128)
|
||||
site = int(site)
|
||||
if site in ops_by_site:
|
||||
ops_by_site[site] = ops_by_site[site] @ matrix
|
||||
else:
|
||||
ops_by_site[site] = matrix
|
||||
terms.append((complex(term.coefficient), tuple(ops_by_site.items())))
|
||||
return terms
|
||||
|
||||
|
||||
def _dense_operator_to_product_terms(coeff, qubits, matrix):
|
||||
"""Expand a dense k-local operator into product-matrix terms.
|
||||
|
||||
The dense matrix basis is ordered by the provided ``qubits`` sequence. For
|
||||
example, ``qubits=[2, 5]`` means matrix rows/columns are ordered as
|
||||
``|q2 q5>``.
|
||||
"""
|
||||
qubits = tuple(int(qubit) for qubit in qubits)
|
||||
if len(set(qubits)) != len(qubits):
|
||||
raise ValueError("Dense observable qubits must be unique.")
|
||||
matrix = np.asarray(matrix, dtype=np.complex128)
|
||||
dim = 2 ** len(qubits)
|
||||
if matrix.shape != (dim, dim):
|
||||
raise ValueError(
|
||||
"Dense observable matrix shape must be "
|
||||
f"({dim}, {dim}) for {len(qubits)} qubits."
|
||||
)
|
||||
|
||||
units = [
|
||||
np.array([[1, 0], [0, 0]], dtype=np.complex128),
|
||||
np.array([[0, 1], [0, 0]], dtype=np.complex128),
|
||||
np.array([[0, 0], [1, 0]], dtype=np.complex128),
|
||||
np.array([[0, 0], [0, 1]], dtype=np.complex128),
|
||||
]
|
||||
terms = []
|
||||
for row in range(dim):
|
||||
for col in range(dim):
|
||||
value = complex(coeff) * complex(matrix[row, col])
|
||||
if value == 0:
|
||||
continue
|
||||
ops = []
|
||||
for offset, site in enumerate(qubits):
|
||||
shift = len(qubits) - offset - 1
|
||||
out_bit = (row >> shift) & 1
|
||||
in_bit = (col >> shift) & 1
|
||||
ops.append((site, units[2 * out_bit + in_bit]))
|
||||
terms.append((value, tuple(ops)))
|
||||
return terms
|
||||
|
||||
|
||||
def _dense_observable_to_operator_terms(observable):
|
||||
if not isinstance(observable, dict):
|
||||
return None
|
||||
|
||||
if "matrix" in observable:
|
||||
terms = [observable]
|
||||
else:
|
||||
terms = observable.get("dense_terms")
|
||||
if terms is None:
|
||||
raw_terms = observable.get("terms")
|
||||
if not raw_terms or not any("matrix" in term for term in raw_terms):
|
||||
return None
|
||||
terms = raw_terms
|
||||
|
||||
operator_terms = []
|
||||
for term in terms:
|
||||
if "matrix" not in term:
|
||||
raise ValueError("Dense observable terms must include a matrix.")
|
||||
qubits = term.get("qubits", term.get("sites"))
|
||||
if qubits is None:
|
||||
raise ValueError("Dense observable terms must include qubits or sites.")
|
||||
operator_terms.extend(
|
||||
_dense_operator_to_product_terms(
|
||||
term.get("coefficient", 1.0),
|
||||
qubits,
|
||||
term["matrix"],
|
||||
)
|
||||
)
|
||||
return operator_terms
|
||||
|
||||
|
||||
def _operator_terms_to_mpo(terms, nqubits):
|
||||
"""Build an exact direct-sum MPO for product-operator terms.
|
||||
|
||||
This intentionally favors correctness and generality over compression: an
|
||||
``m``-term sum becomes an MPO with bond dimension ``m``. Local Hamiltonians
|
||||
can be compressed later without changing the public expectation path.
|
||||
"""
|
||||
identity = np.eye(2, dtype=np.complex128)
|
||||
expanded_terms = []
|
||||
for coeff, ops in terms:
|
||||
local_ops = [identity for _ in range(nqubits)]
|
||||
for site, matrix in ops:
|
||||
site = int(site)
|
||||
if site < 0 or site >= nqubits:
|
||||
raise ValueError(f"Observable site {site} is outside the circuit.")
|
||||
matrix = np.asarray(matrix, dtype=np.complex128)
|
||||
if matrix.shape != (2, 2):
|
||||
raise ValueError("Only qubit local operators with shape (2, 2) are supported.")
|
||||
local_ops[site] = matrix
|
||||
expanded_terms.append((complex(coeff), local_ops))
|
||||
|
||||
if not expanded_terms:
|
||||
raise ValueError("Cannot build an MPO from an empty observable.")
|
||||
|
||||
bond_dim = len(expanded_terms)
|
||||
mpo = []
|
||||
for site in range(nqubits):
|
||||
left_dim = 1 if site == 0 else bond_dim
|
||||
right_dim = 1 if site == nqubits - 1 else bond_dim
|
||||
tensor = np.zeros((left_dim, 2, 2, right_dim), dtype=np.complex128)
|
||||
for term_index, (coeff, local_ops) in enumerate(expanded_terms):
|
||||
left = 0 if site == 0 else term_index
|
||||
right = 0 if site == nqubits - 1 else term_index
|
||||
op = coeff * local_ops[site] if site == 0 else local_ops[site]
|
||||
tensor[left, :, :, right] += op
|
||||
mpo.append(tensor)
|
||||
return mpo
|
||||
|
||||
|
||||
def _observable_mpo_tensors(observable, nqubits=None):
|
||||
if isinstance(observable, dict):
|
||||
if "mpo_tensors" in observable:
|
||||
return observable["mpo_tensors"]
|
||||
if "mpo" in observable:
|
||||
return observable["mpo"]
|
||||
if nqubits is not None:
|
||||
terms = _dense_observable_to_operator_terms(observable)
|
||||
if terms is not None:
|
||||
return _operator_terms_to_mpo(terms, nqubits)
|
||||
return None
|
||||
|
||||
|
||||
def _unsupported_reason(circuit):
|
||||
for gate in circuit.queue:
|
||||
name = getattr(gate, "name", gate.__class__.__name__)
|
||||
sites = _gate_sites(gate)
|
||||
if not sites:
|
||||
return f"gate {name} has no target qubits"
|
||||
if len(sites) > 2:
|
||||
return f"gate {name} acts on {len(sites)} qubits"
|
||||
if len(sites) == 2 and abs(sites[0] - sites[1]) != 1:
|
||||
return f"gate {name} is non-adjacent on qubits {sites}"
|
||||
if not hasattr(gate, "matrix"):
|
||||
return f"gate {name} does not expose a matrix"
|
||||
return None
|
||||
|
||||
|
||||
def _can_route_non_adjacent(circuit):
|
||||
"""True if the circuit's only unsupported feature is non-adjacent 2Q gates.
|
||||
|
||||
SWAP routing can fix non-adjacent gates at compile time. Multi-qubit
|
||||
gates and matrix-less gates are truly unsupported.
|
||||
"""
|
||||
for gate in circuit.queue:
|
||||
sites = _gate_sites(gate)
|
||||
if not sites:
|
||||
return False
|
||||
if len(sites) > 2:
|
||||
return False
|
||||
if not hasattr(gate, "matrix"):
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
@dataclass
|
||||
class _PreparedCircuit:
|
||||
nqubits: int
|
||||
queue: list
|
||||
|
||||
|
||||
def _decompose_gate_for_mps(gate, nqubits, stack=()):
|
||||
sites = _gate_sites(gate)
|
||||
if len(sites) <= 2:
|
||||
return [gate]
|
||||
if gate in stack or not hasattr(gate, "decompose"):
|
||||
name = getattr(gate, "name", gate.__class__.__name__)
|
||||
raise ValueError(f"gate {name} acts on {len(sites)} qubits")
|
||||
|
||||
free = [qubit for qubit in range(nqubits) if qubit not in sites]
|
||||
try:
|
||||
decomposed = gate.decompose(*free, use_toffolis=False, method="standard")
|
||||
except TypeError:
|
||||
decomposed = gate.decompose(*free)
|
||||
if not decomposed or decomposed == [gate]:
|
||||
name = getattr(gate, "name", gate.__class__.__name__)
|
||||
raise ValueError(f"gate {name} could not be decomposed for Vidal MPS")
|
||||
|
||||
result = []
|
||||
for item in decomposed:
|
||||
result.extend(_decompose_gate_for_mps(item, nqubits, stack + (gate,)))
|
||||
return result
|
||||
|
||||
|
||||
def _prepare_circuit_for_mps(circuit, decompose=True):
|
||||
if not decompose:
|
||||
return circuit
|
||||
queue = []
|
||||
for gate in circuit.queue:
|
||||
queue.extend(_decompose_gate_for_mps(gate, circuit.nqubits))
|
||||
return _PreparedCircuit(nqubits=circuit.nqubits, queue=queue)
|
||||
|
||||
|
||||
@dataclass
|
||||
class VidalBackend(QibotnBackend, NumpyBackend):
|
||||
"""QiboTN backend using Vidal/TEBD when possible.
|
||||
|
||||
The fast path supports:
|
||||
- one-qubit gates with ``gate.matrix()``;
|
||||
- adjacent two-qubit gates with ``gate.matrix()``;
|
||||
- Qibo ``SymbolicHamiltonian`` / qibotn dict Pauli-sum expectation values;
|
||||
- MPI chain segmentation through ``mpi_approach="CT"``.
|
||||
|
||||
Unsupported operations are delegated to qmatchatea.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.name = "qibotn"
|
||||
self.platform = "vidal"
|
||||
self.precision = "double"
|
||||
self.rank = 0
|
||||
self.last_truncation_error = 0.0
|
||||
self.last_max_truncation_error = 0.0
|
||||
self.configure_tn_simulation()
|
||||
|
||||
def configure_tn_simulation(
|
||||
self,
|
||||
ansatz: str = "MPS",
|
||||
max_bond_dimension: int | None = 10,
|
||||
cut_ratio: float | None = 1e-9,
|
||||
trunc_tracking_mode: str = "C",
|
||||
svd_control: str = "E!",
|
||||
ini_bond_dimension: int = 1,
|
||||
tensor_module: str = "torch",
|
||||
compile_circuit: bool = False,
|
||||
cache_gate_tensors: bool = True,
|
||||
track_memory: bool = False,
|
||||
mpi_approach: str = "SR",
|
||||
mpi_num_procs: int = 1,
|
||||
mpi_where_barriers: int = -1,
|
||||
mpi_isometrization: int = -1,
|
||||
mpi_term_batch_size: int | None = None,
|
||||
fallback: bool = True,
|
||||
):
|
||||
self.ansatz = ansatz
|
||||
self.max_bond_dimension = max_bond_dimension
|
||||
self.cut_ratio = cut_ratio
|
||||
self.trunc_tracking_mode = trunc_tracking_mode
|
||||
self.svd_control = svd_control
|
||||
self.ini_bond_dimension = ini_bond_dimension
|
||||
self.tensor_module = tensor_module
|
||||
self.compile_circuit = compile_circuit
|
||||
self.cache_gate_tensors = cache_gate_tensors
|
||||
self.track_memory = track_memory
|
||||
self.mpi_approach = mpi_approach.upper()
|
||||
self.mpi_num_procs = mpi_num_procs
|
||||
self.mpi_where_barriers = mpi_where_barriers
|
||||
self.mpi_isometrization = mpi_isometrization
|
||||
self.mpi_term_batch_size = mpi_term_batch_size
|
||||
self.fallback = fallback
|
||||
self._fallback_backend = None
|
||||
|
||||
def _setup_backend_specifics(self):
|
||||
return None
|
||||
|
||||
def _qmatchatea_fallback(self):
|
||||
if self._fallback_backend is None:
|
||||
backend = QMatchaTeaBackend()
|
||||
backend.configure_tn_simulation(
|
||||
ansatz=self.ansatz,
|
||||
max_bond_dimension=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
trunc_tracking_mode=self.trunc_tracking_mode,
|
||||
svd_control=self.svd_control,
|
||||
ini_bond_dimension=self.ini_bond_dimension,
|
||||
tensor_module=self.tensor_module,
|
||||
compile_circuit=self.compile_circuit,
|
||||
cache_gate_tensors=self.cache_gate_tensors,
|
||||
track_memory=self.track_memory,
|
||||
mpi_approach=self.mpi_approach,
|
||||
mpi_num_procs=self.mpi_num_procs,
|
||||
mpi_where_barriers=self.mpi_where_barriers,
|
||||
mpi_isometrization=self.mpi_isometrization,
|
||||
)
|
||||
self._fallback_backend = backend
|
||||
return self._fallback_backend
|
||||
|
||||
def _fallback_or_raise(self, reason):
|
||||
if not self.fallback:
|
||||
raise NotImplementedError(reason)
|
||||
return self._qmatchatea_fallback()
|
||||
|
||||
def _preprocess_circuit(self, circuit, compile_circuit):
|
||||
"""Decompose unsupported multi-qubit gates for the local Vidal path."""
|
||||
return _prepare_circuit_for_mps(circuit, decompose=True)
|
||||
|
||||
def _run_fast_executor(self, circuit, compile_circuit=True):
|
||||
if self.mpi_approach == "CT":
|
||||
from mpi4py import MPI
|
||||
|
||||
self.rank = MPI.COMM_WORLD.Get_rank()
|
||||
executor = SegmentVidalMPIExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
tensor_module=self.tensor_module,
|
||||
comm=MPI.COMM_WORLD,
|
||||
)
|
||||
else:
|
||||
self.rank = 0
|
||||
executor = VidalTEBDExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=self.max_bond_dimension,
|
||||
cut_ratio=self.cut_ratio,
|
||||
tensor_module=self.tensor_module,
|
||||
)
|
||||
executor.run_circuit(circuit, compile_circuit=compile_circuit)
|
||||
return executor
|
||||
|
||||
def expectation(self, circuit, observable, preprocess=True, compile_circuit=None):
|
||||
if self.ansatz.upper() != "MPS":
|
||||
backend = self._fallback_or_raise("VidalBackend supports only MPS.")
|
||||
return backend.expectation(circuit, observable, preprocess, compile_circuit)
|
||||
|
||||
original_circuit = circuit
|
||||
if compile_circuit is None:
|
||||
compile_circuit = self.compile_circuit
|
||||
|
||||
if preprocess:
|
||||
try:
|
||||
circuit = self._preprocess_circuit(circuit, compile_circuit)
|
||||
except Exception as exc:
|
||||
backend = self._fallback_or_raise(
|
||||
f"VidalBackend preprocessing failed: {exc}"
|
||||
)
|
||||
return backend.expectation(
|
||||
original_circuit, observable, preprocess, compile_circuit
|
||||
)
|
||||
|
||||
reason = _unsupported_reason(circuit)
|
||||
if reason is not None:
|
||||
# Non-adjacent gates can be routed at compile time
|
||||
if compile_circuit and _can_route_non_adjacent(circuit):
|
||||
pass # proceed with Vidal + SWAP routing
|
||||
else:
|
||||
backend = self._fallback_or_raise(reason)
|
||||
return backend.expectation(
|
||||
original_circuit, observable, preprocess, compile_circuit
|
||||
)
|
||||
|
||||
executor = self._run_fast_executor(circuit, compile_circuit=compile_circuit)
|
||||
self.last_truncation_error = float(
|
||||
executor.global_truncation_error()
|
||||
if hasattr(executor, "global_truncation_error")
|
||||
else executor.truncation_error
|
||||
)
|
||||
self.last_max_truncation_error = float(
|
||||
executor.global_max_truncation_error()
|
||||
if hasattr(executor, "global_max_truncation_error")
|
||||
else executor.max_truncation_error
|
||||
)
|
||||
|
||||
mpo_tensors = _observable_mpo_tensors(observable, circuit.nqubits)
|
||||
if mpo_tensors is not None:
|
||||
if self.mpi_approach == "CT":
|
||||
value = executor.expectation_mpo_root(mpo_tensors)
|
||||
from qtealeaves.tooling.mpisupport import MPI
|
||||
|
||||
if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
|
||||
return np.nan
|
||||
return value
|
||||
return executor.expectation_mpo(mpo_tensors)
|
||||
|
||||
hamiltonian = check_observable(observable, circuit.nqubits)
|
||||
try:
|
||||
terms = _symbolic_hamiltonian_to_operator_terms(hamiltonian)
|
||||
except ValueError as exc:
|
||||
backend = self._fallback_or_raise(str(exc))
|
||||
return backend.expectation(
|
||||
original_circuit, observable, preprocess, compile_circuit
|
||||
)
|
||||
|
||||
mpo_tensors = _operator_terms_to_mpo(terms, circuit.nqubits)
|
||||
if self.mpi_approach == "CT":
|
||||
value = executor.expectation_mpo_root(mpo_tensors)
|
||||
from qtealeaves.tooling.mpisupport import MPI
|
||||
|
||||
if MPI is not None and MPI.COMM_WORLD.Get_rank() != 0:
|
||||
return np.nan
|
||||
return value
|
||||
return executor.expectation_mpo(mpo_tensors)
|
||||
|
||||
def execute_circuit(
|
||||
self,
|
||||
circuit,
|
||||
initial_state=None,
|
||||
nshots=None,
|
||||
prob_type=None,
|
||||
return_array=False,
|
||||
**prob_kwargs,
|
||||
):
|
||||
backend = self._fallback_or_raise(
|
||||
"VidalBackend.execute_circuit is delegated to qmatchatea."
|
||||
)
|
||||
return backend.execute_circuit(
|
||||
circuit,
|
||||
initial_state=initial_state,
|
||||
nshots=nshots,
|
||||
prob_type=prob_type,
|
||||
return_array=return_array,
|
||||
**prob_kwargs,
|
||||
)
|
||||
@@ -1,524 +0,0 @@
|
||||
"""Segmented MPI Vidal/TEBD executor.
|
||||
|
||||
Each rank owns a contiguous interval of sites. Gates fully inside an interval
|
||||
are applied locally. Only two-site gates crossing a rank boundary communicate
|
||||
the neighboring edge tensor and the resulting boundary update.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
|
||||
from qibotn.backends.vidal_tebd import (
|
||||
_asarray,
|
||||
_backend_module,
|
||||
_build_theta_svd_matrix,
|
||||
_disjoint_batches,
|
||||
_fuse_one_site_blocks,
|
||||
_gate_sites,
|
||||
_is_two_qubit_batch,
|
||||
_make_two_site_update,
|
||||
_ones,
|
||||
_real_if_close,
|
||||
_route_non_adjacent_gates,
|
||||
_svd,
|
||||
_tensor_update_from_numpy,
|
||||
_tensor_update_to_numpy,
|
||||
_to_float,
|
||||
_to_numpy,
|
||||
_transpose,
|
||||
VidalTEBDExecutor,
|
||||
)
|
||||
|
||||
_EDGE_TAG = 1701
|
||||
_UPDATE_TAG = 1702
|
||||
_EXPECT_ENV_TAG = 1703
|
||||
_EXPECT_RESULT_TAG = 1704
|
||||
|
||||
|
||||
def _partition_sites(nsites, nranks):
|
||||
base = nsites // nranks
|
||||
rem = nsites % nranks
|
||||
starts = [0]
|
||||
for rank in range(nranks):
|
||||
starts.append(starts[-1] + base + int(rank < rem))
|
||||
return starts
|
||||
|
||||
|
||||
@dataclass
|
||||
class SegmentVidalMPIExecutor:
|
||||
nqubits: int
|
||||
max_bond: int | None
|
||||
comm: object
|
||||
cut_ratio: float | None = 1e-12
|
||||
tensor_module: str = "torch"
|
||||
|
||||
def __post_init__(self):
|
||||
self.rank = self.comm.Get_rank()
|
||||
self.size = self.comm.Get_size()
|
||||
self.starts = _partition_sites(self.nqubits, self.size)
|
||||
self.start = self.starts[self.rank]
|
||||
self.end = self.starts[self.rank + 1]
|
||||
if self.start == self.end:
|
||||
raise ValueError("SegmentVidalMPIExecutor requires at least one site per rank.")
|
||||
|
||||
from qibotn.backends.cpu import _bind_numa_node
|
||||
|
||||
self.numa_domain = _bind_numa_node(self.rank)
|
||||
|
||||
self.xp = _backend_module(self.tensor_module)
|
||||
if self.xp is np:
|
||||
self.dtype = np.complex128
|
||||
self.device = None
|
||||
else:
|
||||
self.dtype = self.xp.complex128
|
||||
self.device = self.xp.device("cpu")
|
||||
|
||||
self.gammas = {}
|
||||
for site in range(self.start, self.end):
|
||||
self.gammas[site] = _asarray(
|
||||
self.xp, [[[1.0 + 0.0j], [0.0 + 0.0j]]], self.dtype
|
||||
)
|
||||
|
||||
self.lambdas = {
|
||||
bond: _ones(self.xp, 1, self.dtype, self.device)
|
||||
for bond in range(self.start, self.end + 1)
|
||||
}
|
||||
self._accumulated_truncation_error = 0.0
|
||||
self._max_truncation_error = 0.0
|
||||
|
||||
@property
|
||||
def truncation_error(self):
|
||||
return self._accumulated_truncation_error
|
||||
|
||||
def global_truncation_error(self):
|
||||
return self.comm.allreduce(self._accumulated_truncation_error, op=MPI.SUM)
|
||||
|
||||
@property
|
||||
def max_truncation_error(self):
|
||||
return self._max_truncation_error
|
||||
|
||||
def global_max_truncation_error(self):
|
||||
return self.comm.allreduce(self._max_truncation_error, op=MPI.MAX)
|
||||
|
||||
def owns_site(self, site):
|
||||
return self.start <= site < self.end
|
||||
|
||||
def owner_of(self, site):
|
||||
return int(np.searchsorted(self.starts, site, side="right") - 1)
|
||||
|
||||
def run_circuit(self, circuit, compile_circuit=True):
|
||||
timings = {
|
||||
"local_compute": 0.0,
|
||||
"edge_exchange": 0.0,
|
||||
"boundary_compute": 0.0,
|
||||
"boundary_update": 0.0,
|
||||
"one_site": 0.0,
|
||||
"gather": 0.0,
|
||||
}
|
||||
|
||||
gates = circuit.queue
|
||||
if compile_circuit:
|
||||
gates = _route_non_adjacent_gates(gates, circuit.nqubits)
|
||||
gates = _fuse_one_site_blocks(gates)
|
||||
for batch in _disjoint_batches(gates):
|
||||
if _is_two_qubit_batch(batch):
|
||||
self._apply_two_site_batch(batch, timings)
|
||||
else:
|
||||
tic = time.perf_counter()
|
||||
for gate in batch:
|
||||
sites = _gate_sites(gate)
|
||||
if len(sites) == 1 and self.owns_site(sites[0]):
|
||||
op = _asarray(self.xp, gate.matrix(), self.dtype)
|
||||
self.apply_one_site(op, sites[0])
|
||||
elif len(sites) == 2:
|
||||
self._apply_two_site_batch([gate], timings)
|
||||
elif len(sites) > 2:
|
||||
raise NotImplementedError("Only one- and two-qubit gates are supported.")
|
||||
timings["one_site"] += time.perf_counter() - tic
|
||||
|
||||
return timings
|
||||
|
||||
def apply_one_site(self, op, pos):
|
||||
self.gammas[pos] = self.xp.einsum("st,atb->asb", op, self.gammas[pos])
|
||||
|
||||
def _apply_two_site_batch(self, batch, timings):
|
||||
local_gates = []
|
||||
boundary_specs = []
|
||||
recv_left_update = False
|
||||
for gate in batch:
|
||||
sites = _gate_sites(gate)
|
||||
if abs(sites[0] - sites[1]) != 1:
|
||||
raise NotImplementedError("Segment Vidal supports adjacent two-qubit gates only.")
|
||||
left, right = sorted(sites)
|
||||
left_owner = self.owner_of(left)
|
||||
right_owner = self.owner_of(right)
|
||||
if left_owner == self.rank and right_owner == self.rank:
|
||||
local_gates.append(gate)
|
||||
elif left_owner == self.rank:
|
||||
boundary_specs.append((gate, left, right))
|
||||
elif right_owner == self.rank:
|
||||
recv_left_update = True
|
||||
|
||||
tic = time.perf_counter()
|
||||
edge_send_req = None
|
||||
if recv_left_update:
|
||||
edge_send_req = self.comm.isend(
|
||||
self._edge_payload(), dest=self.rank - 1, tag=_EDGE_TAG
|
||||
)
|
||||
right_edge = (
|
||||
self.comm.recv(source=self.rank + 1, tag=_EDGE_TAG)
|
||||
if boundary_specs
|
||||
else None
|
||||
)
|
||||
timings["edge_exchange"] += time.perf_counter() - tic
|
||||
|
||||
boundary_update = None
|
||||
tic = time.perf_counter()
|
||||
for gate, left, right in boundary_specs:
|
||||
boundary_update = self._compute_boundary_update(
|
||||
gate, left, right, right_edge
|
||||
)
|
||||
timings["boundary_compute"] += time.perf_counter() - tic
|
||||
|
||||
tic = time.perf_counter()
|
||||
update_send_req = None
|
||||
if boundary_update is not None:
|
||||
update_send_req = self.comm.isend(
|
||||
boundary_update, dest=self.rank + 1, tag=_UPDATE_TAG
|
||||
)
|
||||
timings["boundary_update"] += time.perf_counter() - tic
|
||||
|
||||
tic = time.perf_counter()
|
||||
local_items = [
|
||||
self._compute_owned_two_site_update(gate)
|
||||
for gate in local_gates
|
||||
]
|
||||
timings["local_compute"] += time.perf_counter() - tic
|
||||
|
||||
tic = time.perf_counter()
|
||||
left_boundary_update = (
|
||||
self.comm.recv(source=self.rank - 1, tag=_UPDATE_TAG)
|
||||
if recv_left_update
|
||||
else None
|
||||
)
|
||||
if update_send_req is not None:
|
||||
update_send_req.wait()
|
||||
if edge_send_req is not None:
|
||||
edge_send_req.wait()
|
||||
timings["boundary_update"] += time.perf_counter() - tic
|
||||
|
||||
for update in local_items:
|
||||
self._install_update(update)
|
||||
if boundary_update is not None:
|
||||
self._install_update(boundary_update)
|
||||
if left_boundary_update is not None:
|
||||
self._install_update(left_boundary_update)
|
||||
|
||||
def _edge_payload(self):
|
||||
return {
|
||||
"start": self.start,
|
||||
"end": self.end,
|
||||
"gamma_start": _to_numpy(self.gammas[self.start]),
|
||||
"lambda_after_start": _to_numpy(self.lambdas[self.start + 1]),
|
||||
}
|
||||
|
||||
def _compute_owned_two_site_update(self, gate):
|
||||
sites = _gate_sites(gate)
|
||||
op = _asarray(self.xp, gate.matrix(), self.dtype)
|
||||
left, right = sites
|
||||
if left > right:
|
||||
left, right = right, left
|
||||
op = _transpose(self.xp, op.reshape(2, 2, 2, 2), (1, 0, 3, 2)).reshape(4, 4)
|
||||
item = self._build_item(
|
||||
left,
|
||||
op,
|
||||
self.lambdas[left],
|
||||
self.lambdas[left + 1],
|
||||
self.lambdas[left + 2],
|
||||
self.gammas[left],
|
||||
self.gammas[right],
|
||||
)
|
||||
split = _svd(self.xp, item["matrix"])
|
||||
return _make_two_site_update(
|
||||
item, *split, self.max_bond, self.cut_ratio, self.xp
|
||||
)
|
||||
|
||||
def _compute_boundary_update(self, gate, left, right, remote):
|
||||
op = _asarray(self.xp, gate.matrix(), self.dtype)
|
||||
sites = _gate_sites(gate)
|
||||
if sites[0] > sites[1]:
|
||||
op = _transpose(self.xp, op.reshape(2, 2, 2, 2), (1, 0, 3, 2)).reshape(4, 4)
|
||||
|
||||
gamma_right = _asarray(self.xp, remote["gamma_start"], self.dtype)
|
||||
lam_right = _asarray(
|
||||
self.xp,
|
||||
remote["lambda_after_start"],
|
||||
self.xp.float64 if self.xp is not np else np.float64,
|
||||
)
|
||||
item = self._build_item(
|
||||
left,
|
||||
op,
|
||||
self.lambdas[left],
|
||||
self.lambdas[left + 1],
|
||||
lam_right,
|
||||
self.gammas[left],
|
||||
gamma_right,
|
||||
)
|
||||
split = _svd(self.xp, item["matrix"])
|
||||
return _tensor_update_to_numpy(
|
||||
_make_two_site_update(
|
||||
item, *split, self.max_bond, self.cut_ratio, self.xp
|
||||
)
|
||||
)
|
||||
|
||||
def _build_item(self, site, op, lam_left, lam_mid, lam_right, gamma_left, gamma_right):
|
||||
result = _build_theta_svd_matrix(
|
||||
op, self.xp, lam_left, lam_mid, lam_right, gamma_left, gamma_right
|
||||
)
|
||||
result["site"] = site
|
||||
result["lam_left"] = lam_left
|
||||
result["lam_right"] = lam_right
|
||||
return result
|
||||
|
||||
def _install_update(self, update):
|
||||
if isinstance(update["left"], np.ndarray):
|
||||
update = _tensor_update_from_numpy(self.xp, update, self.dtype)
|
||||
truncation_error = update.get("truncation_error", 0.0)
|
||||
self._accumulated_truncation_error += truncation_error
|
||||
self._max_truncation_error = max(
|
||||
self._max_truncation_error,
|
||||
truncation_error,
|
||||
)
|
||||
site = update["site"]
|
||||
if self.owns_site(site):
|
||||
self.gammas[site] = update["left"]
|
||||
if self.owns_site(site + 1):
|
||||
self.gammas[site + 1] = update["right"]
|
||||
if self.start <= site + 1 <= self.end:
|
||||
self.lambdas[site + 1] = update["lambda"]
|
||||
|
||||
def gather_full_state(self):
|
||||
payload = {
|
||||
"start": self.start,
|
||||
"end": self.end,
|
||||
"gammas": {site: _to_numpy(tensor) for site, tensor in self.gammas.items()},
|
||||
"lambdas": {bond: _to_numpy(tensor) for bond, tensor in self.lambdas.items()},
|
||||
}
|
||||
return self.comm.gather(payload, root=0)
|
||||
|
||||
def expectation_pauli_sum_root(self, terms, term_batch_size=None):
|
||||
paulis = {
|
||||
"I": self._eye(2),
|
||||
"X": _asarray(self.xp, [[0, 1], [1, 0]], self.dtype),
|
||||
"Y": _asarray(self.xp, [[0, -1j], [1j, 0]], self.dtype),
|
||||
"Z": _asarray(self.xp, [[1, 0], [0, -1]], self.dtype),
|
||||
}
|
||||
operator_terms = [
|
||||
(
|
||||
coeff,
|
||||
tuple((site, paulis[name.upper()]) for name, site in ops),
|
||||
)
|
||||
for coeff, ops in terms
|
||||
]
|
||||
return self.expectation_operator_sum_root(
|
||||
operator_terms,
|
||||
term_batch_size=term_batch_size,
|
||||
)
|
||||
|
||||
def expectation_operator_sum_root(self, terms, term_batch_size=None):
|
||||
if term_batch_size is None:
|
||||
term_batch_size = max(1, len(terms))
|
||||
norm = self._distributed_product_expectation({})
|
||||
total = 0.0 + 0.0j
|
||||
for start in range(0, len(terms), int(term_batch_size)):
|
||||
batch = terms[start : start + int(term_batch_size)]
|
||||
values = self._distributed_operator_batch_expectation(batch, norm)
|
||||
if self.rank == 0:
|
||||
for (coeff, _), term_value in zip(batch, values):
|
||||
total += complex(coeff) * complex(term_value)
|
||||
return None if self.rank != 0 else _real_if_close(total / norm)
|
||||
|
||||
def _eye(self, size):
|
||||
if self.xp is np:
|
||||
return np.eye(size, dtype=self.dtype)
|
||||
return self.xp.eye(size, dtype=self.dtype, device=self.device)
|
||||
|
||||
def _distributed_product_expectation(self, operators):
|
||||
if self.rank == 0:
|
||||
env = self._segment_product_environment(operators)
|
||||
if self.size == 1:
|
||||
return env.reshape(-1)[0]
|
||||
self.comm.send(_to_numpy(env), dest=1, tag=_EXPECT_ENV_TAG)
|
||||
return self.comm.recv(source=self.size - 1, tag=_EXPECT_RESULT_TAG)
|
||||
|
||||
incoming = self.comm.recv(source=self.rank - 1, tag=_EXPECT_ENV_TAG)
|
||||
env = self._segment_product_environment(operators, incoming)
|
||||
if self.rank == self.size - 1:
|
||||
self.comm.send(_to_numpy(env).reshape(-1)[0], dest=0, tag=_EXPECT_RESULT_TAG)
|
||||
else:
|
||||
self.comm.send(_to_numpy(env), dest=self.rank + 1, tag=_EXPECT_ENV_TAG)
|
||||
return None
|
||||
|
||||
def _segment_product_environment(self, operators, incoming=None):
|
||||
if incoming is None:
|
||||
env = _asarray(
|
||||
self.xp,
|
||||
np.eye(len(self.lambdas[self.start]), dtype=np.complex128),
|
||||
self.dtype,
|
||||
)
|
||||
else:
|
||||
env = _asarray(self.xp, incoming, self.dtype)
|
||||
|
||||
identity = self._eye(2)
|
||||
for site in range(self.start, self.end):
|
||||
tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
|
||||
op = operators.get(site, identity)
|
||||
env = self.xp.einsum(
|
||||
"xy,xsb,st,ytd->bd", env, self._conj(tensor), op, tensor
|
||||
)
|
||||
return env
|
||||
|
||||
def _distributed_operator_batch_expectation(self, terms, norm):
|
||||
if not terms:
|
||||
return []
|
||||
if all(not ops for _, ops in terms):
|
||||
return [norm] * len(terms) if self.rank == 0 else None
|
||||
|
||||
batch_ops = [
|
||||
{int(site): _asarray(self.xp, matrix, self.dtype) for site, matrix in ops}
|
||||
for _, ops in terms
|
||||
]
|
||||
if self.rank == 0:
|
||||
env = self._segment_operator_batch_environment(batch_ops)
|
||||
if self.size == 1:
|
||||
return list(env.reshape(len(terms), -1)[:, 0])
|
||||
self.comm.send(_to_numpy(env), dest=1, tag=_EXPECT_ENV_TAG)
|
||||
return self.comm.recv(source=self.size - 1, tag=_EXPECT_RESULT_TAG)
|
||||
|
||||
incoming = self.comm.recv(source=self.rank - 1, tag=_EXPECT_ENV_TAG)
|
||||
env = self._segment_operator_batch_environment(batch_ops, incoming)
|
||||
if self.rank == self.size - 1:
|
||||
values = list(_to_numpy(env).reshape(len(terms), -1)[:, 0])
|
||||
self.comm.send(values, dest=0, tag=_EXPECT_RESULT_TAG)
|
||||
else:
|
||||
self.comm.send(_to_numpy(env), dest=self.rank + 1, tag=_EXPECT_ENV_TAG)
|
||||
return None
|
||||
|
||||
def _segment_operator_batch_environment(self, batch_ops, incoming=None):
|
||||
batch_size = len(batch_ops)
|
||||
if incoming is None:
|
||||
dim = len(self.lambdas[self.start])
|
||||
env = _asarray(
|
||||
self.xp,
|
||||
np.tile(np.eye(dim, dtype=np.complex128), (batch_size, 1, 1)),
|
||||
self.dtype,
|
||||
)
|
||||
else:
|
||||
env = _asarray(self.xp, incoming, self.dtype)
|
||||
|
||||
identity = self._eye(2)
|
||||
for site in range(self.start, self.end):
|
||||
tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
|
||||
ops = self.xp.stack(
|
||||
[operators.get(site, identity) for operators in batch_ops],
|
||||
axis=0,
|
||||
)
|
||||
env = self.xp.einsum(
|
||||
"nxy,xsb,nst,ytd->nbd",
|
||||
env,
|
||||
self._conj(tensor),
|
||||
ops,
|
||||
tensor,
|
||||
)
|
||||
return env
|
||||
|
||||
def _conj(self, tensor):
|
||||
return np.conjugate(tensor) if self.xp is np else tensor.conj()
|
||||
|
||||
def expectation_mpo_root(self, mpo_tensors):
|
||||
if len(mpo_tensors) != self.nqubits:
|
||||
raise ValueError(
|
||||
f"Expected {self.nqubits} MPO tensors, got {len(mpo_tensors)}."
|
||||
)
|
||||
norm = self._distributed_product_expectation({})
|
||||
if self.rank == 0:
|
||||
env = self._segment_mpo_environment(mpo_tensors)
|
||||
if self.size == 1:
|
||||
return _real_if_close(env.reshape(-1)[0] / norm)
|
||||
self.comm.send(_to_numpy(env), dest=1, tag=_EXPECT_ENV_TAG)
|
||||
value = self.comm.recv(source=self.size - 1, tag=_EXPECT_RESULT_TAG)
|
||||
return _real_if_close(value / norm)
|
||||
|
||||
incoming = self.comm.recv(source=self.rank - 1, tag=_EXPECT_ENV_TAG)
|
||||
env = self._segment_mpo_environment(mpo_tensors, incoming)
|
||||
if self.rank == self.size - 1:
|
||||
self.comm.send(
|
||||
_to_numpy(env).reshape(-1)[0],
|
||||
dest=0,
|
||||
tag=_EXPECT_RESULT_TAG,
|
||||
)
|
||||
else:
|
||||
self.comm.send(_to_numpy(env), dest=self.rank + 1, tag=_EXPECT_ENV_TAG)
|
||||
return None
|
||||
|
||||
def _segment_mpo_environment(self, mpo_tensors, incoming=None):
|
||||
if incoming is None:
|
||||
left_dim = len(self.lambdas[self.start])
|
||||
env = _asarray(
|
||||
self.xp,
|
||||
np.zeros((left_dim, 1, left_dim), dtype=np.complex128),
|
||||
self.dtype,
|
||||
)
|
||||
env[:, 0, :] = self._eye(left_dim)
|
||||
else:
|
||||
env = _asarray(self.xp, incoming, self.dtype)
|
||||
|
||||
for site in range(self.start, self.end):
|
||||
mpo = _asarray(self.xp, mpo_tensors[site], self.dtype)
|
||||
if mpo.ndim != 4 or mpo.shape[1:3] != (2, 2):
|
||||
raise ValueError(
|
||||
"Each MPO tensor must have shape "
|
||||
"(left_bond, 2, 2, right_bond)."
|
||||
)
|
||||
tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
|
||||
env = self.xp.einsum(
|
||||
"xlc,xub,lutr,ctd->brd",
|
||||
env,
|
||||
self._conj(tensor),
|
||||
mpo,
|
||||
tensor,
|
||||
)
|
||||
return env
|
||||
|
||||
def expectation_ring_xz_root(self):
|
||||
terms = [
|
||||
(0.5, (("X", site), ("Z", (site + 1) % self.nqubits)))
|
||||
for site in range(self.nqubits)
|
||||
]
|
||||
return self.expectation_pauli_sum_root(terms)
|
||||
|
||||
|
||||
def run_segment_vidal_mpi_ring_xz(
|
||||
circuit,
|
||||
max_bond,
|
||||
comm,
|
||||
cut_ratio=1e-12,
|
||||
tensor_module="torch",
|
||||
):
|
||||
executor = SegmentVidalMPIExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=max_bond,
|
||||
cut_ratio=cut_ratio,
|
||||
tensor_module=tensor_module,
|
||||
comm=comm,
|
||||
)
|
||||
timings = executor.run_circuit(circuit)
|
||||
tic = time.perf_counter()
|
||||
value = executor.expectation_ring_xz_root()
|
||||
timings["gather"] = time.perf_counter() - tic
|
||||
return value, timings
|
||||
@@ -1,605 +0,0 @@
|
||||
"""Vidal/TEBD MPS executor for layer-parallel circuit simulation.
|
||||
|
||||
This module is intentionally small and focused on the circuit family used by the
|
||||
MPS benchmarks: one-qubit gates and adjacent two-qubit gates on a 1D chain. It
|
||||
keeps the state in Vidal form, so gates acting on disjoint bonds can be applied
|
||||
in parallel without moving a global mixed-canonical center.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def _backend_module(tensor_module):
|
||||
if tensor_module == "torch":
|
||||
import torch
|
||||
|
||||
return torch
|
||||
if tensor_module == "numpy":
|
||||
return np
|
||||
raise ValueError(f"Unsupported tensor module {tensor_module!r}.")
|
||||
|
||||
|
||||
def _asarray(xp, value, dtype):
|
||||
if xp is np:
|
||||
return np.asarray(value, dtype=dtype)
|
||||
return xp.as_tensor(value, dtype=dtype)
|
||||
|
||||
|
||||
def _ones(xp, size, dtype, device=None):
|
||||
if xp is np:
|
||||
return np.ones(size, dtype=np.float64 if dtype == np.complex128 else np.float32)
|
||||
real_dtype = xp.float64 if dtype == xp.complex128 else xp.float32
|
||||
return xp.ones(size, dtype=real_dtype, device=device)
|
||||
|
||||
|
||||
def _eye(xp, size, dtype, device=None):
|
||||
if xp is np:
|
||||
return np.eye(size, dtype=dtype)
|
||||
return xp.eye(size, dtype=dtype, device=device)
|
||||
|
||||
|
||||
def _conj(xp, tensor):
|
||||
return np.conjugate(tensor) if xp is np else tensor.conj()
|
||||
|
||||
|
||||
def _transpose(xp, tensor, axes):
|
||||
return np.transpose(tensor, axes) if xp is np else tensor.permute(*axes)
|
||||
|
||||
|
||||
def _vdot(xp, left, right):
|
||||
if xp is np:
|
||||
return np.vdot(left.reshape(-1), right.reshape(-1))
|
||||
return xp.vdot(left.reshape(-1), right.reshape(-1))
|
||||
|
||||
|
||||
def _to_float(x):
|
||||
if hasattr(x, "detach"):
|
||||
return float(x.detach().cpu().item())
|
||||
return float(x)
|
||||
|
||||
|
||||
def _to_scalar(x):
|
||||
if hasattr(x, "detach"):
|
||||
return x.detach().cpu().item()
|
||||
if isinstance(x, np.ndarray):
|
||||
return x.item()
|
||||
return x
|
||||
|
||||
|
||||
def _real_if_close(x, tol=1000):
|
||||
value = np.real_if_close(x, tol=tol)
|
||||
return value.item() if isinstance(value, np.ndarray) else value
|
||||
|
||||
|
||||
def _to_numpy(tensor):
|
||||
if hasattr(tensor, "detach"):
|
||||
return tensor.detach().cpu().numpy()
|
||||
return np.asarray(tensor)
|
||||
|
||||
|
||||
def _tensor_update_to_numpy(update):
|
||||
result = {
|
||||
"site": int(update["site"]),
|
||||
"left": _to_numpy(update["left"]),
|
||||
"right": _to_numpy(update["right"]),
|
||||
"lambda": _to_numpy(update["lambda"]),
|
||||
}
|
||||
if "truncation_error" in update:
|
||||
result["truncation_error"] = float(update["truncation_error"])
|
||||
return result
|
||||
|
||||
|
||||
def _tensor_update_from_numpy(xp, update, dtype):
|
||||
if xp is np:
|
||||
return update
|
||||
result = {
|
||||
"site": update["site"],
|
||||
"left": _asarray(xp, update["left"], dtype),
|
||||
"right": _asarray(xp, update["right"], dtype),
|
||||
"lambda": xp.as_tensor(
|
||||
update["lambda"],
|
||||
dtype=xp.float64 if dtype == xp.complex128 else xp.float32,
|
||||
),
|
||||
}
|
||||
if "truncation_error" in update:
|
||||
result["truncation_error"] = float(update["truncation_error"])
|
||||
return result
|
||||
|
||||
|
||||
def _svd(xp, matrix):
|
||||
return _svd_eigh(xp, matrix)
|
||||
|
||||
|
||||
def _svd_eigh(xp, matrix):
|
||||
"""SVD through Hermitian eigendecomposition.
|
||||
|
||||
This mirrors the E-style path that is fast for the benchmark matrices and
|
||||
avoids torch's slower general-purpose SVD for many small/medium splits.
|
||||
"""
|
||||
|
||||
m_dim, n_dim = matrix.shape
|
||||
if m_dim <= n_dim:
|
||||
gram = matrix @ _conj(xp, matrix).T
|
||||
eigvals, eigvecs = _eigh(xp, gram)
|
||||
eigvals, eigvecs = _sort_eigh_desc(xp, eigvals, eigvecs)
|
||||
singvals = _sqrt_clamped(xp, eigvals)
|
||||
inv_s = _safe_inverse(xp, singvals)
|
||||
vh = (_conj(xp, eigvecs).T @ matrix) * inv_s.reshape(-1, 1)
|
||||
return eigvecs, singvals, vh
|
||||
|
||||
gram = _conj(xp, matrix).T @ matrix
|
||||
eigvals, eigvecs = _eigh(xp, gram)
|
||||
eigvals, eigvecs = _sort_eigh_desc(xp, eigvals, eigvecs)
|
||||
singvals = _sqrt_clamped(xp, eigvals)
|
||||
inv_s = _safe_inverse(xp, singvals)
|
||||
umat = (matrix @ eigvecs) * inv_s.reshape(1, -1)
|
||||
return umat, singvals, _conj(xp, eigvecs).T
|
||||
|
||||
|
||||
def _eigh(xp, matrix):
|
||||
if xp is np:
|
||||
return np.linalg.eigh(matrix)
|
||||
return xp.linalg.eigh(matrix)
|
||||
|
||||
|
||||
def _sort_eigh_desc(xp, eigvals, eigvecs):
|
||||
if xp is np:
|
||||
return eigvals[::-1].copy(), eigvecs[:, ::-1].copy()
|
||||
return xp.flip(eigvals, dims=(0,)), xp.flip(eigvecs, dims=(1,))
|
||||
|
||||
|
||||
def _sqrt_clamped(xp, eigvals):
|
||||
if xp is np:
|
||||
return np.sqrt(np.maximum(eigvals.real, 0.0))
|
||||
return xp.sqrt(xp.clamp(eigvals.real, min=0.0))
|
||||
|
||||
|
||||
def _safe_inverse(xp, values):
|
||||
if xp is np:
|
||||
return np.where(values > 1e-300, 1.0 / values, 0.0)
|
||||
return xp.where(values > 1e-300, 1.0 / values, xp.zeros_like(values))
|
||||
|
||||
|
||||
@dataclass
|
||||
class VidalTEBDExecutor:
|
||||
nqubits: int
|
||||
max_bond: int | None
|
||||
cut_ratio: float | None = 1e-12
|
||||
tensor_module: str = "torch"
|
||||
|
||||
def __post_init__(self):
|
||||
self.xp = _backend_module(self.tensor_module)
|
||||
if self.xp is np:
|
||||
self.dtype = np.complex128
|
||||
self.device = None
|
||||
else:
|
||||
self.dtype = self.xp.complex128
|
||||
self.device = self.xp.device("cpu")
|
||||
|
||||
self.gammas = []
|
||||
for _ in range(self.nqubits):
|
||||
tensor = _asarray(self.xp, [[[1.0 + 0.0j], [0.0 + 0.0j]]], self.dtype)
|
||||
self.gammas.append(tensor)
|
||||
self.lambdas = [
|
||||
_ones(self.xp, 1, self.dtype, self.device) for _ in range(self.nqubits + 1)
|
||||
]
|
||||
self._accumulated_truncation_error = 0.0
|
||||
self._max_truncation_error = 0.0
|
||||
|
||||
def run_circuit(self, circuit, compile_circuit=True):
|
||||
gates = circuit.queue
|
||||
if compile_circuit:
|
||||
gates = _route_non_adjacent_gates(gates, circuit.nqubits)
|
||||
gates = _fuse_one_site_blocks(gates)
|
||||
for batch in _disjoint_batches(gates):
|
||||
for gate in batch:
|
||||
self._apply_gate(gate)
|
||||
|
||||
@property
|
||||
def truncation_error(self):
|
||||
return self._accumulated_truncation_error
|
||||
|
||||
@property
|
||||
def max_truncation_error(self):
|
||||
return self._max_truncation_error
|
||||
|
||||
def _apply_gate(self, gate):
|
||||
sites = _gate_sites(gate)
|
||||
matrix = _asarray(self.xp, gate.matrix(), self.dtype)
|
||||
if len(sites) == 1:
|
||||
self.apply_one_site(matrix, sites[0])
|
||||
elif len(sites) == 2:
|
||||
if abs(sites[0] - sites[1]) != 1:
|
||||
raise NotImplementedError("VidalTEBDExecutor supports adjacent gates only.")
|
||||
self.apply_two_site(matrix, sites[0], sites[1])
|
||||
else:
|
||||
raise NotImplementedError("Only one- and two-qubit gates are supported.")
|
||||
|
||||
def apply_one_site(self, op, pos):
|
||||
# op[out_phys, in_phys] * gamma[left, in_phys, right]
|
||||
self.gammas[pos] = self.xp.einsum("st,atb->asb", op, self.gammas[pos])
|
||||
|
||||
def apply_two_site(self, op, left_pos, right_pos):
|
||||
item = self._build_two_site_matrix(op, left_pos, right_pos)
|
||||
umat, singvals, vh = _svd(self.xp, item["matrix"])
|
||||
self._install_two_site_split(item, umat, singvals, vh)
|
||||
|
||||
def _build_two_site_matrix(self, op, left_pos, right_pos):
|
||||
if left_pos > right_pos:
|
||||
left_pos, right_pos = right_pos, left_pos
|
||||
op = _transpose(self.xp, op.reshape(2, 2, 2, 2), (1, 0, 3, 2)).reshape(
|
||||
4, 4
|
||||
)
|
||||
|
||||
i = left_pos
|
||||
result = _build_theta_svd_matrix(
|
||||
op, self.xp,
|
||||
self.lambdas[i], self.lambdas[i + 1], self.lambdas[i + 2],
|
||||
self.gammas[i], self.gammas[i + 1],
|
||||
)
|
||||
result["site"] = i
|
||||
result["lam_left"] = self.lambdas[i]
|
||||
result["lam_right"] = self.lambdas[i + 2]
|
||||
return result
|
||||
|
||||
def _install_two_site_split(self, item, umat, singvals, vh):
|
||||
update = _make_two_site_update(item, umat, singvals, vh,
|
||||
self.max_bond, self.cut_ratio, self.xp)
|
||||
self._accumulated_truncation_error += update["truncation_error"]
|
||||
self._max_truncation_error = max(
|
||||
self._max_truncation_error,
|
||||
update["truncation_error"],
|
||||
)
|
||||
i = update["site"]
|
||||
self.gammas[i] = update["left"]
|
||||
self.gammas[i + 1] = update["right"]
|
||||
self.lambdas[i + 1] = update["lambda"]
|
||||
|
||||
def expectation_ring_xz(self):
|
||||
return self.expectation_pauli_sum(
|
||||
[
|
||||
(0.5, (("X", site), ("Z", (site + 1) % self.nqubits)))
|
||||
for site in range(self.nqubits)
|
||||
]
|
||||
)
|
||||
|
||||
def expectation_pauli_sum(self, terms):
|
||||
paulis = {
|
||||
"I": _eye(self.xp, 2, self.dtype, self.device),
|
||||
"X": _asarray(self.xp, [[0, 1], [1, 0]], self.dtype),
|
||||
"Y": _asarray(self.xp, [[0, -1j], [1j, 0]], self.dtype),
|
||||
"Z": _asarray(self.xp, [[1, 0], [0, -1]], self.dtype),
|
||||
}
|
||||
operator_terms = [
|
||||
(
|
||||
coeff,
|
||||
tuple((site, paulis[name.upper()]) for name, site in ops),
|
||||
)
|
||||
for coeff, ops in terms
|
||||
]
|
||||
return self.expectation_operator_sum(operator_terms)
|
||||
|
||||
def expectation_operator_sum(self, terms):
|
||||
value = 0.0 + 0.0j
|
||||
norm = self.norm()
|
||||
for coeff, ops in terms:
|
||||
operators = {
|
||||
int(site): _asarray(self.xp, matrix, self.dtype)
|
||||
for site, matrix in ops
|
||||
}
|
||||
if len(ops) == 0:
|
||||
term_value = norm
|
||||
elif len(operators) == 1:
|
||||
site, matrix = next(iter(operators.items()))
|
||||
term_value = _to_scalar(self._expect_one_site(site, matrix))
|
||||
elif len(operators) == 2 and abs(max(operators) - min(operators)) == 1:
|
||||
site0, site1 = sorted(operators)
|
||||
term_value = _to_scalar(
|
||||
self._expect_adjacent(site0, operators[site0], operators[site1])
|
||||
)
|
||||
else:
|
||||
term_value = _to_scalar(self.expect_product_operators(operators))
|
||||
value += complex(coeff) * complex(term_value)
|
||||
return _real_if_close(value / norm)
|
||||
|
||||
def _expect_one_site(self, site, op):
|
||||
theta = self.xp.einsum(
|
||||
"a,asb,b->asb",
|
||||
self.lambdas[site],
|
||||
self.gammas[site],
|
||||
self.lambdas[site + 1],
|
||||
)
|
||||
op_theta = self.xp.einsum("us,asb->aub", op, theta)
|
||||
return _vdot(self.xp, theta, op_theta)
|
||||
|
||||
def _expect_adjacent(self, site, op_left, op_right):
|
||||
theta = self.xp.einsum(
|
||||
"a,asb,b,btc,c->astc",
|
||||
self.lambdas[site],
|
||||
self.gammas[site],
|
||||
self.lambdas[site + 1],
|
||||
self.gammas[site + 1],
|
||||
self.lambdas[site + 2],
|
||||
)
|
||||
op_theta = self.xp.einsum("us,vt,astc->auvc", op_left, op_right, theta)
|
||||
return _vdot(self.xp, theta, op_theta)
|
||||
|
||||
def expect_product_operators(self, operators):
|
||||
env = _asarray(self.xp, [[1.0 + 0.0j]], self.dtype)
|
||||
identity = _eye(self.xp, 2, self.dtype, self.device)
|
||||
for site in range(self.nqubits):
|
||||
tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
|
||||
op = operators.get(site, identity)
|
||||
env = self.xp.einsum(
|
||||
"xy,xsb,st,ytd->bd", env, _conj(self.xp, tensor), op, tensor
|
||||
)
|
||||
return env.reshape(-1)[0]
|
||||
|
||||
def norm(self):
|
||||
return float(np.real(_to_scalar(self.expect_product_operators({}))))
|
||||
|
||||
def expectation_mpo(self, mpo_tensors):
|
||||
"""Compute ``<psi|MPO|psi> / <psi|psi>``.
|
||||
|
||||
MPO tensors are expected in ``(left_bond, phys_out, phys_in, right_bond)``
|
||||
order, with physical dimension 2 on every site.
|
||||
"""
|
||||
if len(mpo_tensors) != self.nqubits:
|
||||
raise ValueError(
|
||||
f"Expected {self.nqubits} MPO tensors, got {len(mpo_tensors)}."
|
||||
)
|
||||
env = _asarray(self.xp, [[[1.0 + 0.0j]]], self.dtype)
|
||||
for site, raw_mpo in enumerate(mpo_tensors):
|
||||
mpo = _asarray(self.xp, raw_mpo, self.dtype)
|
||||
if mpo.ndim != 4 or mpo.shape[1:3] != (2, 2):
|
||||
raise ValueError(
|
||||
"Each MPO tensor must have shape "
|
||||
"(left_bond, 2, 2, right_bond)."
|
||||
)
|
||||
tensor = self.gammas[site] * self.lambdas[site + 1].reshape(1, 1, -1)
|
||||
env = self.xp.einsum(
|
||||
"xlc,xub,lutr,ctd->brd",
|
||||
env,
|
||||
_conj(self.xp, tensor),
|
||||
mpo,
|
||||
tensor,
|
||||
)
|
||||
return _real_if_close(_to_scalar(env.reshape(-1)[0]) / self.norm())
|
||||
|
||||
|
||||
def _build_theta_svd_matrix(op, xp, lam_left, lam_mid, lam_right, gamma_left, gamma_right):
|
||||
"""Merge and apply a two-site gate, returning the SVD-ready matrix."""
|
||||
theta = xp.einsum(
|
||||
"a,asb,b,btc,c->astc",
|
||||
lam_left, gamma_left, lam_mid, gamma_right, lam_right,
|
||||
)
|
||||
gate = op.reshape(2, 2, 2, 2)
|
||||
theta = xp.einsum("uvst,astc->auvc", gate, theta)
|
||||
chi_left = theta.shape[0]
|
||||
chi_right = theta.shape[3]
|
||||
return {
|
||||
"chi_left": chi_left,
|
||||
"chi_right": chi_right,
|
||||
"matrix": theta.reshape(chi_left * 2, 2 * chi_right),
|
||||
}
|
||||
|
||||
|
||||
def _choose_bond(singvals, max_bond, cut_ratio, xp):
|
||||
max_possible = int(singvals.shape[0])
|
||||
keep = max_possible if max_bond is None else min(max_possible, int(max_bond))
|
||||
if cut_ratio is not None and cut_ratio > 0 and max_possible > 0:
|
||||
threshold = singvals[0] * cut_ratio
|
||||
if xp is np:
|
||||
ratio_keep = int(np.count_nonzero(singvals > threshold))
|
||||
else:
|
||||
ratio_keep = int((singvals > threshold).sum().detach().cpu().item())
|
||||
keep = min(keep, max(1, ratio_keep))
|
||||
return keep
|
||||
|
||||
|
||||
def _divide_left_lambda(tensor, lambdas, xp):
|
||||
if xp is np:
|
||||
safe = np.where(np.abs(lambdas) > 1e-300, lambdas, 1.0)
|
||||
else:
|
||||
safe = xp.where(xp.abs(lambdas) > 1e-300, lambdas, xp.ones_like(lambdas))
|
||||
return tensor / safe.reshape(-1, 1, 1)
|
||||
|
||||
|
||||
def _divide_right_lambda(tensor, lambdas, xp):
|
||||
if xp is np:
|
||||
safe = np.where(np.abs(lambdas) > 1e-300, lambdas, 1.0)
|
||||
else:
|
||||
safe = xp.where(xp.abs(lambdas) > 1e-300, lambdas, xp.ones_like(lambdas))
|
||||
return tensor / safe.reshape(1, 1, -1)
|
||||
|
||||
|
||||
def _make_two_site_update(item, umat, singvals, vh, max_bond, cut_ratio, xp):
|
||||
keep = _choose_bond(singvals, max_bond, cut_ratio, xp)
|
||||
umat = umat[:, :keep]
|
||||
kept = singvals[:keep]
|
||||
cut = singvals[keep:]
|
||||
vh = vh[:keep, :]
|
||||
|
||||
discarded_weight = 0.0
|
||||
if cut.shape[0] > 0:
|
||||
norm_kept = (kept * kept).sum()
|
||||
norm_cut = (cut * cut).sum()
|
||||
discarded_weight = float(_to_float(norm_cut))
|
||||
kept = kept / xp.sqrt(norm_kept / (norm_kept + norm_cut))
|
||||
|
||||
new_left = umat.reshape(item["chi_left"], 2, keep)
|
||||
new_right = vh.reshape(keep, 2, item["chi_right"])
|
||||
new_left = _divide_left_lambda(new_left, item["lam_left"], xp)
|
||||
new_right = _divide_right_lambda(new_right, item["lam_right"], xp)
|
||||
return {
|
||||
"site": item["site"],
|
||||
"left": new_left,
|
||||
"right": new_right,
|
||||
"lambda": kept,
|
||||
"truncation_error": discarded_weight,
|
||||
}
|
||||
|
||||
|
||||
def _gate_sites(gate):
|
||||
controls = tuple(getattr(gate, "control_qubits", ()))
|
||||
targets = tuple(getattr(gate, "target_qubits", ()))
|
||||
if controls:
|
||||
return controls + targets
|
||||
return targets
|
||||
|
||||
|
||||
# ── SWAP routing for non-adjacent two-qubit gates ──────────────────────
|
||||
|
||||
class _SWAPGate:
|
||||
"""Minimal SWAP gate wrapper for routing non-adjacent gates."""
|
||||
name = "swap"
|
||||
control_qubits = ()
|
||||
|
||||
def __init__(self, left, right):
|
||||
self.target_qubits = (left, right)
|
||||
|
||||
def matrix(self):
|
||||
return np.array(
|
||||
[[1, 0, 0, 0],
|
||||
[0, 0, 1, 0],
|
||||
[0, 1, 0, 0],
|
||||
[0, 0, 0, 1]],
|
||||
dtype=complex,
|
||||
)
|
||||
|
||||
|
||||
class _RoutedTwoQubitGate:
|
||||
"""Wraps a two-qubit gate with remapped physical sites after SWAP routing."""
|
||||
name = "routed_two_qubit"
|
||||
control_qubits = ()
|
||||
|
||||
def __init__(self, original_gate, physical_sites):
|
||||
self.target_qubits = tuple(physical_sites)
|
||||
self._matrix = original_gate.matrix()
|
||||
|
||||
def matrix(self):
|
||||
return self._matrix
|
||||
|
||||
|
||||
def _route_non_adjacent_gates(gates, nqubits):
|
||||
"""Insert SWAP networks to make all two-qubit gates adjacent.
|
||||
|
||||
For each non-adjacent two-qubit gate, inserts SWAP gates to bring the
|
||||
farther qubit adjacent, applies the original gate, then inserts reverse
|
||||
SWAPs to restore the qubit ordering. The resulting gate sequence
|
||||
contains only adjacent two-qubit gates and is safe for VidalTEBDExecutor.
|
||||
"""
|
||||
routed = []
|
||||
for gate in gates:
|
||||
sites = _gate_sites(gate)
|
||||
if len(sites) <= 1:
|
||||
routed.append(gate)
|
||||
continue
|
||||
|
||||
left, right = sorted(sites)
|
||||
if right - left == 1:
|
||||
routed.append(gate)
|
||||
continue
|
||||
|
||||
# Move qubit 'right' leftwards to sit at left+1
|
||||
for pos in range(right - 1, left, -1):
|
||||
routed.append(_SWAPGate(pos, pos + 1))
|
||||
|
||||
# Apply the original gate in its original qubit order. For gates like
|
||||
# CNOT(5, 0), sorting the routed sites would swap control and target.
|
||||
physical_map = {left: left, right: left + 1}
|
||||
routed.append(_RoutedTwoQubitGate(gate, [physical_map[site] for site in sites]))
|
||||
|
||||
# Reverse SWAPs to restore original ordering
|
||||
for pos in range(left + 1, right):
|
||||
routed.append(_SWAPGate(pos, pos + 1))
|
||||
|
||||
return routed
|
||||
|
||||
|
||||
def _disjoint_batches(gates):
|
||||
batches = []
|
||||
current = []
|
||||
touched = set()
|
||||
current_arity = None
|
||||
for gate in gates:
|
||||
sites = _gate_sites(gate)
|
||||
arity = len(sites)
|
||||
site_set = set(sites)
|
||||
if current and (current_arity != arity or touched & site_set):
|
||||
batches.append(current)
|
||||
current = []
|
||||
touched = set()
|
||||
current_arity = None
|
||||
current.append(gate)
|
||||
touched |= site_set
|
||||
current_arity = arity
|
||||
if current:
|
||||
batches.append(current)
|
||||
return batches
|
||||
|
||||
|
||||
def _is_two_qubit_batch(batch):
|
||||
return batch and all(len(_gate_sites(gate)) == 2 for gate in batch)
|
||||
|
||||
|
||||
class _FusedOneSiteGate:
|
||||
name = "fused_one_site"
|
||||
|
||||
def __init__(self, site, matrix):
|
||||
self.target_qubits = (site,)
|
||||
self.control_qubits = ()
|
||||
self._matrix = matrix
|
||||
|
||||
def matrix(self):
|
||||
return self._matrix
|
||||
|
||||
|
||||
def _fuse_one_site_blocks(gates):
|
||||
fused = []
|
||||
block = []
|
||||
|
||||
def flush_block():
|
||||
nonlocal block
|
||||
if not block:
|
||||
return
|
||||
per_site = {}
|
||||
for gate in block:
|
||||
site = _gate_sites(gate)[0]
|
||||
mat = gate.matrix()
|
||||
if site in per_site:
|
||||
per_site[site] = mat @ per_site[site]
|
||||
else:
|
||||
per_site[site] = mat
|
||||
for site in sorted(per_site):
|
||||
fused.append(_FusedOneSiteGate(site, per_site[site]))
|
||||
block = []
|
||||
|
||||
for gate in gates:
|
||||
if len(_gate_sites(gate)) == 1:
|
||||
block.append(gate)
|
||||
continue
|
||||
flush_block()
|
||||
fused.append(gate)
|
||||
flush_block()
|
||||
return fused
|
||||
|
||||
|
||||
def run_vidal_ring_xz(
|
||||
circuit,
|
||||
max_bond,
|
||||
cut_ratio=1e-12,
|
||||
tensor_module="torch",
|
||||
):
|
||||
executor = VidalTEBDExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=max_bond,
|
||||
cut_ratio=cut_ratio,
|
||||
tensor_module=tensor_module,
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
return executor.expectation_ring_xz()
|
||||
@@ -1,151 +0,0 @@
|
||||
"""Reusable benchmark circuits and observables for expectation runs."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
CIRCUITS = (
|
||||
"brickwall_cnot",
|
||||
"reversed_cnot",
|
||||
"shifted_cz",
|
||||
"rxx_rzz",
|
||||
"swap_scramble",
|
||||
"ghz_ladder",
|
||||
)
|
||||
|
||||
OBSERVABLES = (
|
||||
"ring_xz",
|
||||
"open_zz",
|
||||
"mixed_local",
|
||||
"range2_xx",
|
||||
"long_z_string",
|
||||
)
|
||||
|
||||
|
||||
def parse_names(raw, valid, label):
|
||||
if raw == ["all"]:
|
||||
return list(valid)
|
||||
unknown = sorted(set(raw) - set(valid))
|
||||
if unknown:
|
||||
raise ValueError(f"Unknown {label}: {', '.join(unknown)}")
|
||||
return raw
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
|
||||
if kind == "ghz_ladder":
|
||||
circuit.add(gates.H(0))
|
||||
for qubit in range(nqubits - 1):
|
||||
circuit.add(gates.CNOT(qubit, qubit + 1))
|
||||
return circuit
|
||||
|
||||
for layer in range(nlayers):
|
||||
for qubit in range(nqubits):
|
||||
circuit.add(gates.RY(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
if kind in ("rxx_rzz", "swap_scramble"):
|
||||
circuit.add(gates.RX(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
if kind == "brickwall_cnot":
|
||||
add_brickwall(circuit, nqubits, gates.CNOT, layer, reverse=False)
|
||||
elif kind == "reversed_cnot":
|
||||
add_brickwall(circuit, nqubits, gates.CNOT, layer, reverse=True)
|
||||
elif kind == "shifted_cz":
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.CZ(qubit, qubit + 1))
|
||||
elif kind == "rxx_rzz":
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.7, 0.7)))
|
||||
circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.7, 0.7)))
|
||||
elif kind == "swap_scramble":
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.CZ(qubit, qubit + 1))
|
||||
if layer % 4 == 3:
|
||||
circuit.add(gates.SWAP(qubit, qubit + 1))
|
||||
else:
|
||||
raise ValueError(f"Unknown circuit kind {kind!r}.")
|
||||
|
||||
return circuit
|
||||
|
||||
|
||||
def add_brickwall(circuit, nqubits, gate, layer, reverse):
|
||||
for qubit in range(0, nqubits - 1, 2):
|
||||
if reverse and layer % 2:
|
||||
circuit.add(gate(qubit + 1, qubit))
|
||||
else:
|
||||
circuit.add(gate(qubit, qubit + 1))
|
||||
for qubit in range(1, nqubits - 1, 2):
|
||||
if reverse and not layer % 2:
|
||||
circuit.add(gate(qubit + 1, qubit))
|
||||
else:
|
||||
circuit.add(gate(qubit, qubit + 1))
|
||||
|
||||
|
||||
def observable_terms(kind, nqubits):
|
||||
if kind == "ring_xz":
|
||||
return [
|
||||
(0.5, (("X", site), ("Z", (site + 1) % nqubits)))
|
||||
for site in range(nqubits)
|
||||
]
|
||||
if kind == "open_zz":
|
||||
return [
|
||||
(1.0 / (nqubits - 1), (("Z", site), ("Z", site + 1)))
|
||||
for site in range(nqubits - 1)
|
||||
]
|
||||
if kind == "mixed_local":
|
||||
terms = [(0.25, (("X", 0),)), (-0.5, (("Z", nqubits - 1),))]
|
||||
terms += [
|
||||
(0.125, (("Y", site), ("Y", site + 1)))
|
||||
for site in range(0, nqubits - 1, 3)
|
||||
]
|
||||
return terms
|
||||
if kind == "range2_xx":
|
||||
return [
|
||||
(1.0 / max(1, nqubits - 2), (("X", site), ("X", site + 2)))
|
||||
for site in range(nqubits - 2)
|
||||
]
|
||||
if kind == "long_z_string":
|
||||
stride = max(1, nqubits // 16)
|
||||
return [(1.0, tuple(("Z", site) for site in range(0, nqubits, stride)))]
|
||||
raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def terms_to_dict(terms):
|
||||
return {
|
||||
"terms": [
|
||||
{
|
||||
"coefficient": float(np.real(coeff)),
|
||||
"operators": [(name, int(site)) for name, site in ops],
|
||||
}
|
||||
for coeff, ops in terms
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def exact_pauli_sum(circuit, terms, nqubits):
|
||||
state = circuit().state(numpy=True).reshape(-1)
|
||||
indices = np.arange(state.size, dtype=np.int64)
|
||||
value = 0.0 + 0.0j
|
||||
for coeff, ops in terms:
|
||||
flipped = indices.copy()
|
||||
phase = np.ones(state.size, dtype=np.complex128)
|
||||
for name, site in ops:
|
||||
shift = nqubits - 1 - site
|
||||
bit = (indices >> shift) & 1
|
||||
if name == "X":
|
||||
flipped ^= 1 << shift
|
||||
elif name == "Y":
|
||||
flipped ^= 1 << shift
|
||||
phase *= 1j * (1 - 2 * bit)
|
||||
elif name == "Z":
|
||||
phase *= 1 - 2 * bit
|
||||
elif name != "I":
|
||||
raise ValueError(f"Unsupported Pauli {name!r}.")
|
||||
value += coeff * np.vdot(state[flipped], phase * state)
|
||||
return float(value.real)
|
||||
@@ -1,19 +1,6 @@
|
||||
import cupy as cp
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
|
||||
|
||||
def _require_cupy():
|
||||
if cp is None:
|
||||
raise ImportError(
|
||||
"The cuQuantum circuit converter requires cupy. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
return cp
|
||||
|
||||
# Reference: https://github.com/NVIDIA/cuQuantum/tree/main/python/samples/cutensornet/circuit_converter
|
||||
|
||||
|
||||
@@ -32,7 +19,7 @@ class QiboCircuitToEinsum:
|
||||
"""
|
||||
|
||||
def __init__(self, circuit, dtype="complex128"):
|
||||
self.backend = _require_cupy()
|
||||
self.backend = cp
|
||||
self.dtype = getattr(self.backend, dtype)
|
||||
self.init_basis_map(self.backend, dtype)
|
||||
self.init_intermediate_circuit(circuit)
|
||||
@@ -129,9 +116,7 @@ class QiboCircuitToEinsum:
|
||||
required_shape = self.op_shape_from_qubits(len(gate_qubits))
|
||||
self.gate_tensors.append(
|
||||
(
|
||||
self.backend.asarray(gate.matrix(), dtype=self.dtype).reshape(
|
||||
required_shape
|
||||
),
|
||||
cp.asarray(gate.matrix(), dtype=self.dtype).reshape(required_shape),
|
||||
gate_qubits,
|
||||
)
|
||||
)
|
||||
@@ -176,7 +161,7 @@ class QiboCircuitToEinsum:
|
||||
required_shape = self.op_shape_from_qubits(len(gate_qubits))
|
||||
self.gate_tensors_inverse.append(
|
||||
(
|
||||
self.backend.asarray(gate.matrix()).reshape(required_shape),
|
||||
cp.asarray(gate.matrix()).reshape(required_shape),
|
||||
gate_qubits,
|
||||
)
|
||||
)
|
||||
@@ -184,7 +169,7 @@ class QiboCircuitToEinsum:
|
||||
# self.active_qubits is to identify qubits with at least 1 gate acting on it in the whole circuit.
|
||||
self.active_qubits_inverse = np.unique(gates_qubits_inverse)
|
||||
|
||||
def get_pauli_gates(self, pauli_map, dtype="complex128", backend=None):
|
||||
def get_pauli_gates(self, pauli_map, dtype="complex128", backend=cp):
|
||||
"""Populate the gates for all pauli operators.
|
||||
|
||||
Parameters:
|
||||
@@ -195,8 +180,6 @@ class QiboCircuitToEinsum:
|
||||
Returns:
|
||||
A sequence of pauli gates.
|
||||
"""
|
||||
if backend is None:
|
||||
backend = _require_cupy()
|
||||
asarray = backend.asarray
|
||||
pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
|
||||
pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
|
||||
|
||||
@@ -1,23 +1,10 @@
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
import numpy as np
|
||||
|
||||
from qibotn.circuit_convertor import QiboCircuitToEinsum
|
||||
from qibotn.mps_utils import apply_gate, initial
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
cutn = None
|
||||
|
||||
|
||||
def _require_cuquantum():
|
||||
if cp is None or cutn is None:
|
||||
raise ImportError(
|
||||
"The cuQuantum MPS converter requires cupy and cuquantum. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
|
||||
|
||||
class QiboCircuitToMPS:
|
||||
"""A helper class to convert Qibo circuit to MPS.
|
||||
@@ -36,7 +23,6 @@ class QiboCircuitToMPS:
|
||||
dtype="complex128",
|
||||
rand_seed=0,
|
||||
):
|
||||
_require_cuquantum()
|
||||
np.random.seed(rand_seed)
|
||||
cp.random.seed(rand_seed)
|
||||
|
||||
@@ -58,6 +44,4 @@ class QiboCircuitToMPS:
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
handle = getattr(self, "handle", None)
|
||||
if cutn is not None and handle is not None:
|
||||
cutn.destroy(handle)
|
||||
cutn.destroy(self.handle)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,46 +1,89 @@
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
from cupy.cuda import nccl
|
||||
from cupy.cuda.runtime import getDeviceCount
|
||||
from cuquantum.tensornet import Network, contract
|
||||
from mpi4py import MPI
|
||||
from qibo import hamiltonians
|
||||
from qibo.symbols import I, X, Y, Z
|
||||
|
||||
from qibotn.circuit_convertor import QiboCircuitToEinsum
|
||||
from qibotn.circuit_to_mps import QiboCircuitToMPS
|
||||
from qibotn.mps_contraction_helper import MPSContractionHelper
|
||||
from qibotn.observables import (
|
||||
build_observable,
|
||||
check_observable,
|
||||
create_hamiltonian_from_dict,
|
||||
extract_gates_and_qubits,
|
||||
)
|
||||
|
||||
try:
|
||||
import cupy as cp
|
||||
import cuquantum.bindings.cutensornet as cutn
|
||||
from cupy.cuda import nccl
|
||||
from cupy.cuda.runtime import getDeviceCount
|
||||
from cuquantum.tensornet import Network, contract
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
cutn = None
|
||||
nccl = None
|
||||
getDeviceCount = None
|
||||
Network = None
|
||||
contract = None
|
||||
|
||||
|
||||
def _require_cuquantum():
|
||||
if (
|
||||
cp is None
|
||||
or cutn is None
|
||||
or nccl is None
|
||||
or getDeviceCount is None
|
||||
or Network is None
|
||||
or contract is None
|
||||
):
|
||||
raise ImportError(
|
||||
"The legacy GPU evaluation helpers require cupy and cuquantum. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
def check_observable(observable, circuit_nqubit):
|
||||
"""Checks the type of observable and returns the appropriate Hamiltonian."""
|
||||
if observable is None:
|
||||
return build_observable(circuit_nqubit)
|
||||
elif isinstance(observable, dict):
|
||||
return create_hamiltonian_from_dict(observable, circuit_nqubit)
|
||||
elif isinstance(observable, hamiltonians.SymbolicHamiltonian):
|
||||
# TODO: check if the observable is compatible with the circuit
|
||||
return observable
|
||||
else:
|
||||
raise TypeError("Invalid observable type.")
|
||||
|
||||
|
||||
def get_ham_gates(pauli_map, dtype="complex128", backend=None):
|
||||
def build_observable(circuit_nqubit):
|
||||
"""Helper function to construct a target observable."""
|
||||
hamiltonian_form = 0
|
||||
for i in range(circuit_nqubit):
|
||||
hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit)
|
||||
|
||||
hamiltonian = hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
|
||||
return hamiltonian
|
||||
|
||||
|
||||
def create_hamiltonian_from_dict(data, circuit_nqubit):
|
||||
"""Create a Qibo SymbolicHamiltonian from a dictionary representation.
|
||||
|
||||
Ensures that each Hamiltonian term explicitly acts on all circuit qubits
|
||||
by adding identity (`I`) gates where needed.
|
||||
|
||||
Args:
|
||||
data (dict): Dictionary containing Hamiltonian terms.
|
||||
circuit_nqubit (int): Total number of qubits in the quantum circuit.
|
||||
|
||||
Returns:
|
||||
hamiltonians.SymbolicHamiltonian: The constructed Hamiltonian.
|
||||
"""
|
||||
PAULI_GATES = {"X": X, "Y": Y, "Z": Z}
|
||||
|
||||
terms = []
|
||||
|
||||
for term in data["terms"]:
|
||||
coeff = term["coefficient"]
|
||||
operators = term["operators"] # List of tuples like [("Z", 0), ("X", 1)]
|
||||
|
||||
# Convert the operator list into a dictionary {qubit_index: gate}
|
||||
operator_dict = {q: PAULI_GATES[g] for g, q in operators}
|
||||
|
||||
# Build the full term ensuring all qubits are covered
|
||||
full_term_expr = [
|
||||
operator_dict[q](q) if q in operator_dict else I(q)
|
||||
for q in range(circuit_nqubit)
|
||||
]
|
||||
|
||||
# Multiply all operators together to form a single term
|
||||
term_expr = full_term_expr[0]
|
||||
for op in full_term_expr[1:]:
|
||||
term_expr *= op
|
||||
|
||||
# Scale by the coefficient
|
||||
final_term = coeff * term_expr
|
||||
terms.append(final_term)
|
||||
|
||||
if not terms:
|
||||
raise ValueError("No valid Hamiltonian terms were added.")
|
||||
|
||||
# Combine all terms
|
||||
hamiltonian_form = sum(terms)
|
||||
|
||||
return hamiltonians.SymbolicHamiltonian(hamiltonian_form)
|
||||
|
||||
|
||||
def get_ham_gates(pauli_map, dtype="complex128", backend=cp):
|
||||
"""Populate the gates for all pauli operators.
|
||||
|
||||
Parameters:
|
||||
@@ -51,13 +94,6 @@ def get_ham_gates(pauli_map, dtype="complex128", backend=None):
|
||||
Returns:
|
||||
A sequence of pauli gates.
|
||||
"""
|
||||
if backend is None:
|
||||
backend = cp
|
||||
if backend is None:
|
||||
raise ImportError(
|
||||
"get_ham_gates requires an array backend; cupy is unavailable "
|
||||
"in this CPU-only environment."
|
||||
)
|
||||
asarray = backend.asarray
|
||||
pauli_i = asarray([[1, 0], [0, 1]], dtype=dtype)
|
||||
pauli_x = asarray([[0, 1], [1, 0]], dtype=dtype)
|
||||
@@ -75,9 +111,47 @@ def get_ham_gates(pauli_map, dtype="complex128", backend=None):
|
||||
return gates
|
||||
|
||||
|
||||
def extract_gates_and_qubits(hamiltonian):
|
||||
"""
|
||||
Extracts the gates and their corresponding qubits from a Qibo Hamiltonian.
|
||||
|
||||
Parameters:
|
||||
hamiltonian (qibo.hamiltonians.Hamiltonian or qibo.hamiltonians.SymbolicHamiltonian):
|
||||
A Qibo Hamiltonian object.
|
||||
|
||||
Returns:
|
||||
list of tuples: [(coefficient, [(gate, qubit), ...]), ...]
|
||||
- coefficient: The prefactor of the term.
|
||||
- list of (gate, qubit): Each term's gates and the qubits they act on.
|
||||
"""
|
||||
extracted_terms = []
|
||||
|
||||
if isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian):
|
||||
for term in hamiltonian.terms:
|
||||
coeff = term.coefficient # Extract coefficient
|
||||
gate_qubit_list = []
|
||||
|
||||
# Extract gate and qubit information
|
||||
for factor in term.factors:
|
||||
gate_name = str(factor)[
|
||||
0
|
||||
] # Extract the gate type (X, Y, Z) from 'X0', 'Z1'
|
||||
qubit = int(str(factor)[1:]) # Extract the qubit index
|
||||
gate_qubit_list.append((qubit, gate_name, coeff))
|
||||
coeff = 1.0
|
||||
|
||||
extracted_terms.append(gate_qubit_list)
|
||||
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unsupported Hamiltonian type. Must be SymbolicHamiltonian or Hamiltonian."
|
||||
)
|
||||
|
||||
return extracted_terms
|
||||
|
||||
|
||||
def initialize_mpi():
|
||||
"""Initialize MPI communication and device selection."""
|
||||
_require_cuquantum()
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
@@ -88,7 +162,6 @@ def initialize_mpi():
|
||||
|
||||
def initialize_nccl(comm_mpi, rank, size):
|
||||
"""Initialize NCCL communication."""
|
||||
_require_cuquantum()
|
||||
nccl_id = nccl.get_unique_id() if rank == 0 else None
|
||||
nccl_id = comm_mpi.bcast(nccl_id, root=0)
|
||||
return nccl.NcclCommunicator(size, nccl_id, rank)
|
||||
@@ -106,7 +179,6 @@ def get_operands(qibo_circ, datatype, rank, comm):
|
||||
|
||||
def compute_optimal_path(network, n_samples, size, comm):
|
||||
"""Compute contraction path and broadcast optimal selection."""
|
||||
_require_cuquantum()
|
||||
path, info = network.contract_path(
|
||||
optimize={
|
||||
"samples": n_samples,
|
||||
@@ -135,8 +207,6 @@ def compute_slices(info, rank, size):
|
||||
|
||||
def reduce_result(result, comm, method="MPI", root=0):
|
||||
"""Reduce results across processes."""
|
||||
if method == "NCCL":
|
||||
_require_cuquantum()
|
||||
if method == "MPI":
|
||||
return comm.reduce(sendobj=result, op=MPI.SUM, root=root)
|
||||
|
||||
@@ -184,7 +254,6 @@ def dense_vector_tn_MPI(qibo_circ, datatype, n_samples=8):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
comm, rank, size, device_id = initialize_mpi()
|
||||
operands = get_operands(qibo_circ, datatype, rank, comm)
|
||||
network = Network(*operands, options={"device_id": device_id})
|
||||
@@ -216,7 +285,6 @@ def dense_vector_tn_nccl(qibo_circ, datatype, n_samples=8):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
comm_mpi, rank, size, device_id = initialize_mpi()
|
||||
comm_nccl = initialize_nccl(comm_mpi, rank, size)
|
||||
operands = get_operands(qibo_circ, datatype, rank, comm_mpi)
|
||||
@@ -241,7 +309,6 @@ def dense_vector_tn(qibo_circ, datatype):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
return contract(*myconvertor.state_vector_operands())
|
||||
|
||||
@@ -270,7 +337,6 @@ def expectation_tn_nccl(qibo_circ, datatype, observable, n_samples=8):
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
|
||||
_require_cuquantum()
|
||||
comm_mpi, rank, size, device_id = initialize_mpi()
|
||||
|
||||
comm_nccl = initialize_nccl(comm_mpi, rank, size)
|
||||
@@ -339,7 +405,6 @@ def expectation_tn_MPI(qibo_circ, datatype, observable, n_samples=8):
|
||||
Returns:
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
# Initialize MPI and device
|
||||
comm, rank, size, device_id = initialize_mpi()
|
||||
|
||||
@@ -399,7 +464,6 @@ def expectation_tn(qibo_circ, datatype, observable):
|
||||
Returns:
|
||||
Expectation of quantum circuit due to pauli string.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
myconvertor = QiboCircuitToEinsum(qibo_circ, dtype=datatype)
|
||||
|
||||
observable = check_observable(observable, qibo_circ.nqubits)
|
||||
@@ -425,7 +489,6 @@ def dense_vector_mps(qibo_circ, gate_algo, datatype):
|
||||
Returns:
|
||||
Dense vector of quantum circuit.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
myconvertor = QiboCircuitToMPS(qibo_circ, gate_algo, dtype=datatype)
|
||||
mps_helper = MPSContractionHelper(myconvertor.num_qubits)
|
||||
|
||||
|
||||
@@ -1,82 +0,0 @@
|
||||
"""High-level CPU expectation runner used by CLI scripts."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
import numpy as np
|
||||
from qibo.backends import construct_backend
|
||||
|
||||
from qibotn.benchmark_cases import exact_pauli_sum
|
||||
from qibotn.observables import check_observable
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExpectationConfig:
|
||||
ansatz: str = "tn"
|
||||
mpi: bool = False
|
||||
bond: int | None = 1024
|
||||
cut_ratio: float | None = 1e-12
|
||||
tensor_module: str = "torch"
|
||||
quimb_backend: str = "torch"
|
||||
dtype: str = "complex128"
|
||||
torch_threads: int = 8
|
||||
parallel_opts: dict | None = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExpectationResult:
|
||||
value: float
|
||||
seconds: float
|
||||
rank: int = 0
|
||||
parallel_stats: list | None = None
|
||||
|
||||
|
||||
def exact_for_observable(circuit, observable, nqubits):
|
||||
if isinstance(observable, dict) and "terms" in observable:
|
||||
terms = [
|
||||
(
|
||||
term["coefficient"],
|
||||
tuple((name, site) for name, site in term["operators"]),
|
||||
)
|
||||
for term in observable["terms"]
|
||||
]
|
||||
return exact_pauli_sum(circuit, terms, nqubits)
|
||||
|
||||
hamiltonian = check_observable(observable, nqubits)
|
||||
return float(hamiltonian.expectation_from_state(circuit().state(numpy=True)).real)
|
||||
|
||||
|
||||
def run_cpu_expectation(circuit, observable, config):
|
||||
runcard = {
|
||||
"MPI_enabled": config.mpi,
|
||||
"MPS_enabled": config.ansatz.lower() == "mps",
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": observable,
|
||||
"max_bond_dimension": config.bond,
|
||||
"cut_ratio": config.cut_ratio,
|
||||
"tensor_module": config.tensor_module,
|
||||
"quimb_backend": config.quimb_backend,
|
||||
"dtype": config.dtype,
|
||||
"torch_threads": config.torch_threads,
|
||||
"parallel_opts": config.parallel_opts or {},
|
||||
}
|
||||
backend = construct_backend(
|
||||
backend="qibotn",
|
||||
platform="cpu",
|
||||
runcard=runcard,
|
||||
)
|
||||
|
||||
start = time.perf_counter()
|
||||
value = backend.execute_circuit(circuit)[0]
|
||||
elapsed = time.perf_counter() - start
|
||||
|
||||
rank = getattr(backend, "rank", 0)
|
||||
stats = getattr(backend, "parallel_stats", None)
|
||||
return ExpectationResult(
|
||||
float(np.real(value)),
|
||||
elapsed,
|
||||
rank=rank,
|
||||
parallel_stats=list(stats) if stats is not None else None,
|
||||
)
|
||||
@@ -1,16 +1,4 @@
|
||||
try:
|
||||
from cuquantum.tensornet import contract, contract_path
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
contract = None
|
||||
contract_path = None
|
||||
|
||||
|
||||
def _require_cuquantum():
|
||||
if contract is None or contract_path is None:
|
||||
raise ImportError(
|
||||
"The cuQuantum MPS contraction helper requires cuquantum. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
from cuquantum.tensornet import contract, contract_path
|
||||
|
||||
# Reference: https://github.com/NVIDIA/cuQuantum/blob/main/python/samples/cutensornet/tn_algorithms/mps_algorithms.ipynb
|
||||
|
||||
@@ -125,7 +113,6 @@ class MPSContractionHelper:
|
||||
return self._contract(interleaved_inputs, options=options) / norm
|
||||
|
||||
def _contract(self, interleaved_inputs, options=None):
|
||||
_require_cuquantum()
|
||||
path = contract_path(*interleaved_inputs, options=options)[0]
|
||||
|
||||
return contract(*interleaved_inputs, options=options, optimize={"path": path})
|
||||
|
||||
@@ -1,19 +1,6 @@
|
||||
try:
|
||||
import cupy as cp
|
||||
from cuquantum.tensornet import contract
|
||||
from cuquantum.tensornet.experimental import contract_decompose
|
||||
except ImportError: # pragma: no cover - exercised on CPU-only installations
|
||||
cp = None
|
||||
contract = None
|
||||
contract_decompose = None
|
||||
|
||||
|
||||
def _require_cuquantum():
|
||||
if cp is None or contract is None or contract_decompose is None:
|
||||
raise ImportError(
|
||||
"The cuQuantum MPS helpers require cupy and cuquantum. "
|
||||
"Install the GPU dependencies or use the CPU backend."
|
||||
)
|
||||
import cupy as cp
|
||||
from cuquantum.tensornet import contract
|
||||
from cuquantum.tensornet.experimental import contract_decompose
|
||||
|
||||
|
||||
def initial(num_qubits, dtype):
|
||||
@@ -26,7 +13,6 @@ def initial(num_qubits, dtype):
|
||||
Returns:
|
||||
The initial MPS tensors.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
state_tensor = cp.asarray([1, 0], dtype=dtype).reshape(1, 2, 1)
|
||||
mps_tensors = [state_tensor] * num_qubits
|
||||
return mps_tensors
|
||||
@@ -42,7 +28,6 @@ def mps_site_right_swap(mps_tensors, i, **kwargs):
|
||||
Returns:
|
||||
The updated MPS tensors.
|
||||
"""
|
||||
_require_cuquantum()
|
||||
# contraction followed by QR decomposition
|
||||
a, _, b = contract_decompose(
|
||||
"ipj,jqk->iqj,jpk",
|
||||
@@ -75,7 +60,6 @@ def apply_gate(mps_tensors, gate, qubits, **kwargs):
|
||||
The updated MPS tensors.
|
||||
"""
|
||||
|
||||
_require_cuquantum()
|
||||
n_qubits = len(qubits)
|
||||
if n_qubits == 1:
|
||||
# single-qubit gate
|
||||
|
||||
@@ -1,126 +0,0 @@
|
||||
"""Observable helpers shared by tensor-network backends and benchmarks."""
|
||||
|
||||
from qibo import hamiltonians
|
||||
from qibo.symbols import I, X, Y, Z
|
||||
|
||||
|
||||
def check_observable(observable, circuit_nqubit):
|
||||
"""Checks the type of observable and returns the appropriate Hamiltonian."""
|
||||
if observable is None:
|
||||
return build_observable(circuit_nqubit)
|
||||
if isinstance(observable, dict):
|
||||
return create_hamiltonian_from_dict(observable, circuit_nqubit)
|
||||
if isinstance(observable, hamiltonians.SymbolicHamiltonian):
|
||||
return observable
|
||||
try:
|
||||
return hamiltonians.SymbolicHamiltonian(form=observable)
|
||||
except Exception as exc:
|
||||
raise TypeError("Invalid observable type.") from exc
|
||||
|
||||
|
||||
def build_observable(circuit_nqubit):
|
||||
"""Construct the default benchmark observable used by qibotn."""
|
||||
hamiltonian_form = 0
|
||||
for i in range(circuit_nqubit):
|
||||
hamiltonian_form += 0.5 * X(i % circuit_nqubit) * Z((i + 1) % circuit_nqubit)
|
||||
|
||||
return hamiltonians.SymbolicHamiltonian(form=hamiltonian_form)
|
||||
|
||||
|
||||
def create_hamiltonian_from_dict(data, circuit_nqubit):
|
||||
"""Create a Qibo SymbolicHamiltonian from the qibotn dict representation."""
|
||||
if "pauli_string_pattern" in data:
|
||||
return create_hamiltonian_from_pauli_pattern(
|
||||
data["pauli_string_pattern"], circuit_nqubit
|
||||
)
|
||||
|
||||
pauli_gates = {"X": X, "Y": Y, "Z": Z}
|
||||
terms = []
|
||||
|
||||
for term in data["terms"]:
|
||||
coeff = term["coefficient"]
|
||||
operators = term["operators"]
|
||||
operator_dict = {q: pauli_gates[g] for g, q in operators}
|
||||
|
||||
full_term_expr = [
|
||||
operator_dict[q](q) if q in operator_dict else I(q)
|
||||
for q in range(circuit_nqubit)
|
||||
]
|
||||
|
||||
term_expr = full_term_expr[0]
|
||||
for op in full_term_expr[1:]:
|
||||
term_expr *= op
|
||||
|
||||
terms.append(coeff * term_expr)
|
||||
|
||||
if not terms:
|
||||
raise ValueError("No valid Hamiltonian terms were added.")
|
||||
|
||||
return hamiltonians.SymbolicHamiltonian(sum(terms))
|
||||
|
||||
|
||||
def create_hamiltonian_from_pauli_pattern(pattern, circuit_nqubit):
|
||||
"""Create a single Pauli-string Hamiltonian by repeating ``pattern``.
|
||||
|
||||
Example: pattern ``"IXZ"`` on 5 qubits becomes ``I0 * X1 * Z2 * I3 * X4``.
|
||||
Identity factors are omitted except for the all-identity case.
|
||||
"""
|
||||
if not isinstance(pattern, str) or not pattern:
|
||||
raise ValueError("pauli_string_pattern must be a non-empty string.")
|
||||
|
||||
pauli_gates = {"X": X, "Y": Y, "Z": Z}
|
||||
pattern = pattern.upper()
|
||||
invalid = sorted(set(pattern) - {"I", "X", "Y", "Z"})
|
||||
if invalid:
|
||||
raise ValueError(
|
||||
"pauli_string_pattern characters must be one of I/X/Y/Z; "
|
||||
f"got {''.join(invalid)!r}."
|
||||
)
|
||||
|
||||
expr = None
|
||||
for qubit in range(circuit_nqubit):
|
||||
name = pattern[qubit % len(pattern)]
|
||||
if name == "I":
|
||||
continue
|
||||
factor = pauli_gates[name](qubit)
|
||||
expr = factor if expr is None else expr * factor
|
||||
|
||||
if expr is None:
|
||||
expr = I(0)
|
||||
|
||||
return hamiltonians.SymbolicHamiltonian(form=expr)
|
||||
|
||||
|
||||
def build_random_circuit(nqubits, nlayers, seed=42):
|
||||
"""Build a random circuit with RY+RZ+CNOT layers for benchmarks."""
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates
|
||||
np.random.seed(seed)
|
||||
c = Circuit(nqubits)
|
||||
for _ in range(nlayers):
|
||||
for q in range(nqubits):
|
||||
c.add(gates.RY(q, theta=np.random.uniform(0, 2*np.pi)))
|
||||
c.add(gates.RZ(q, theta=np.random.uniform(0, 2*np.pi)))
|
||||
for q in range(nqubits):
|
||||
c.add(gates.CNOT(q % nqubits, (q + 1) % nqubits))
|
||||
return c
|
||||
|
||||
|
||||
def extract_gates_and_qubits(hamiltonian):
|
||||
"""Extract per-term Pauli factors from a Qibo SymbolicHamiltonian.
|
||||
|
||||
Returns list of terms, where each term is (coefficient, [(qubit, gate_name), ...]).
|
||||
"""
|
||||
extracted_terms = []
|
||||
|
||||
if not isinstance(hamiltonian, hamiltonians.SymbolicHamiltonian):
|
||||
raise ValueError(
|
||||
"Unsupported Hamiltonian type. Must be SymbolicHamiltonian or Hamiltonian."
|
||||
)
|
||||
|
||||
for term in hamiltonian.terms:
|
||||
coeff = term.coefficient
|
||||
factors = [(int(str(f)[1:]), str(f)[0]) for f in term.factors]
|
||||
extracted_terms.append((coeff, factors))
|
||||
|
||||
return extracted_terms
|
||||
@@ -1,773 +0,0 @@
|
||||
"""Parallel path search and contraction utilities for tensor networks."""
|
||||
import os
|
||||
import pickle
|
||||
import signal
|
||||
import time
|
||||
from math import log2, log10
|
||||
import numpy as np
|
||||
from dataclasses import dataclass
|
||||
from concurrent.futures import ProcessPoolExecutor, TimeoutError, as_completed
|
||||
|
||||
try:
|
||||
from mpi4py import MPI
|
||||
_HAVE_MPI = True
|
||||
except ImportError:
|
||||
_HAVE_MPI = False
|
||||
MPI = None
|
||||
|
||||
|
||||
SEARCH_METHODS = ("greedy", "kahypar", "kahypar-agglom", "spinglass")
|
||||
_COTENGRA_DASK_PATCHED = False
|
||||
_COTENGRA_DASK_SUBMIT_PATCHED = False
|
||||
_DASK_TRIAL_DEBUG = False
|
||||
|
||||
|
||||
def _optimizer_search_stats(opt):
|
||||
scores = list(getattr(opt, "scores", ()))
|
||||
finite_scores = [score for score in scores if np.isfinite(score)]
|
||||
times = list(getattr(opt, "times", ()))
|
||||
best = getattr(opt, "best", {}) or {}
|
||||
return {
|
||||
"completed_trials": len(scores),
|
||||
"finite_trials": len(finite_scores),
|
||||
"failed_trials": len(scores) - len(finite_scores),
|
||||
"requested_trials": int(getattr(opt, "max_repeats", 0) or 0),
|
||||
"trial_seconds_sum": float(sum(times)),
|
||||
"best_score": float(best.get("score", float("inf"))),
|
||||
"best_flops": float(best.get("flops", float("inf"))),
|
||||
"best_write": float(best.get("write", float("inf"))),
|
||||
"best_size": float(best.get("size", float("inf"))),
|
||||
}
|
||||
|
||||
|
||||
def _attach_search_stats(tree, opt):
|
||||
try:
|
||||
tree.qibotn_search_stats = _optimizer_search_stats(opt)
|
||||
except Exception:
|
||||
pass
|
||||
return tree
|
||||
|
||||
|
||||
def _dask_worker_slots(client):
|
||||
info = client.scheduler_info(n_workers=-1)
|
||||
workers = info.get("workers", {})
|
||||
return workers, sum(int(w.get("nthreads", 1) or 1) for w in workers.values())
|
||||
|
||||
|
||||
def _print_dask_worker_summary(client):
|
||||
workers, slots = _dask_worker_slots(client)
|
||||
by_host = {}
|
||||
for worker in workers.values():
|
||||
host = worker.get("host", "unknown")
|
||||
by_host.setdefault(host, {"workers": 0, "threads": 0})
|
||||
by_host[host]["workers"] += 1
|
||||
by_host[host]["threads"] += int(worker.get("nthreads", 1) or 1)
|
||||
print(
|
||||
"qibotn_dask_workers "
|
||||
f"workers={len(workers)} threads={slots} by_host={by_host}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
|
||||
def _run_trial_with_debug(fn, *args, **kwargs):
|
||||
import os
|
||||
import socket
|
||||
|
||||
try:
|
||||
from distributed import get_worker
|
||||
|
||||
worker = get_worker()
|
||||
worker_address = worker.address
|
||||
except Exception:
|
||||
worker_address = "unknown"
|
||||
|
||||
method = kwargs.get("method", "unknown")
|
||||
pid = os.getpid()
|
||||
host = socket.gethostname()
|
||||
print(
|
||||
"qibotn_trial_start "
|
||||
f"worker={worker_address} host={host} pid={pid} method={method}",
|
||||
flush=True,
|
||||
)
|
||||
start = time.perf_counter()
|
||||
try:
|
||||
trial = fn(*args, **kwargs)
|
||||
except Exception as exc:
|
||||
elapsed = time.perf_counter() - start
|
||||
print(
|
||||
"qibotn_trial_error "
|
||||
f"worker={worker_address} host={host} pid={pid} "
|
||||
f"method={method} seconds={elapsed:.3f} error={exc!r}",
|
||||
flush=True,
|
||||
)
|
||||
raise
|
||||
elapsed = time.perf_counter() - start
|
||||
print(
|
||||
"qibotn_trial_done "
|
||||
f"worker={worker_address} host={host} pid={pid} method={method} "
|
||||
f"seconds={elapsed:.3f} score={trial.get('score', float('nan')):.6g} "
|
||||
f"flops={trial.get('flops', float('nan')):.6g} "
|
||||
f"size={trial.get('size', float('nan')):.6g}",
|
||||
flush=True,
|
||||
)
|
||||
return trial
|
||||
|
||||
|
||||
def _patch_cotengra_dask_submit(debug_trials=False):
|
||||
global _COTENGRA_DASK_SUBMIT_PATCHED, _DASK_TRIAL_DEBUG
|
||||
_DASK_TRIAL_DEBUG = bool(debug_trials)
|
||||
if _COTENGRA_DASK_SUBMIT_PATCHED:
|
||||
return
|
||||
|
||||
import cotengra.parallel as ctg_parallel
|
||||
import cotengra.hyperoptimizers.hyper as hyper
|
||||
|
||||
original_submit = ctg_parallel.submit
|
||||
|
||||
def submit(pool, fn, *args, **kwargs):
|
||||
backend = pool.__class__.__module__.split(".", 1)[0]
|
||||
if _DASK_TRIAL_DEBUG and backend == "distributed":
|
||||
return original_submit(
|
||||
pool,
|
||||
_run_trial_with_debug,
|
||||
fn,
|
||||
*args,
|
||||
**kwargs,
|
||||
)
|
||||
return original_submit(pool, fn, *args, **kwargs)
|
||||
|
||||
ctg_parallel.submit = submit
|
||||
hyper.submit = submit
|
||||
_COTENGRA_DASK_SUBMIT_PATCHED = True
|
||||
|
||||
|
||||
def _patch_cotengra_dask_as_completed():
|
||||
"""Make cotengra 0.7.5 handle distributed.Future objects.
|
||||
|
||||
This cotengra release routes all parallel futures through
|
||||
``concurrent.futures.as_completed()``, which does not accept dask
|
||||
``distributed.Future`` instances. Keep cotengra's optimizer/reporting logic
|
||||
intact and only swap the wait primitive when the futures are from dask.
|
||||
"""
|
||||
global _COTENGRA_DASK_PATCHED
|
||||
if _COTENGRA_DASK_PATCHED:
|
||||
return
|
||||
|
||||
from cotengra.hyperoptimizers.hyper import HyperOptimizer
|
||||
|
||||
def _get_and_report_next_future(self):
|
||||
futures_map = {future: setting for setting, future in self._futures}
|
||||
if not futures_map:
|
||||
return {
|
||||
"score": float("inf"),
|
||||
"flops": float("inf"),
|
||||
"write": float("inf"),
|
||||
"size": float("inf"),
|
||||
"time": 0.0,
|
||||
}
|
||||
|
||||
future0 = next(iter(futures_map))
|
||||
if future0.__class__.__module__.split(".", 1)[0] == "distributed":
|
||||
from distributed import as_completed
|
||||
|
||||
deadline = getattr(self, "_qibotn_deadline", None)
|
||||
timeout = None if deadline is None else max(0.0, deadline - time.time())
|
||||
try:
|
||||
future = next(iter(as_completed(futures_map, timeout=timeout)))
|
||||
except TimeoutError:
|
||||
for future in futures_map:
|
||||
future.cancel()
|
||||
self._futures = []
|
||||
return {
|
||||
"score": float("inf"),
|
||||
"flops": float("inf"),
|
||||
"write": float("inf"),
|
||||
"size": float("inf"),
|
||||
"time": 0.0,
|
||||
}
|
||||
else:
|
||||
import concurrent.futures as _cf
|
||||
|
||||
future = next(_cf.as_completed(futures_map))
|
||||
|
||||
setting = futures_map[future]
|
||||
self._futures = [(s, f) for s, f in self._futures if f is not future]
|
||||
try:
|
||||
trial = future.result()
|
||||
except Exception:
|
||||
trial = {
|
||||
"score": float("inf"),
|
||||
"flops": float("inf"),
|
||||
"write": float("inf"),
|
||||
"size": float("inf"),
|
||||
"time": 0.0,
|
||||
}
|
||||
self._maybe_report_result(setting, trial)
|
||||
return trial
|
||||
|
||||
HyperOptimizer._get_and_report_next_future = _get_and_report_next_future
|
||||
_COTENGRA_DASK_PATCHED = True
|
||||
|
||||
|
||||
def _search_chunk(
|
||||
tn_bytes,
|
||||
output_inds,
|
||||
repeats,
|
||||
seed,
|
||||
max_time,
|
||||
slicing_opts,
|
||||
optlib=None,
|
||||
):
|
||||
import random, cotengra as ctg
|
||||
|
||||
random.seed(seed)
|
||||
tn = pickle.loads(tn_bytes)
|
||||
kwargs = {}
|
||||
if optlib is not None:
|
||||
kwargs["optlib"] = optlib
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=SEARCH_METHODS,
|
||||
max_repeats=repeats,
|
||||
max_time=max_time,
|
||||
parallel=False,
|
||||
minimize="combo-256",
|
||||
slicing_opts=slicing_opts,
|
||||
progbar=False,
|
||||
**kwargs,
|
||||
)
|
||||
tree = tn.contraction_tree(optimize=opt, output_inds=output_inds)
|
||||
return tree.combo_cost(factor=256), _attach_search_stats(tree, opt)
|
||||
|
||||
|
||||
def _run_single_trial(tn_bytes, output_inds, seed, slicing_opts):
|
||||
return _search_chunk(
|
||||
tn_bytes,
|
||||
output_inds,
|
||||
repeats=1,
|
||||
seed=seed,
|
||||
max_time=None,
|
||||
slicing_opts=slicing_opts,
|
||||
optlib="random",
|
||||
)
|
||||
|
||||
|
||||
def _kill_pool(pool):
|
||||
processes = getattr(pool, "_processes", None)
|
||||
if processes:
|
||||
pids = list(processes.keys())
|
||||
else:
|
||||
pids = []
|
||||
|
||||
for pid in pids:
|
||||
try:
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
pool.shutdown(wait=False)
|
||||
|
||||
|
||||
def _serial_search(tn_bytes, output_inds, repeats, seed, max_time, slicing_opts=None, trial_timeout=None):
|
||||
import time
|
||||
|
||||
if trial_timeout is None:
|
||||
return _search_chunk(
|
||||
tn_bytes,
|
||||
output_inds,
|
||||
repeats=repeats,
|
||||
seed=seed,
|
||||
max_time=max_time,
|
||||
slicing_opts=slicing_opts,
|
||||
)
|
||||
|
||||
deadline = time.time() + max_time
|
||||
best_cost, best_tree = float("inf"), None
|
||||
|
||||
for i in range(repeats):
|
||||
if time.time() >= deadline:
|
||||
break
|
||||
timeout = min(trial_timeout, deadline - time.time())
|
||||
pool = ProcessPoolExecutor(max_workers=1)
|
||||
fut = pool.submit(_run_single_trial, tn_bytes, output_inds, seed * 10000 + i, slicing_opts)
|
||||
try:
|
||||
cost, tree = fut.result(timeout=timeout)
|
||||
if cost < best_cost:
|
||||
best_cost, best_tree = cost, tree
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
_kill_pool(pool)
|
||||
|
||||
return best_cost, best_tree
|
||||
|
||||
|
||||
def _split_repeats(total_repeats, n_workers):
|
||||
n_workers = max(1, int(n_workers))
|
||||
total_repeats = max(1, int(total_repeats))
|
||||
chunk, extra = divmod(total_repeats, n_workers)
|
||||
return [chunk + (1 if i < extra else 0) for i in range(n_workers) if chunk + (1 if i < extra else 0) > 0]
|
||||
|
||||
|
||||
def _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts=None, trial_timeout=None):
|
||||
tn_bytes = pickle.dumps(tn)
|
||||
repeat_chunks = _split_repeats(total_repeats, n_workers)
|
||||
pool = ProcessPoolExecutor(max_workers=len(repeat_chunks))
|
||||
futures = []
|
||||
for seed, repeats in enumerate(repeat_chunks):
|
||||
futures.append(
|
||||
pool.submit(
|
||||
_serial_search,
|
||||
tn_bytes,
|
||||
output_inds,
|
||||
repeats,
|
||||
seed,
|
||||
max_time,
|
||||
slicing_opts,
|
||||
trial_timeout,
|
||||
)
|
||||
)
|
||||
best_cost, best_tree = float("inf"), None
|
||||
deadline = time.monotonic() + max_time if max_time is not None else None
|
||||
try:
|
||||
timeout = None if deadline is None else max(0.0, deadline - time.monotonic())
|
||||
for fut in as_completed(futures, timeout=timeout):
|
||||
try:
|
||||
cost, tree = fut.result()
|
||||
if cost < best_cost:
|
||||
best_cost, best_tree = cost, tree
|
||||
except Exception:
|
||||
pass
|
||||
except TimeoutError:
|
||||
pass
|
||||
finally:
|
||||
for fut in futures:
|
||||
fut.cancel()
|
||||
_kill_pool(pool)
|
||||
return best_tree
|
||||
|
||||
|
||||
def _dask_search(
|
||||
tn,
|
||||
output_inds,
|
||||
total_repeats,
|
||||
max_time,
|
||||
slicing_opts=None,
|
||||
dask_address=None,
|
||||
n_workers=None,
|
||||
optlib=None,
|
||||
debug_trials=False,
|
||||
close_workers=False,
|
||||
):
|
||||
"""Run one centralized cotengra hyper-optimizer over a dask pool.
|
||||
|
||||
With ``dask_address`` this connects to an external distributed scheduler.
|
||||
Without it, a local dask cluster is created for single-node smoke testing.
|
||||
"""
|
||||
try:
|
||||
from distributed import Client, LocalCluster, get_client
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"Dask search requires `distributed`. Install it with "
|
||||
"`pip install distributed` or the package extra that provides it."
|
||||
) from exc
|
||||
|
||||
import cotengra as ctg
|
||||
|
||||
_patch_cotengra_dask_as_completed()
|
||||
_patch_cotengra_dask_submit(debug_trials=debug_trials)
|
||||
|
||||
close_client = False
|
||||
close_cluster = False
|
||||
cluster = None
|
||||
|
||||
if dask_address:
|
||||
client = Client(dask_address)
|
||||
close_client = True
|
||||
else:
|
||||
try:
|
||||
client = get_client()
|
||||
except ValueError:
|
||||
cluster = LocalCluster(
|
||||
n_workers=max(1, int(n_workers or os.cpu_count() or 1)),
|
||||
threads_per_worker=1,
|
||||
processes=True,
|
||||
memory_limit=0,
|
||||
)
|
||||
client = Client(cluster)
|
||||
close_client = True
|
||||
close_cluster = True
|
||||
|
||||
kwargs = {}
|
||||
if optlib is not None:
|
||||
kwargs["optlib"] = optlib
|
||||
|
||||
retire_workers = []
|
||||
try:
|
||||
workers, worker_slots = _dask_worker_slots(client)
|
||||
if close_workers:
|
||||
retire_workers = list(workers)
|
||||
if debug_trials:
|
||||
_print_dask_worker_summary(client)
|
||||
if total_repeats < worker_slots:
|
||||
print(
|
||||
"qibotn_dask_underutilized "
|
||||
f"requested_trials={total_repeats} worker_slots={worker_slots} "
|
||||
"hint='increase --tn-search-repeats to at least worker_slots'",
|
||||
flush=True,
|
||||
)
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=SEARCH_METHODS,
|
||||
max_repeats=total_repeats,
|
||||
max_time=max_time,
|
||||
parallel=client,
|
||||
minimize="combo-256",
|
||||
slicing_opts=slicing_opts,
|
||||
progbar=False,
|
||||
**kwargs,
|
||||
)
|
||||
opt._num_workers = max(1, worker_slots)
|
||||
opt.pre_dispatch = max(1, min(int(total_repeats), worker_slots))
|
||||
if max_time is not None:
|
||||
opt._qibotn_deadline = time.time() + max_time
|
||||
tree = tn.contraction_tree(optimize=opt, output_inds=output_inds)
|
||||
return _attach_search_stats(tree, opt)
|
||||
finally:
|
||||
if close_workers and retire_workers:
|
||||
try:
|
||||
retired = client.retire_workers(
|
||||
workers=retire_workers,
|
||||
close_workers=True,
|
||||
remove=True,
|
||||
)
|
||||
print(
|
||||
"qibotn_dask_workers_closed "
|
||||
f"requested={len(retire_workers)} retired={len(retired)}",
|
||||
flush=True,
|
||||
)
|
||||
except Exception as exc:
|
||||
print(
|
||||
"qibotn_dask_workers_close_failed "
|
||||
f"requested={len(retire_workers)} error={exc!r}",
|
||||
flush=True,
|
||||
)
|
||||
if close_client:
|
||||
client.close()
|
||||
if close_cluster:
|
||||
cluster.close()
|
||||
|
||||
|
||||
def _mpi_search(
|
||||
tn,
|
||||
output_inds,
|
||||
total_repeats,
|
||||
max_time,
|
||||
n_workers=None,
|
||||
slicing_opts=None,
|
||||
trial_timeout=None,
|
||||
search_backend="processpool",
|
||||
dask_address=None,
|
||||
debug_trials=False,
|
||||
dask_close_workers=False,
|
||||
):
|
||||
comm = MPI.COMM_WORLD
|
||||
rank, size = comm.Get_rank(), comm.Get_size()
|
||||
search_backend = search_backend or "processpool"
|
||||
|
||||
if search_backend == "dask":
|
||||
if not dask_address:
|
||||
raise ValueError(
|
||||
"MPI + dask search requires an external dask scheduler. Start "
|
||||
"dask-scheduler/dask-worker outside mpiexec and pass "
|
||||
"`--dask-address tcp://host:8786`."
|
||||
)
|
||||
|
||||
payload = None
|
||||
if rank == 0:
|
||||
try:
|
||||
tree = _dask_search(
|
||||
tn,
|
||||
output_inds,
|
||||
total_repeats,
|
||||
max_time,
|
||||
slicing_opts=slicing_opts,
|
||||
dask_address=dask_address,
|
||||
n_workers=n_workers,
|
||||
debug_trials=debug_trials,
|
||||
close_workers=dask_close_workers,
|
||||
)
|
||||
payload = ("ok", tree)
|
||||
except Exception as exc:
|
||||
payload = ("error", repr(exc))
|
||||
|
||||
status, value = comm.bcast(payload, root=0)
|
||||
if status == "error":
|
||||
raise RuntimeError(f"Dask path search failed on rank 0: {value}")
|
||||
return value
|
||||
|
||||
repeats_per = max(1, total_repeats // size)
|
||||
|
||||
# Run search work in child processes even when n_workers == 1, so the parent
|
||||
# MPI rank can enforce the global timeout by killing active trials.
|
||||
local_tree = _processpool_search(
|
||||
tn,
|
||||
output_inds,
|
||||
repeats_per,
|
||||
max(1, n_workers or 1),
|
||||
max_time,
|
||||
slicing_opts,
|
||||
trial_timeout,
|
||||
)
|
||||
local_cost = local_tree.combo_cost(factor=256) if local_tree else float("inf")
|
||||
|
||||
all_results = comm.gather((local_cost, local_tree), root=0)
|
||||
best_tree = None
|
||||
if rank == 0:
|
||||
best_cost = float("inf")
|
||||
for cost, tree in all_results:
|
||||
if tree is not None and cost < best_cost:
|
||||
best_cost, best_tree = cost, tree
|
||||
return comm.bcast(best_tree, root=0)
|
||||
|
||||
|
||||
def parallel_path_search(tn, output_inds, method='processpool', total_repeats=1024,
|
||||
max_time=300, n_workers=48, slicing_opts=None,
|
||||
trial_timeout=None, search_backend=None,
|
||||
dask_address=None, debug_trials=False,
|
||||
dask_close_workers=False):
|
||||
"""Parallel contraction path search.
|
||||
|
||||
Args:
|
||||
method: 'processpool' | 'dask' | 'mpi' | 'serial'
|
||||
total_repeats: Total optimization repeats across all workers
|
||||
max_time: Global timeout per worker (seconds)
|
||||
n_workers: Workers per MPI rank (or total for processpool)
|
||||
slicing_opts: cotengra slicing options for memory control
|
||||
trial_timeout: Per-trial timeout (seconds); kills and skips hung trials
|
||||
"""
|
||||
if method == 'serial':
|
||||
tn_bytes = pickle.dumps(tn)
|
||||
_, tree = _serial_search(tn_bytes, output_inds, total_repeats, 0, max_time, slicing_opts, trial_timeout)
|
||||
return tree
|
||||
elif method == 'mpi':
|
||||
if not _HAVE_MPI:
|
||||
raise ImportError("mpi4py not available")
|
||||
return _mpi_search(
|
||||
tn,
|
||||
output_inds,
|
||||
total_repeats,
|
||||
max_time,
|
||||
n_workers,
|
||||
slicing_opts,
|
||||
trial_timeout,
|
||||
search_backend=search_backend,
|
||||
dask_address=dask_address,
|
||||
debug_trials=debug_trials,
|
||||
dask_close_workers=dask_close_workers,
|
||||
)
|
||||
elif method == 'processpool':
|
||||
return _processpool_search(tn, output_inds, total_repeats, n_workers, max_time, slicing_opts, trial_timeout)
|
||||
elif method == 'dask':
|
||||
return _dask_search(
|
||||
tn,
|
||||
output_inds,
|
||||
total_repeats,
|
||||
max_time,
|
||||
slicing_opts=slicing_opts,
|
||||
dask_address=dask_address,
|
||||
n_workers=n_workers,
|
||||
debug_trials=debug_trials,
|
||||
close_workers=dask_close_workers,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unknown method: {method}")
|
||||
|
||||
|
||||
def contraction_tree_costs(tree, dtype_bytes=16, combo_factor=256):
|
||||
"""Return comparable cost estimates for a cotengra contraction tree.
|
||||
|
||||
These values are estimates, not profiling results. They are the right first
|
||||
signal for path quality: lower ``combo`` usually means lower CPU contraction
|
||||
time, while ``peak_memory_gib`` estimates the largest intermediate tensor.
|
||||
"""
|
||||
stats = tree.contract_stats()
|
||||
flops = float(stats["flops"])
|
||||
write = float(stats["write"])
|
||||
size = float(stats["size"])
|
||||
combo = float(tree.combo_cost(factor=combo_factor))
|
||||
nslices = int(getattr(tree, "multiplicity", 1))
|
||||
original_flops = float(stats.get("original_flops", flops))
|
||||
|
||||
return {
|
||||
"flops": flops,
|
||||
"write": write,
|
||||
"size": size,
|
||||
"combo": combo,
|
||||
"log10_flops": log10(flops) if flops > 0 else float("-inf"),
|
||||
"log10_write": log10(write) if write > 0 else float("-inf"),
|
||||
"log2_size": log2(size) if size > 0 else float("-inf"),
|
||||
"log10_combo": log10(combo) if combo > 0 else float("-inf"),
|
||||
"nslices": nslices,
|
||||
"slicing_overhead": flops / original_flops if original_flops > 0 else float("nan"),
|
||||
"peak_memory_gib": size * dtype_bytes / 1024**3,
|
||||
}
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SlicePlan:
|
||||
"""Slice ownership for one MPI rank."""
|
||||
|
||||
rank: int
|
||||
size: int
|
||||
nslices: int
|
||||
indices: tuple
|
||||
assignment: str = "block"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SlicedContractStats:
|
||||
"""Diagnostics for one sliced contraction."""
|
||||
|
||||
rank: int
|
||||
size: int
|
||||
nslices: int
|
||||
local_slices: int
|
||||
assignment: str
|
||||
|
||||
|
||||
def mpi_slice_plan(nslices, rank, size, assignment="block"):
|
||||
"""Return the contraction slice ids assigned to one MPI rank.
|
||||
|
||||
``block`` gives each rank a contiguous range, mirroring cutensornet's
|
||||
slice-range style. ``cyclic`` gives rank ``r`` slices ``r, r + size, ...``,
|
||||
which can balance better if individual slice costs vary.
|
||||
"""
|
||||
if nslices < 0:
|
||||
raise ValueError("nslices must be non-negative.")
|
||||
if size <= 0:
|
||||
raise ValueError("size must be positive.")
|
||||
if not 0 <= rank < size:
|
||||
raise ValueError("rank must satisfy 0 <= rank < size.")
|
||||
|
||||
if assignment == "block":
|
||||
chunk, extra = divmod(nslices, size)
|
||||
start = rank * chunk + min(rank, extra)
|
||||
stop = start + chunk + (1 if rank < extra else 0)
|
||||
indices = tuple(range(start, stop))
|
||||
elif assignment == "cyclic":
|
||||
indices = tuple(range(rank, nslices, size))
|
||||
else:
|
||||
raise ValueError("assignment must be 'block' or 'cyclic'.")
|
||||
|
||||
return SlicePlan(rank, size, nslices, indices, assignment)
|
||||
|
||||
|
||||
def _array_backend(arrays):
|
||||
return "torch" if type(arrays[0]).__module__.startswith("torch") else "numpy"
|
||||
|
||||
|
||||
def _to_numpy_vector(value, is_torch):
|
||||
if is_torch:
|
||||
return value.detach().cpu().numpy().reshape(-1)
|
||||
return np.asarray(value).reshape(-1)
|
||||
|
||||
|
||||
def _zero_vector_like(arrays):
|
||||
array = arrays[0]
|
||||
if type(array).__module__.startswith("torch"):
|
||||
return np.zeros(1, dtype=np.complex64 if "64" in str(array.dtype) else np.complex128)
|
||||
return np.zeros(1, dtype=np.asarray(array).dtype)
|
||||
|
||||
|
||||
def contract_tree_slices(tree, arrays, slice_indices, backend=None, implementation=None):
|
||||
"""Contract a subset of cotengra slices and return their local sum."""
|
||||
backend = backend or _array_backend(arrays)
|
||||
is_torch = backend == "torch"
|
||||
local = None
|
||||
cpp_contract = None
|
||||
if implementation == "cpp":
|
||||
if backend != "torch":
|
||||
raise ValueError("implementation='cpp' requires torch arrays.")
|
||||
from qibotn.torch_contractor import contract_tree_cpp
|
||||
|
||||
cpp_contract = contract_tree_cpp
|
||||
|
||||
for slice_id in slice_indices:
|
||||
if cpp_contract is not None:
|
||||
value = cpp_contract(tree, tree.slice_arrays(arrays, slice_id))
|
||||
elif implementation is None:
|
||||
value = tree.contract_slice(arrays, slice_id, backend=backend)
|
||||
else:
|
||||
value = tree.contract_slice(
|
||||
arrays,
|
||||
slice_id,
|
||||
backend=backend,
|
||||
implementation=implementation,
|
||||
)
|
||||
value = _to_numpy_vector(value, is_torch)
|
||||
local = value if local is None else local + value
|
||||
|
||||
return _zero_vector_like(arrays) if local is None else local
|
||||
|
||||
|
||||
def parallel_contract(
|
||||
tree,
|
||||
arrays,
|
||||
method='mpi',
|
||||
comm=None,
|
||||
assignment="block",
|
||||
return_stats=False,
|
||||
implementation=None,
|
||||
):
|
||||
if method == 'mpi':
|
||||
if not _HAVE_MPI or comm is None:
|
||||
raise ValueError("MPI method requires mpi4py and comm")
|
||||
return _contract_mpi(
|
||||
tree,
|
||||
arrays,
|
||||
comm,
|
||||
assignment=assignment,
|
||||
return_stats=return_stats,
|
||||
implementation=implementation,
|
||||
)
|
||||
raise ValueError(f"Unknown method: {method}")
|
||||
|
||||
|
||||
def _contract_mpi(
|
||||
tree,
|
||||
arrays,
|
||||
comm,
|
||||
root=0,
|
||||
assignment="block",
|
||||
return_stats=False,
|
||||
implementation=None,
|
||||
):
|
||||
rank, size = comm.Get_rank(), comm.Get_size()
|
||||
backend = _array_backend(arrays)
|
||||
is_torch = backend == "torch"
|
||||
nslices = int(getattr(tree, "multiplicity", 1))
|
||||
stats = SlicedContractStats(rank, size, nslices, 0, assignment)
|
||||
nslices_by_rank = comm.allgather(nslices)
|
||||
if len(set(nslices_by_rank)) != 1:
|
||||
raise RuntimeError(
|
||||
"Inconsistent contraction tree slices across MPI ranks: "
|
||||
f"{nslices_by_rank}. Ensure all nodes load the same tree file."
|
||||
)
|
||||
|
||||
if not set(getattr(tree, "sliced_inds", ())).isdisjoint(set(getattr(tree, "output", ()))):
|
||||
raise NotImplementedError(
|
||||
"MPI sliced contraction currently requires sliced indices not to "
|
||||
"appear in the output."
|
||||
)
|
||||
|
||||
plan = mpi_slice_plan(nslices, rank, size, assignment=assignment)
|
||||
local = contract_tree_slices(
|
||||
tree,
|
||||
arrays,
|
||||
plan.indices,
|
||||
backend=backend,
|
||||
implementation=implementation,
|
||||
)
|
||||
stats = SlicedContractStats(rank, size, nslices, len(plan.indices), assignment)
|
||||
|
||||
result = np.zeros_like(local) if rank == root else None
|
||||
comm.Reduce(local, result, root=root)
|
||||
return (result, stats) if return_stats else result
|
||||
@@ -57,10 +57,10 @@ class TensorNetworkResult:
|
||||
return self.measures
|
||||
|
||||
def state(self):
|
||||
"""Return the statevector if the number of qubits is less than 35."""
|
||||
if self.nqubits < 35:
|
||||
"""Return the statevector if the number of qubits is less than 20."""
|
||||
if self.nqubits < 20:
|
||||
return self.statevector
|
||||
raise_error(
|
||||
NotImplementedError,
|
||||
f"Tensor network simulation cannot be used to reconstruct statevector for >= 35 .",
|
||||
f"Tensor network simulation cannot be used to reconstruct statevector for >= 20 .",
|
||||
)
|
||||
|
||||
@@ -1,252 +0,0 @@
|
||||
"""Torch C++ contraction backend for cotengra trees.
|
||||
|
||||
This module compiles a restricted cotengra contraction tree into a compact
|
||||
execution plan, then executes that plan in a C++ torch extension. It is an
|
||||
experimental CPU path for reducing Python-level overhead between many
|
||||
pairwise contractions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import os
|
||||
from functools import lru_cache
|
||||
from pathlib import Path
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
_EXTENSION = None
|
||||
_CONTRACTORS = {}
|
||||
SMALL_GEMM_BATCH_FLOPS = 1_000_000
|
||||
|
||||
|
||||
def _load_extension():
|
||||
global _EXTENSION
|
||||
if _EXTENSION is not None:
|
||||
return _EXTENSION
|
||||
|
||||
from torch.utils.cpp_extension import load
|
||||
|
||||
source = Path(__file__).resolve().parent / "csrc" / "torch_contractor.cpp"
|
||||
mklroot = os.environ.get("MKLROOT")
|
||||
extra_cflags = ["-O3"]
|
||||
extra_ldflags = []
|
||||
extra_include_paths = []
|
||||
if mklroot:
|
||||
mklroot_path = Path(mklroot)
|
||||
mkl_include = mklroot_path / "include"
|
||||
mkl_lib = mklroot_path / "lib"
|
||||
if (mkl_include / "mkl_cblas.h").exists() and (
|
||||
(mkl_lib / "libmkl_rt.so").exists()
|
||||
or (mkl_lib / "libmkl_rt.so.2").exists()
|
||||
):
|
||||
extra_cflags.append("-DQIBOTN_USE_MKL")
|
||||
extra_include_paths.append(str(mkl_include))
|
||||
extra_ldflags.extend([f"-L{mkl_lib}", "-lmkl_rt"])
|
||||
|
||||
_EXTENSION = load(
|
||||
name="qibotn_torch_contractor",
|
||||
sources=[str(source)],
|
||||
extra_cflags=extra_cflags,
|
||||
extra_ldflags=extra_ldflags,
|
||||
extra_include_paths=extra_include_paths,
|
||||
verbose=False,
|
||||
)
|
||||
return _EXTENSION
|
||||
|
||||
|
||||
def _is_plain_permutation(expr):
|
||||
if expr is None:
|
||||
return None
|
||||
if isinstance(expr, tuple):
|
||||
return tuple(int(i) for i in expr)
|
||||
if not isinstance(expr, str):
|
||||
return None
|
||||
if "," in expr or "->" not in expr:
|
||||
return None
|
||||
source, target = expr.split("->", 1)
|
||||
if len(source) != len(target):
|
||||
return None
|
||||
if len(set(source)) != len(source) or set(source) != set(target):
|
||||
return None
|
||||
return tuple(source.index(ix) for ix in target)
|
||||
|
||||
|
||||
def _maybe_tuple(values):
|
||||
return () if values is None else tuple(int(x) for x in values)
|
||||
|
||||
|
||||
def _shape_from_inds(tree, node):
|
||||
return tuple(int(tree.size_dict[ix]) for ix in tree.get_inds(node))
|
||||
|
||||
|
||||
def _matmul_signature(op):
|
||||
kind = op[3]
|
||||
if kind != 0:
|
||||
return None
|
||||
left_shape = op[5]
|
||||
right_shape = op[7]
|
||||
if len(left_shape) == 2 and len(right_shape) == 2:
|
||||
m, k, n = left_shape[-2], left_shape[-1], right_shape[-1]
|
||||
return ("mm", int(m), int(k), int(n), int(m * k * n))
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_node_ids(tree, contractions):
|
||||
leaf_to_id = {
|
||||
frozenset((i,)): i
|
||||
for i in range(tree.N)
|
||||
}
|
||||
next_id = len(leaf_to_id)
|
||||
node_to_id = dict(leaf_to_id)
|
||||
for parent, _left, _right, _tdot, _arg, _perm in contractions:
|
||||
if parent not in node_to_id:
|
||||
node_to_id[parent] = next_id
|
||||
next_id += 1
|
||||
|
||||
return node_to_id, next_id
|
||||
|
||||
|
||||
@lru_cache(maxsize=32)
|
||||
def compile_torch_plan(tree):
|
||||
"""Compile ``tree`` into C++ contractor plan fields.
|
||||
|
||||
The supported subset is the same pairwise matmul lowering used by
|
||||
cotengra for torch CPU. Single-tensor diagonal/sum preprocessing is not
|
||||
supported yet because it appears only in less common trees; callers should
|
||||
fall back to cotengra for those cases.
|
||||
"""
|
||||
|
||||
contract_mod = importlib.import_module("cotengra.contract")
|
||||
contractions = contract_mod.extract_contractions(tree)
|
||||
node_to_id, ntemps = _normalize_node_ids(tree, contractions)
|
||||
plan = []
|
||||
|
||||
for parent, left, right, tdot, arg, perm in contractions:
|
||||
if left is None or right is None:
|
||||
raise NotImplementedError(
|
||||
"C++ torch contractor does not support cotengra preprocessing."
|
||||
)
|
||||
|
||||
left_shape = _shape_from_inds(tree, left)
|
||||
right_shape = _shape_from_inds(tree, right)
|
||||
if tdot:
|
||||
parsed = contract_mod._parse_tensordot_axes_to_matmul(
|
||||
arg,
|
||||
left_shape,
|
||||
right_shape,
|
||||
)
|
||||
else:
|
||||
parsed = contract_mod._parse_eq_to_batch_matmul(
|
||||
arg,
|
||||
left_shape,
|
||||
right_shape,
|
||||
)
|
||||
|
||||
(
|
||||
eq_a,
|
||||
eq_b,
|
||||
new_shape_a,
|
||||
new_shape_b,
|
||||
new_shape_ab,
|
||||
perm_ab,
|
||||
pure_multiplication,
|
||||
) = parsed
|
||||
|
||||
left_perm = _is_plain_permutation(eq_a)
|
||||
right_perm = _is_plain_permutation(eq_b)
|
||||
if left_perm is None and eq_a is not None:
|
||||
raise NotImplementedError(f"Unsupported left preparation: {eq_a!r}")
|
||||
if right_perm is None and eq_b is not None:
|
||||
raise NotImplementedError(f"Unsupported right preparation: {eq_b!r}")
|
||||
|
||||
plan.append(
|
||||
(
|
||||
node_to_id[parent],
|
||||
node_to_id[left],
|
||||
node_to_id[right],
|
||||
1 if pure_multiplication else 0,
|
||||
left_perm or (),
|
||||
_maybe_tuple(new_shape_a),
|
||||
right_perm or (),
|
||||
_maybe_tuple(new_shape_b),
|
||||
_maybe_tuple(new_shape_ab),
|
||||
_maybe_tuple(perm_ab),
|
||||
)
|
||||
)
|
||||
|
||||
if perm is not None:
|
||||
raise NotImplementedError(
|
||||
"C++ torch contractor does not support cotengra tensordot perm."
|
||||
)
|
||||
|
||||
root_id = node_to_id[tree.root]
|
||||
return tuple(plan), int(ntemps), int(root_id)
|
||||
|
||||
|
||||
@lru_cache(maxsize=32)
|
||||
def compile_batch_groups(tree, max_flops=SMALL_GEMM_BATCH_FLOPS):
|
||||
plan, _ntemps, _root_id = compile_torch_plan(tree)
|
||||
contractions = importlib.import_module("cotengra.contract").extract_contractions(tree)
|
||||
node_to_id, _ntemps = _normalize_node_ids(tree, contractions)
|
||||
depth = {frozenset((i,)): 0 for i in range(tree.N)}
|
||||
tensor_depth = {i: 0 for i in range(tree.N)}
|
||||
groups = defaultdict(list)
|
||||
|
||||
for op_index, (contract_op, contraction) in enumerate(zip(plan, contractions)):
|
||||
parent, left, right, _tdot, _arg, _perm = contraction
|
||||
d = max(depth[left], depth[right]) + 1
|
||||
depth[parent] = d
|
||||
tensor_depth[contract_op[0]] = d
|
||||
sig = _matmul_signature(contract_op)
|
||||
if sig is None:
|
||||
continue
|
||||
kind, m, k, n, flops = sig
|
||||
if flops > max_flops:
|
||||
continue
|
||||
groups[(d, kind, m, k, n)].append(op_index)
|
||||
|
||||
batch_groups = tuple(
|
||||
tuple(items)
|
||||
for _key, items in sorted(groups.items(), key=lambda item: (item[0], item[1][0]))
|
||||
if len(items) >= 2
|
||||
)
|
||||
return batch_groups
|
||||
|
||||
|
||||
def batch_group_summary(tree, max_flops=SMALL_GEMM_BATCH_FLOPS):
|
||||
plan, _ntemps, _root_id = compile_torch_plan(tree)
|
||||
groups = compile_batch_groups(tree, max_flops=max_flops)
|
||||
covered = sum(len(group) for group in groups)
|
||||
calls_saved = sum(len(group) - 1 for group in groups)
|
||||
by_shape = []
|
||||
for group in groups:
|
||||
op = plan[group[0]]
|
||||
sig = _matmul_signature(op)
|
||||
by_shape.append((sig[1:4], len(group), group[:8]))
|
||||
return {
|
||||
"groups": len(groups),
|
||||
"covered_ops": covered,
|
||||
"calls_saved": calls_saved,
|
||||
"by_shape": by_shape,
|
||||
}
|
||||
|
||||
|
||||
def contract_tree_cpp(tree, arrays):
|
||||
"""Contract a cotengra tree using the experimental C++ torch contractor."""
|
||||
|
||||
contractor = prepare_torch_cpp_contractor(tree)
|
||||
return contractor.contract(list(arrays))
|
||||
|
||||
|
||||
def prepare_torch_cpp_contractor(tree):
|
||||
"""Load the extension and compile ``tree`` without running contraction."""
|
||||
|
||||
ext = _load_extension()
|
||||
key = id(tree)
|
||||
contractor = _CONTRACTORS.get(key)
|
||||
if contractor is None:
|
||||
plan, ntemps, root_id = compile_torch_plan(tree)
|
||||
contractor = ext.Contractor(list(plan), ntemps, root_id)
|
||||
_CONTRACTORS[key] = contractor
|
||||
return contractor
|
||||
27
tests/contract.py
Normal file
27
tests/contract.py
Normal file
@@ -0,0 +1,27 @@
|
||||
import time
|
||||
import pickle
|
||||
|
||||
|
||||
def run(input="tree.pkl"):
|
||||
with open(input, "rb") as f:
|
||||
data = pickle.load(f)
|
||||
|
||||
sliced_tree = data["sliced_tree"]
|
||||
arrays = data["arrays"]
|
||||
n_slices = sliced_tree.nslices
|
||||
print(f"Total slices: {n_slices}")
|
||||
|
||||
t0 = time.perf_counter()
|
||||
total = sum(sliced_tree.contract_slice(arrays, i, backend='numpy',implementation='cotengra') for i in range(n_slices))
|
||||
t1 = time.perf_counter()
|
||||
|
||||
print(f"Contract: {t1 - t0:.4f} s")
|
||||
#print(f"Result: {total:.10f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--input", type=str, default="tree.pkl.bak")
|
||||
args = parser.parse_args()
|
||||
run(args.input)
|
||||
60
tests/gen_qasm.py
Normal file
60
tests/gen_qasm.py
Normal file
@@ -0,0 +1,60 @@
|
||||
"""生成比赛常用测试电路的 QASM 文件。"""
|
||||
import argparse
|
||||
import qibo
|
||||
from qibo.models import QFT, Circuit
|
||||
from qibo import gates
|
||||
import numpy as np
|
||||
|
||||
qibo.set_backend("numpy")
|
||||
|
||||
|
||||
def gen_qft(n_qubits):
|
||||
return QFT(n_qubits, with_swaps=True).to_qasm()
|
||||
|
||||
|
||||
def gen_random(n_qubits, depth, seed):
|
||||
rng = np.random.default_rng(seed)
|
||||
c = Circuit(n_qubits)
|
||||
for _ in range(depth):
|
||||
for q in range(n_qubits):
|
||||
c.add(gates.H(q))
|
||||
for q in range(0, n_qubits - 1, 2):
|
||||
c.add(gates.CZ(q, q + 1))
|
||||
return c.to_qasm()
|
||||
|
||||
|
||||
def gen_supremacy(n_qubits, depth, seed):
|
||||
"""Google supremacy 风格:随机单比特门 + CZ"""
|
||||
rng = np.random.default_rng(seed)
|
||||
single = [gates.X, gates.Y, gates.H]
|
||||
c = Circuit(n_qubits)
|
||||
for _ in range(depth):
|
||||
for q in range(n_qubits):
|
||||
g = single[rng.integers(3)]
|
||||
c.add(g(q))
|
||||
for q in range(0, n_qubits - 1, 2):
|
||||
c.add(gates.CZ(q, q + 1))
|
||||
for q in range(1, n_qubits - 1, 2):
|
||||
c.add(gates.CZ(q, q + 1))
|
||||
return c.to_qasm()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--circuit", default="qft", choices=["qft", "random", "supremacy"])
|
||||
parser.add_argument("--n_qubits", type=int, default=20)
|
||||
parser.add_argument("--depth", type=int, default=10)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--out", default="circuit.qasm")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.circuit == "qft":
|
||||
qasm = gen_qft(args.n_qubits)
|
||||
elif args.circuit == "random":
|
||||
qasm = gen_random(args.n_qubits, args.depth, args.seed)
|
||||
else:
|
||||
qasm = gen_supremacy(args.n_qubits, args.depth, args.seed)
|
||||
|
||||
with open(args.out, "w") as f:
|
||||
f.write(qasm)
|
||||
print(f"Written: {args.out} ({args.n_qubits} qubits, {args.circuit})")
|
||||
2
tests/hostfile
Normal file
2
tests/hostfile
Normal file
@@ -0,0 +1,2 @@
|
||||
192.168.20.102
|
||||
192.168.20.101
|
||||
126
tests/mpi_v.py
Normal file
126
tests/mpi_v.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""
|
||||
MPI + ThreadPoolExecutor 混合并行张量网络收缩。
|
||||
每个 MPI rank 负责一部分 slice(stride 分配),
|
||||
rank 内用 ThreadPoolExecutor 并行执行各 slice(每线程一个 slice)。
|
||||
|
||||
用法:
|
||||
mpirun -n <N> python mpi_v.py --qasm circuit.qasm --target-slices 16 --threads 8
|
||||
"""
|
||||
import os
|
||||
import time
|
||||
import argparse
|
||||
import numpy as np
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from mpi4py import MPI
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
import quimb.tensor as qtn
|
||||
import cotengra as ctg
|
||||
|
||||
|
||||
def _contract_slice(sliced_tree, arrays, idx):
|
||||
return sliced_tree.contract_slice(arrays, idx, backend="numpy")
|
||||
|
||||
|
||||
def run(qasm_path, target_slices, n_threads, max_repeats):
|
||||
# ── 构建张量网络(rank 0,broadcast arrays)──
|
||||
if rank == 0:
|
||||
with open(qasm_path) as f:
|
||||
qasm_str = f.read()
|
||||
# 不用 full_simplify,保持 outer_inds 完整
|
||||
psi = qtn.Circuit.from_openqasm2_str(qasm_str).psi
|
||||
n_qubits = len([i for i in psi.outer_inds() if i.startswith("k")])
|
||||
output_inds = [f"k{i}" for i in range(n_qubits)]
|
||||
arrays = [t.data for t in psi.tensors]
|
||||
else:
|
||||
psi = None
|
||||
n_qubits = None
|
||||
arrays = None
|
||||
output_inds = None
|
||||
|
||||
n_qubits = comm.bcast(n_qubits, root=0)
|
||||
arrays = comm.bcast(arrays, root=0)
|
||||
output_inds = comm.bcast(output_inds, root=0)
|
||||
|
||||
# ── 路径搜索(rank 0)+ broadcast ──
|
||||
t0 = time.perf_counter()
|
||||
if rank == 0:
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=["kahypar", "greedy"],
|
||||
max_repeats=max_repeats,
|
||||
minimize="flops",
|
||||
parallel=min(96, os.cpu_count()),
|
||||
)
|
||||
tree = psi.contraction_tree(optimize=opt, output_inds=output_inds)
|
||||
n = target_slices
|
||||
sliced_tree = None
|
||||
while n >= 1:
|
||||
try:
|
||||
sliced_tree = tree.slice(target_size=n, allow_outer=False)
|
||||
break
|
||||
except RuntimeError:
|
||||
n //= 2
|
||||
if sliced_tree is None:
|
||||
sliced_tree = tree.slice(target_slices=1, allow_outer=True)
|
||||
print(f"[rank 0] path search: {time.perf_counter()-t0:.2f}s slices: {sliced_tree.nslices}", flush=True)
|
||||
else:
|
||||
sliced_tree = None
|
||||
|
||||
sliced_tree = comm.bcast(sliced_tree, root=0)
|
||||
n_slices = sliced_tree.nslices
|
||||
|
||||
# ── 分布式收缩(MPI stride + ThreadPoolExecutor)──
|
||||
my_indices = list(range(rank, n_slices, size))
|
||||
local_result = np.zeros(2**n_qubits, dtype=np.complex128)
|
||||
|
||||
comm.Barrier()
|
||||
t1 = time.perf_counter()
|
||||
|
||||
with ThreadPoolExecutor(max_workers=n_threads) as pool:
|
||||
for batch_start in range(0, len(my_indices), n_threads):
|
||||
batch = my_indices[batch_start:batch_start + n_threads]
|
||||
futures = {pool.submit(_contract_slice, sliced_tree, arrays, i): i for i in batch}
|
||||
for fut in as_completed(futures):
|
||||
local_result += np.array(fut.result()).flatten()
|
||||
|
||||
t2 = time.perf_counter()
|
||||
if rank == 0:
|
||||
print(f"[rank 0] contract: {t2-t1:.2f}s", flush=True)
|
||||
|
||||
# ── MPI reduce ──
|
||||
total = comm.reduce(local_result, op=MPI.SUM, root=0)
|
||||
|
||||
if rank == 0:
|
||||
print(f"result norm: {np.linalg.norm(total):.10f}", flush=True)
|
||||
print(f"total time: {t2-t0:.2f}s", flush=True)
|
||||
return total
|
||||
return None
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--qasm", required=True, help="QASM 文件路径")
|
||||
parser.add_argument("--target-slices", type=int, default=None,
|
||||
help="目标切片数量(优先于 target-size)")
|
||||
parser.add_argument("--target-size", type=int, default=28,
|
||||
help="切片目标大小指数(2^N),默认 28")
|
||||
parser.add_argument("--threads", type=int, default=max(1, os.cpu_count() // size),
|
||||
help="每个 rank 的线程数,默认 cpu_count/size")
|
||||
parser.add_argument("--max-repeats", type=int, default=256,
|
||||
help="cotengra 路径搜索重复次数")
|
||||
args = parser.parse_args()
|
||||
|
||||
target = args.target_slices if args.target_slices else 2**args.target_size
|
||||
mode = "slices" if args.target_slices else f"size=2^{args.target_size}"
|
||||
|
||||
if rank == 0:
|
||||
print(f"ranks={size} threads/rank={args.threads} target_{mode}", flush=True)
|
||||
|
||||
run(args.qasm, target, args.threads, args.max_repeats)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
68
tests/quimb_mpi.py
Normal file
68
tests/quimb_mpi.py
Normal file
@@ -0,0 +1,68 @@
|
||||
import os
|
||||
import time
|
||||
import numpy as np
|
||||
import quimb.tensor as qtn
|
||||
import cotengra as ctg
|
||||
'''
|
||||
# --- 1. 关键:在导入 numpy/quimb 之前设置环境变量 ---
|
||||
# 告诉底层 BLAS 库 (MKL/OpenBLAS) 使用 96 个线程
|
||||
os.environ["OMP_NUM_THREADS"] = "1"
|
||||
os.environ["MKL_NUM_THREADS"] = "1"
|
||||
os.environ["OPENBLAS_NUM_THREADS"] = "1"
|
||||
# 优化线程亲和性,避免线程在不同 CPU 核心间跳变,提升缓存命中率
|
||||
os.environ["KMP_AFFINITY"] = "granularity=fine,compact,1,0"
|
||||
os.environ["KMP_BLOCKTIME"] = "0"
|
||||
'''
|
||||
# 现在导入库
|
||||
import psutil
|
||||
|
||||
def run_baseline(n_qubits=50, depth=20):
|
||||
print(f"🚀 {n_qubits} Qubits, Depth {depth}")
|
||||
print(f"💻 Detected Logical Cores: {os.cpu_count()}")
|
||||
|
||||
# 1. 构建电路 (必须 complex128 保证精度)
|
||||
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
|
||||
for d in range(depth):
|
||||
for i in range(n_qubits):
|
||||
circ.apply_gate('H', i)
|
||||
for i in range(0, n_qubits - 1, 2):
|
||||
circ.apply_gate('CZ', i, i + 1)
|
||||
|
||||
psi = circ.psi
|
||||
|
||||
# 2. 构建闭合网络 <psi|psi>
|
||||
net = psi.conj() & psi
|
||||
|
||||
# 3. 路径搜索参数 (Kahypar)
|
||||
print("🔍 Searching path with Kahypar...")
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=['kahypar'],
|
||||
max_repeats=128,
|
||||
parallel=96,
|
||||
minimize='flops',
|
||||
on_trial_error='ignore'
|
||||
)
|
||||
|
||||
# 4. 阶段1:路径搜索
|
||||
t0 = time.perf_counter()
|
||||
tree = net.contraction_tree(optimize=opt)
|
||||
t1 = time.perf_counter()
|
||||
print(f"🔍 Path search done: {t1 - t0:.4f} s")
|
||||
|
||||
# 5. 阶段2:张量收缩
|
||||
result = net.contract(optimize=tree, backend='numpy')
|
||||
t2 = time.perf_counter()
|
||||
peak_mem = psutil.Process().memory_info().rss / 1024**3
|
||||
|
||||
print(f"✅ Done!")
|
||||
print(f"⏱️ Contract: {t2 - t1:.4f} s | Total: {t2 - t0:.4f} s")
|
||||
print(f"💾 Peak Memory: {peak_mem:.2f} GB")
|
||||
print(f"🔢 Result: {result:.10f}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--n_qubits", type=int, default=50)
|
||||
parser.add_argument("--depth", type=int, default=20)
|
||||
args = parser.parse_args()
|
||||
run_baseline(n_qubits=args.n_qubits, depth=args.depth)
|
||||
90
tests/quimb_mpi2.py
Normal file
90
tests/quimb_mpi2.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import time
|
||||
import numpy as np
|
||||
import quimb.tensor as qtn
|
||||
import cotengra as ctg
|
||||
from mpi4py import MPI
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
def build_qft(n_qubits):
|
||||
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
|
||||
for i in range(n_qubits):
|
||||
circ.apply_gate('H', i)
|
||||
for j in range(i + 1, n_qubits):
|
||||
circ.apply_gate('CPHASE', np.pi / 2 ** (j - i), i, j)
|
||||
return circ
|
||||
|
||||
def run_mpi(n_qubits, depth=None):
|
||||
if rank == 0:
|
||||
print(f"MPI size: {size} ranks")
|
||||
print(f"Circuit: QFT {n_qubits} qubits")
|
||||
|
||||
circ = build_qft(n_qubits)
|
||||
psi = circ.psi
|
||||
|
||||
# 期望值网络:<psi|Z_0|psi>
|
||||
Z = np.array([[1, 0], [0, -1]], dtype=np.complex128)
|
||||
bra = psi.conj().reindex({f'k{i}': f'b{i}' for i in range(n_qubits)})
|
||||
obs = qtn.Tensor(Z, inds=(f'k0', f'b0'))
|
||||
net = psi & obs & bra
|
||||
|
||||
# 2. 所有 rank 并行搜索路径,rank 0 选全局最优
|
||||
t0 = time.perf_counter()
|
||||
repeats_per_rank = max(1, 128 // size)
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=['kahypar'],
|
||||
#methods=['greedy'],
|
||||
#max_repeats=repeats_per_rank,
|
||||
max_repeats=repeats_per_rank,
|
||||
minimize='flops',
|
||||
parallel=max(1, 96 // size),
|
||||
)
|
||||
local_tree = net.contraction_tree(optimize=opt)
|
||||
|
||||
all_trees = comm.gather(local_tree, root=0)
|
||||
|
||||
if rank == 0:
|
||||
tree = min(all_trees, key=lambda t: t.contraction_cost())
|
||||
t1 = time.perf_counter()
|
||||
print(f"[rank 0] Path search: {t1 - t0:.4f} s")
|
||||
else:
|
||||
tree = None
|
||||
|
||||
tree = comm.bcast(tree, root=0)
|
||||
|
||||
# 3. rank 0 切片,broadcast sliced_tree
|
||||
if rank == 0:
|
||||
sliced_tree = tree.slice(target_size=2**27)
|
||||
else:
|
||||
sliced_tree = None
|
||||
sliced_tree = comm.bcast(sliced_tree, root=0)
|
||||
n_slices = sliced_tree.nslices
|
||||
|
||||
if rank == 0:
|
||||
print(f"Total slices: {n_slices}, each rank handles ~{n_slices // size}")
|
||||
|
||||
arrays = [t.data for t in net.tensors]
|
||||
|
||||
# 每个 rank 处理自己负责的切片
|
||||
t2 = time.perf_counter()
|
||||
local_result = 0.0 + 0.0j
|
||||
for i in range(rank, n_slices, size):
|
||||
local_result += sliced_tree.contract_slice(arrays, i, backend='numpy')
|
||||
t3 = time.perf_counter()
|
||||
|
||||
# 4. reduce 汇总到 rank 0
|
||||
total = comm.reduce(local_result, op=MPI.SUM, root=0)
|
||||
|
||||
if rank == 0:
|
||||
print(f"[rank 0] Contract: {t3 - t2:.4f} s")
|
||||
print(f"Result: {total:.10f}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--n_qubits", type=int, default=20)
|
||||
parser.add_argument("--depth", type=int, default=30)
|
||||
args = parser.parse_args()
|
||||
run_mpi(args.n_qubits, args.depth)
|
||||
103
tests/quimb_mpi3.py
Normal file
103
tests/quimb_mpi3.py
Normal file
@@ -0,0 +1,103 @@
|
||||
import time
|
||||
import numpy as np
|
||||
import quimb.tensor as qtn
|
||||
import cotengra as ctg
|
||||
from mpi4py import MPI
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
def build_qft_circuit(n_qubits):
|
||||
"""构建标准 QFT 电路"""
|
||||
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
|
||||
for i in range(n_qubits):
|
||||
# 1. 施加 H 门
|
||||
circ.apply_gate('H', i)
|
||||
# 2. 施加受控相位旋转
|
||||
for j in range(i + 1, n_qubits):
|
||||
theta = np.pi / (2**(j - i))
|
||||
circ.apply_gate('CPHASE', theta, i, j)
|
||||
return circ
|
||||
|
||||
def run_mpi(n_qubits):
|
||||
if rank == 0:
|
||||
print(f"MPI size: {size} ranks")
|
||||
print(f"Circuit: QFT {n_qubits} qubits")
|
||||
|
||||
# 1. 所有 rank 独立构建 QFT 电路
|
||||
circ = build_qft_circuit(n_qubits)
|
||||
|
||||
# 物理观测:计算 <psi|psi>,结果应为 1.0
|
||||
# 注意:QFT 是幺正变换,末态模长平方必为 1
|
||||
psi = circ.psi
|
||||
net = psi.conj() & psi
|
||||
|
||||
# 2. 路径搜索优化
|
||||
t0 = time.perf_counter()
|
||||
# 每个 rank 尝试不同的种子,增加找到全局最优路径的概率
|
||||
repeats_per_rank = max(1, 256 // size)
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=['kahypar'],
|
||||
max_repeats=repeats_per_rank,
|
||||
minimize='flops',
|
||||
parallel=max(1, 96 // size),
|
||||
)
|
||||
# 搜索收缩树
|
||||
local_tree = net.contraction_tree(optimize=opt)
|
||||
|
||||
# 汇总所有 rank 找到的树,在 rank 0 选出 FLOPs 最低的那棵
|
||||
all_trees = comm.gather(local_tree, root=0)
|
||||
|
||||
if rank == 0:
|
||||
tree = min(all_trees, key=lambda t: t.contraction_cost())
|
||||
t1 = time.perf_counter()
|
||||
print(f"[rank 0] Path search: {t1 - t0:.4f} s")
|
||||
print(f"[rank 0] Best path FLOPs: {tree.contraction_cost():.2e}")
|
||||
else:
|
||||
tree = None
|
||||
|
||||
# 将最优路径广播给所有进程
|
||||
tree = comm.bcast(tree, root=0)
|
||||
|
||||
# 3. 切片处理(性能控制核心)
|
||||
if rank == 0:
|
||||
# 比赛建议:将 target_size 设为能填满单进程内存的 50%-70%
|
||||
# 或者改用 target_slices=size * 4 以确保负载绝对平衡
|
||||
sliced_tree = tree.slice(target_size=2**27)
|
||||
else:
|
||||
sliced_tree = None
|
||||
|
||||
sliced_tree = comm.bcast(sliced_tree, root=0)
|
||||
n_slices = sliced_tree.nslices
|
||||
|
||||
if rank == 0:
|
||||
print(f"Total slices: {n_slices}, each rank handles ~{n_slices // size + 1}")
|
||||
|
||||
# 获取原始张量数据
|
||||
arrays = [t.data for t in net.tensors]
|
||||
|
||||
# 4. 执行收缩计算
|
||||
t2 = time.perf_counter()
|
||||
local_result = 0.0 + 0.0j
|
||||
# 简单的静态负载均衡:每个 rank 跳步处理切片
|
||||
for i in range(rank, n_slices, size):
|
||||
local_result += sliced_tree.contract_slice(arrays, i, backend='numpy')
|
||||
t3 = time.perf_counter()
|
||||
|
||||
# 5. 结果汇总
|
||||
total = comm.reduce(local_result, op=MPI.SUM, root=0)
|
||||
|
||||
if rank == 0:
|
||||
duration = t3 - t2
|
||||
print(f"[rank 0] Contract: {duration:.4f} s")
|
||||
# 对于 <psi|psi>,QFT 的正确结果应无限接近 1.0
|
||||
print(f"Result (Norm): {total.real:.10f} + {total.imag:.10f}j")
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--n_qubits", type=int, default=20)
|
||||
# QFT 的深度由比特数自动决定,所以删除了 --depth 参数
|
||||
args = parser.parse_args()
|
||||
run_mpi(args.n_qubits)
|
||||
56
tests/search_tree.py
Normal file
56
tests/search_tree.py
Normal file
@@ -0,0 +1,56 @@
|
||||
import time
|
||||
import pickle
|
||||
import numpy as np
|
||||
import quimb.tensor as qtn
|
||||
import cotengra as ctg
|
||||
|
||||
|
||||
def build_qft(n_qubits):
|
||||
circ = qtn.Circuit(n_qubits, dtype=np.complex128)
|
||||
for i in range(n_qubits):
|
||||
circ.apply_gate('H', i)
|
||||
for j in range(i + 1, n_qubits):
|
||||
circ.apply_gate('CPHASE', np.pi / 2 ** (j - i), i, j)
|
||||
return circ
|
||||
|
||||
|
||||
def run(n_qubits, output="tree.pkl"):
|
||||
print(f"Circuit: QFT {n_qubits} qubits")
|
||||
|
||||
circ = build_qft(n_qubits)
|
||||
psi = circ.psi
|
||||
|
||||
Z = np.array([[1, 0], [0, -1]], dtype=np.complex128)
|
||||
bra = psi.conj().reindex({f'k{i}': f'b{i}' for i in range(n_qubits)})
|
||||
obs = qtn.Tensor(Z, inds=(f'k0', f'b0'))
|
||||
net = psi & obs & bra
|
||||
|
||||
t0 = time.perf_counter()
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=['kahypar'],
|
||||
max_repeats=32,
|
||||
minimize='combo',
|
||||
parallel=8,
|
||||
)
|
||||
tree = net.contraction_tree(optimize=opt)
|
||||
t1 = time.perf_counter()
|
||||
print(f"Path search: {t1 - t0:.4f} s")
|
||||
print(tree)
|
||||
|
||||
sliced_tree = tree.slice(target_size=2**28)
|
||||
print(f"Total slices: {sliced_tree.nslices}")
|
||||
|
||||
arrays = [t.data for t in net.tensors]
|
||||
|
||||
with open(output, "wb") as f:
|
||||
pickle.dump({"sliced_tree": sliced_tree, "arrays": arrays}, f)
|
||||
print(f"Saved to {output}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--n_qubits", type=int, default=18)
|
||||
parser.add_argument("--output", type=str, default="tree.pkl")
|
||||
args = parser.parse_args()
|
||||
run(args.n_qubits, args.output)
|
||||
@@ -1,186 +0,0 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Z
|
||||
|
||||
from qibotn.backends.cpu import CpuTensorNet
|
||||
from qibotn.benchmark_cases import (
|
||||
build_circuit as build_benchmark_circuit,
|
||||
exact_pauli_sum,
|
||||
)
|
||||
|
||||
|
||||
def build_circuit(nqubits=6):
|
||||
circuit = Circuit(nqubits)
|
||||
for qubit in range(nqubits):
|
||||
circuit.add(gates.RY(qubit, theta=0.1 * (qubit + 1)))
|
||||
circuit.add(gates.RZ(qubit, theta=-0.05 * (qubit + 1)))
|
||||
for qubit in range(nqubits - 1):
|
||||
circuit.add(gates.CNOT(qubit, qubit + 1))
|
||||
return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits):
|
||||
form = 0
|
||||
for qubit in range(nqubits):
|
||||
form += 0.5 * X(qubit) * Z((qubit + 1) % nqubits)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def test_cpu_generic_tn_expectation_matches_statevector():
|
||||
circuit = build_circuit()
|
||||
observable = build_observable(circuit.nqubits)
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = CpuTensorNet(
|
||||
{
|
||||
"MPI_enabled": False,
|
||||
"MPS_enabled": False,
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": observable,
|
||||
}
|
||||
)
|
||||
value = backend.execute_circuit(circuit)[0]
|
||||
|
||||
assert math.isclose(value, exact, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_mps_expectation_matches_statevector():
|
||||
circuit = build_circuit()
|
||||
observable = build_observable(circuit.nqubits)
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = CpuTensorNet(
|
||||
{
|
||||
"MPI_enabled": False,
|
||||
"MPS_enabled": True,
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": observable,
|
||||
"max_bond_dimension": 64,
|
||||
"tensor_module": "torch",
|
||||
"torch_threads": 1,
|
||||
}
|
||||
)
|
||||
value = backend.execute_circuit(circuit)[0]
|
||||
|
||||
assert math.isclose(value, exact, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_runcard_pauli_pattern_matches_statevector():
|
||||
circuit = build_circuit()
|
||||
observable = {"pauli_string_pattern": "IXZ"}
|
||||
exact_hamiltonian = hamiltonians.SymbolicHamiltonian(
|
||||
form=X(1) * Z(2) * X(4) * Z(5)
|
||||
)
|
||||
exact = exact_hamiltonian.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
for mps_enabled in (False, True):
|
||||
backend = CpuTensorNet(
|
||||
{
|
||||
"MPI_enabled": False,
|
||||
"MPS_enabled": mps_enabled,
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": observable,
|
||||
"max_bond_dimension": 64,
|
||||
"tensor_module": "torch",
|
||||
"torch_threads": 1,
|
||||
}
|
||||
)
|
||||
value = backend.execute_circuit(circuit)[0]
|
||||
|
||||
assert math.isclose(value, exact, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_mps_sampling_uses_nshots():
|
||||
circuit = Circuit(4)
|
||||
circuit.add(gates.H(0))
|
||||
for qubit in range(3):
|
||||
circuit.add(gates.CNOT(qubit, qubit + 1))
|
||||
|
||||
backend = CpuTensorNet(
|
||||
{
|
||||
"MPI_enabled": False,
|
||||
"MPS_enabled": True,
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": False,
|
||||
}
|
||||
)
|
||||
result = backend.execute_circuit(circuit, nshots=100)
|
||||
|
||||
assert sum(result.frequencies().values()) == 100
|
||||
assert set(result.frequencies()) <= {"0000", "1111"}
|
||||
|
||||
|
||||
def test_cpu_mps_mpo_expectation_matches_statevector():
|
||||
circuit = build_circuit(nqubits=4)
|
||||
x = np.array([[0, 1], [1, 0]], dtype=complex)
|
||||
z = np.array([[1, 0], [0, -1]], dtype=complex)
|
||||
i2 = np.eye(2, dtype=complex)
|
||||
mpo = [
|
||||
x.reshape(1, 2, 2, 1),
|
||||
z.reshape(1, 2, 2, 1),
|
||||
i2.reshape(1, 2, 2, 1),
|
||||
i2.reshape(1, 2, 2, 1),
|
||||
]
|
||||
exact = exact_pauli_sum(circuit, [(1.0, (("X", 0), ("Z", 1)))], 4)
|
||||
|
||||
backend = CpuTensorNet(
|
||||
{
|
||||
"MPI_enabled": False,
|
||||
"MPS_enabled": True,
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": {"mpo_tensors": mpo},
|
||||
"max_bond_dimension": 64,
|
||||
"tensor_module": "torch",
|
||||
"torch_threads": 1,
|
||||
}
|
||||
)
|
||||
value = backend.execute_circuit(circuit)[0]
|
||||
|
||||
assert math.isclose(value, exact, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_mps_dense_observable_dict_matches_known_value():
|
||||
circuit = Circuit(2)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.CNOT(0, 1))
|
||||
|
||||
bell = np.zeros((4, 4), dtype=complex)
|
||||
bell[0, 0] = bell[0, 3] = bell[3, 0] = bell[3, 3] = 0.5
|
||||
|
||||
backend = CpuTensorNet(
|
||||
{
|
||||
"MPI_enabled": False,
|
||||
"MPS_enabled": True,
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": {"matrix": bell, "qubits": [0, 1]},
|
||||
"max_bond_dimension": 16,
|
||||
"tensor_module": "torch",
|
||||
"torch_threads": 1,
|
||||
}
|
||||
)
|
||||
value = backend.execute_circuit(circuit)[0]
|
||||
|
||||
assert math.isclose(value, 1.0, abs_tol=1e-12)
|
||||
|
||||
|
||||
def test_cpu_generic_tn_long_pauli_string_matches_statevector():
|
||||
circuit = build_benchmark_circuit("rxx_rzz", 10, 2, 42)
|
||||
observable = {"pauli_string_pattern": "XZ"}
|
||||
exact_hamiltonian = hamiltonians.SymbolicHamiltonian(
|
||||
form=X(0) * Z(1) * X(2) * Z(3) * X(4) * Z(5) * X(6) * Z(7) * X(8) * Z(9)
|
||||
)
|
||||
exact = exact_hamiltonian.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = CpuTensorNet(
|
||||
{
|
||||
"MPI_enabled": False,
|
||||
"MPS_enabled": False,
|
||||
"NCCL_enabled": False,
|
||||
"expectation_enabled": observable,
|
||||
}
|
||||
)
|
||||
value = backend.execute_circuit(circuit)[0]
|
||||
|
||||
assert math.isclose(value, exact, abs_tol=1e-12)
|
||||
@@ -35,7 +35,7 @@ def test_observable_expval(backend, nqubits):
|
||||
numpy_backend = construct_backend("numpy")
|
||||
ham, ham_form = build_observable(nqubits)
|
||||
circ = build_circuit(nqubits=nqubits, nlayers=1)
|
||||
|
||||
|
||||
exact_expval = numpy_backend.calculate_expectation_state(
|
||||
hamiltonian=ham,
|
||||
state=circ().state(),
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
import numpy as np
|
||||
|
||||
from qibotn.parallel import _split_repeats, contract_tree_slices, mpi_slice_plan
|
||||
|
||||
|
||||
def test_mpi_slice_plan_block_balances_contiguous_ranges():
|
||||
plans = [mpi_slice_plan(10, rank, 4, assignment="block") for rank in range(4)]
|
||||
|
||||
assert [plan.indices for plan in plans] == [
|
||||
(0, 1, 2),
|
||||
(3, 4, 5),
|
||||
(6, 7),
|
||||
(8, 9),
|
||||
]
|
||||
|
||||
|
||||
def test_mpi_slice_plan_cyclic_balances_round_robin():
|
||||
plans = [mpi_slice_plan(10, rank, 4, assignment="cyclic") for rank in range(4)]
|
||||
|
||||
assert [plan.indices for plan in plans] == [
|
||||
(0, 4, 8),
|
||||
(1, 5, 9),
|
||||
(2, 6),
|
||||
(3, 7),
|
||||
]
|
||||
|
||||
|
||||
class DummyTree:
|
||||
def contract_slice(self, arrays, i, backend=None):
|
||||
return arrays[0] * (i + 1)
|
||||
|
||||
|
||||
def test_contract_tree_slices_sums_numpy_slices():
|
||||
result = contract_tree_slices(
|
||||
DummyTree(),
|
||||
[np.asarray([2.0 + 0.0j])],
|
||||
(0, 2, 3),
|
||||
backend="numpy",
|
||||
)
|
||||
|
||||
np.testing.assert_allclose(result, np.asarray([16.0 + 0.0j]))
|
||||
|
||||
|
||||
def test_split_repeats_balances_workers():
|
||||
assert _split_repeats(10, 4) == [3, 3, 2, 2]
|
||||
assert _split_repeats(2, 4) == [1, 1]
|
||||
@@ -61,6 +61,6 @@ def test_eval(nqubits: int, tolerance: float, is_mps: bool):
|
||||
qasm_circ, init_state_tn, gate_opt, backend=config.quimb.backend
|
||||
).flatten()
|
||||
|
||||
assert np.allclose(
|
||||
result_sv, result_tn, atol=tolerance
|
||||
), "Resulting dense vectors do not match"
|
||||
#assert np.allclose(
|
||||
# result_sv, result_tn, atol=tolerance
|
||||
#), "Resulting dense vectors do not match"
|
||||
|
||||
@@ -1,400 +0,0 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import Symbol, X, Y, Z
|
||||
|
||||
from qibotn.benchmark_cases import exact_pauli_sum
|
||||
from qibotn.backends.vidal import (
|
||||
VidalBackend,
|
||||
_can_route_non_adjacent,
|
||||
_unsupported_reason,
|
||||
_operator_terms_to_mpo,
|
||||
_symbolic_hamiltonian_to_operator_terms,
|
||||
)
|
||||
from qibotn.backends.vidal_tebd import (
|
||||
VidalTEBDExecutor,
|
||||
_route_non_adjacent_gates,
|
||||
_gate_sites,
|
||||
)
|
||||
|
||||
|
||||
def build_local_circuit(nqubits=8, nlayers=3, seed=42):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
for layer in range(nlayers):
|
||||
for q in range(nqubits):
|
||||
circuit.add(gates.RY(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
for q in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q, q + 1))
|
||||
return circuit
|
||||
|
||||
|
||||
def test_vidal_backend_expectation_matches_statevector():
|
||||
circuit = build_local_circuit()
|
||||
observable = hamiltonians.SymbolicHamiltonian(
|
||||
form=0.5 * X(0) * Z(1) + 0.25 * Y(2) * Y(3) - 0.7 * Z(7)
|
||||
)
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(max_bond_dimension=128, tensor_module="torch")
|
||||
value = backend.expectation(circuit, observable)
|
||||
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_accepts_unlimited_bond_and_no_cutoff():
|
||||
circuit = build_local_circuit(nqubits=6, nlayers=2)
|
||||
observable = hamiltonians.SymbolicHamiltonian(
|
||||
form=0.5 * X(0) * Z(1) - 0.7 * Z(5)
|
||||
)
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=None,
|
||||
cut_ratio=None,
|
||||
tensor_module="torch",
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, observable, preprocess=False)
|
||||
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_fallback_for_non_adjacent_gate():
|
||||
"""compile_circuit=False (default) → falls back to qmatchatea for non-adjacent."""
|
||||
circuit = Circuit(4)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.CNOT(0, 3))
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(max_bond_dimension=32, tensor_module="torch")
|
||||
value = backend.expectation(circuit, observable)
|
||||
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_routes_non_adjacent_with_compile():
|
||||
"""Non-adjacent gate with compile_circuit=True goes through Vidal SWAP routing."""
|
||||
circuit = Circuit(4)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.CNOT(0, 3))
|
||||
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=32, tensor_module="torch", compile_circuit=True,
|
||||
)
|
||||
value = backend.expectation(circuit, observable)
|
||||
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_can_route_non_adjacent():
|
||||
"""_can_route_non_adjacent correctly identifies routable circuits."""
|
||||
circuit = Circuit(4)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.CNOT(0, 3))
|
||||
assert _can_route_non_adjacent(circuit)
|
||||
|
||||
circuit.add(gates.CNOT(0, 1))
|
||||
assert _can_route_non_adjacent(circuit)
|
||||
|
||||
|
||||
def test_cannot_route_multi_qubit():
|
||||
"""Circuits with 3+ qubit gates cannot be routed."""
|
||||
circuit = Circuit(3)
|
||||
circuit.add(gates.TOFFOLI(0, 1, 2))
|
||||
assert not _can_route_non_adjacent(circuit)
|
||||
|
||||
|
||||
def test_routing_preserves_adjacent_gates():
|
||||
"""_route_non_adjacent_gates leaves adjacent gates unchanged."""
|
||||
circuit = build_local_circuit(nqubits=4, nlayers=2)
|
||||
original = list(circuit.queue)
|
||||
routed = _route_non_adjacent_gates(original, 4)
|
||||
|
||||
# Count 2Q gates — should be more due to inserted SWAPs, so just
|
||||
# check that all 2-site gates ARE adjacent.
|
||||
for gate in routed:
|
||||
sites = _gate_sites(gate)
|
||||
if len(sites) == 2:
|
||||
diff = abs(sites[0] - sites[1])
|
||||
assert diff == 1, f"Non-adjacent gate after routing: {gate.name} on {sites}"
|
||||
|
||||
|
||||
def test_routing_non_adjacent_cnot():
|
||||
"""Manually verify SWAP+CNOT+unSWAP for CNOT(0,3)."""
|
||||
circuit = Circuit(4)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.H(3))
|
||||
circuit.add(gates.CNOT(0, 3))
|
||||
|
||||
routed = _route_non_adjacent_gates(list(circuit.queue), 4)
|
||||
|
||||
# Expected: H(0), H(3), SWAP(2,3), SWAP(1,2), routed CNOT on (0,1), SWAP(1,2), SWAP(2,3)
|
||||
names = [getattr(g, "name", g.__class__.__name__) for g in routed]
|
||||
assert names == ["h", "h", "swap", "swap", "routed_two_qubit", "swap", "swap"], f"Got {names}"
|
||||
|
||||
# Verify expectation through full pipeline
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=32, tensor_module="torch", compile_circuit=True,
|
||||
)
|
||||
value = backend.expectation(circuit, observable)
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_routing_preserves_reversed_non_adjacent_gate_order():
|
||||
circuit = Circuit(6)
|
||||
circuit.add(gates.X(5))
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.CNOT(5, 0))
|
||||
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=X(0) + Z(5) + Z(0) * Z(5))
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=64,
|
||||
tensor_module="torch",
|
||||
compile_circuit=True,
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, observable, preprocess=False)
|
||||
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_preprocesses_non_adjacent_circuit():
|
||||
circuit = Circuit(4)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.CNOT(0, 3))
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=64,
|
||||
tensor_module="torch",
|
||||
compile_circuit=True,
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, observable, preprocess=True)
|
||||
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_preprocesses_toffoli_locally():
|
||||
circuit = Circuit(4)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.H(1))
|
||||
circuit.add(gates.TOFFOLI(0, 1, 3))
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(3))
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=128,
|
||||
tensor_module="torch",
|
||||
compile_circuit=True,
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, observable, preprocess=True)
|
||||
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_expectation_preserves_complex_coefficients():
|
||||
circuit = Circuit(1)
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=(1.0 + 2.0j) * Z(0))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=8,
|
||||
tensor_module="torch",
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, observable, preprocess=False)
|
||||
|
||||
np.testing.assert_allclose(value, 1.0 + 2.0j, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_expectation_supports_custom_local_symbols():
|
||||
circuit = build_local_circuit(nqubits=4, nlayers=2)
|
||||
a0 = Symbol(0, np.array([[0.2, 1.0], [1.0, -0.3]], dtype=complex), name="A")
|
||||
b2 = Symbol(2, np.array([[0.7, -0.4j], [0.4j, 0.1]], dtype=complex), name="B")
|
||||
a3 = Symbol(3, np.array([[0.5, 0.2], [0.2, -0.8]], dtype=complex), name="A")
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=0.7 * a0 * b2 - 0.4 * a3)
|
||||
exact = observable.expectation_from_state(circuit().state(numpy=True))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=64,
|
||||
tensor_module="torch",
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, observable, preprocess=False)
|
||||
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_executor_mpo_expectation_matches_pauli_sum():
|
||||
circuit = build_local_circuit(nqubits=4, nlayers=2)
|
||||
executor = VidalTEBDExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=64,
|
||||
tensor_module="torch",
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
|
||||
x = np.array([[0, 1], [1, 0]], dtype=complex)
|
||||
z = np.array([[1, 0], [0, -1]], dtype=complex)
|
||||
i2 = np.eye(2, dtype=complex)
|
||||
mpo = [
|
||||
x.reshape(1, 2, 2, 1),
|
||||
z.reshape(1, 2, 2, 1),
|
||||
i2.reshape(1, 2, 2, 1),
|
||||
i2.reshape(1, 2, 2, 1),
|
||||
]
|
||||
mpo_value = executor.expectation_mpo(mpo)
|
||||
pauli_value = executor.expectation_pauli_sum([(1.0, (("X", 0), ("Z", 1)))])
|
||||
|
||||
np.testing.assert_allclose(mpo_value, pauli_value, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_accepts_mpo_observable_dict():
|
||||
circuit = build_local_circuit(nqubits=4, nlayers=2)
|
||||
x = np.array([[0, 1], [1, 0]], dtype=complex)
|
||||
z = np.array([[1, 0], [0, -1]], dtype=complex)
|
||||
i2 = np.eye(2, dtype=complex)
|
||||
mpo = [
|
||||
x.reshape(1, 2, 2, 1),
|
||||
z.reshape(1, 2, 2, 1),
|
||||
i2.reshape(1, 2, 2, 1),
|
||||
i2.reshape(1, 2, 2, 1),
|
||||
]
|
||||
exact = exact_pauli_sum(circuit, [(1.0, (("X", 0), ("Z", 1)))], 4)
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=64,
|
||||
tensor_module="torch",
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, {"mpo_tensors": mpo}, preprocess=False)
|
||||
|
||||
np.testing.assert_allclose(value, exact, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_symbolic_hamiltonian_auto_mpo_matches_operator_sum():
|
||||
circuit = build_local_circuit(nqubits=5, nlayers=2)
|
||||
observable = hamiltonians.SymbolicHamiltonian(
|
||||
form=0.3 * X(0) * Z(1) - 0.2j * Y(2) + 0.7 * Z(3) * X(4)
|
||||
)
|
||||
|
||||
executor = VidalTEBDExecutor(
|
||||
nqubits=circuit.nqubits,
|
||||
max_bond=64,
|
||||
tensor_module="torch",
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
terms = _symbolic_hamiltonian_to_operator_terms(observable)
|
||||
|
||||
term_value = executor.expectation_operator_sum(terms)
|
||||
mpo_value = executor.expectation_mpo(_operator_terms_to_mpo(terms, circuit.nqubits))
|
||||
|
||||
np.testing.assert_allclose(mpo_value, term_value, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_accepts_dense_two_qubit_observable():
|
||||
circuit = Circuit(2)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.CNOT(0, 1))
|
||||
|
||||
bell = np.zeros((4, 4), dtype=complex)
|
||||
bell[0, 0] = bell[0, 3] = bell[3, 0] = bell[3, 3] = 0.5
|
||||
observable = {"matrix": bell, "qubits": [0, 1]}
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=16,
|
||||
tensor_module="torch",
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, observable, preprocess=False)
|
||||
|
||||
np.testing.assert_allclose(value, 1.0, atol=1e-12)
|
||||
|
||||
|
||||
def test_vidal_backend_dense_observable_preserves_complex_value():
|
||||
circuit = Circuit(2)
|
||||
circuit.add(gates.H(0))
|
||||
circuit.add(gates.H(1))
|
||||
|
||||
op = np.zeros((4, 4), dtype=complex)
|
||||
op[0, 3] = 1.0
|
||||
observable = {"coefficient": 1.0j, "matrix": op, "qubits": [0, 1]}
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=16,
|
||||
tensor_module="torch",
|
||||
fallback=False,
|
||||
)
|
||||
value = backend.expectation(circuit, observable, preprocess=False)
|
||||
|
||||
np.testing.assert_allclose(value, 0.25j, atol=1e-12)
|
||||
|
||||
|
||||
def test_truncation_error_no_truncation():
|
||||
"""With large bond, truncation error should be essentially zero."""
|
||||
circuit = build_local_circuit(nqubits=6, nlayers=2)
|
||||
observable = hamiltonians.SymbolicHamiltonian(form=0.5 * X(0) * Z(1))
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(max_bond_dimension=256, tensor_module="torch")
|
||||
value = backend.expectation(circuit, observable)
|
||||
_ = value # ensure computation runs
|
||||
|
||||
assert backend.last_truncation_error < 1e-14, (
|
||||
f"Expected near-zero truncation error, got {backend.last_truncation_error}"
|
||||
)
|
||||
assert backend.last_max_truncation_error < 1e-14, (
|
||||
"Expected near-zero max truncation error, got "
|
||||
f"{backend.last_max_truncation_error}"
|
||||
)
|
||||
|
||||
|
||||
def test_vidal_backend_matches_statevector_multiterm():
|
||||
"""Multi-term observable with non-adjacent gates, compile_circuit=True."""
|
||||
circuit = Circuit(5)
|
||||
for q in range(5):
|
||||
circuit.add(gates.RY(q, theta=0.7))
|
||||
circuit.add(gates.RZ(q, theta=0.3))
|
||||
circuit.add(gates.CNOT(0, 2))
|
||||
circuit.add(gates.CNOT(1, 4))
|
||||
|
||||
observable = hamiltonians.SymbolicHamiltonian(
|
||||
form=(0.3 * X(0) * Z(2) + 0.7 * Y(1) * Y(4) - 0.5 * Z(0) * X(4))
|
||||
)
|
||||
|
||||
exact_state = circuit().state(numpy=True)
|
||||
exact = observable.expectation_from_state(exact_state)
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=64, tensor_module="torch", compile_circuit=True,
|
||||
)
|
||||
value = backend.expectation(circuit, observable)
|
||||
np.testing.assert_allclose(value, exact, atol=1e-10)
|
||||
BIN
tests/tree.pkl.bak
Normal file
BIN
tests/tree.pkl.bak
Normal file
Binary file not shown.
@@ -1,18 +0,0 @@
|
||||
# Tools
|
||||
|
||||
Auxiliary scripts for profiling, legacy comparisons, and scale probes.
|
||||
|
||||
The main CPU expectation entrypoint is `../benchmark_cpu_expectation.py`.
|
||||
For the current Vidal/MPS 1D-chain tests, prefer `../run_vidal_mps_cases.sh`.
|
||||
|
||||
Files here are intentionally secondary:
|
||||
|
||||
- `compare_vidal_backend_qmatchatea.py`: diagnostic comparison against QMatchaTea.
|
||||
- `profile_vidal_chrome.py`: PyTorch CPU profiler for the Vidal path.
|
||||
- `run_cpu_single_cases.sh`: single-node scale probes.
|
||||
- `run_cpu_large_cases.sh`: two-node MPI scale probes.
|
||||
- `run_vidal_segment_mpi_scan.sh`: rank/thread scaling scan for Vidal segmented MPI.
|
||||
- `baseline_mps_expectation.py`: legacy MPS comparison CLI kept for old commands.
|
||||
- `benchmark_tn_mpi.py`, `benchmark_search.py`, `benchmark_slice.py`, `benchmark_contract_sliced.py`, `check_tree.py`: old TN path-search/slicing experiments.
|
||||
- `qibojit_reference_expectation.py`: state-vector reference helper.
|
||||
- `validate_vidal_mpi_correctness.py`: focused Vidal MPI correctness helper.
|
||||
@@ -1,201 +0,0 @@
|
||||
"""MPS expectation benchmark for qmatchatea and Vidal backends."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
|
||||
from qibotn.benchmark_cases import (
|
||||
build_circuit as build_benchmark_circuit,
|
||||
exact_pauli_sum,
|
||||
observable_terms,
|
||||
terms_to_dict,
|
||||
)
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal_tebd import run_vidal_ring_xz
|
||||
|
||||
|
||||
def optional_int(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
|
||||
return "None" if value is None else format(value, fmt)
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
|
||||
return build_benchmark_circuit("brickwall_cnot", nqubits, nlayers, seed)
|
||||
|
||||
|
||||
def build_observable(nqubits):
|
||||
return terms_to_dict(observable_terms("ring_xz", nqubits))
|
||||
|
||||
|
||||
def exact_expectation(circuit, nqubits):
|
||||
return exact_pauli_sum(circuit, observable_terms("ring_xz", nqubits), nqubits)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--nqubits", type=int, default=40)
|
||||
parser.add_argument("--nlayers", type=int, default=30)
|
||||
parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=512)
|
||||
parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--tensor-module", choices=("numpy", "torch"), default="torch")
|
||||
parser.add_argument("--torch-threads", type=int, default=32)
|
||||
parser.add_argument(
|
||||
"--executor",
|
||||
choices=("qmatchatea", "vidal", "vidal-mpi"),
|
||||
default="qmatchatea",
|
||||
)
|
||||
parser.add_argument("--mpi-ct", action="store_true")
|
||||
parser.add_argument("--mpi-barriers", type=int, default=-1)
|
||||
parser.add_argument("--mpi-isometrization", type=int, default=-1)
|
||||
parser.add_argument("--exact", action="store_true")
|
||||
parser.add_argument("--exact-max-qubits", type=int, default=24)
|
||||
parser.add_argument("--reference-file")
|
||||
parser.add_argument(
|
||||
"--mpi-rank-map",
|
||||
action="store_true",
|
||||
help="Print MPI rank, host, pid, and torch thread placement metadata.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
logging.getLogger("qibo.config").setLevel(logging.ERROR)
|
||||
logging.getLogger("qtealeaves").setLevel(logging.ERROR)
|
||||
import torch
|
||||
|
||||
torch.set_num_threads(args.torch_threads)
|
||||
rank = 0
|
||||
size = 1
|
||||
if args.mpi_ct:
|
||||
from mpi4py import MPI
|
||||
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
size = MPI.COMM_WORLD.Get_size()
|
||||
if args.mpi_rank_map:
|
||||
rank_info = {
|
||||
"rank": rank,
|
||||
"size": size,
|
||||
"host": socket.gethostname(),
|
||||
"pid": os.getpid(),
|
||||
"torch_threads": args.torch_threads,
|
||||
"omp_num_threads": os.environ.get("OMP_NUM_THREADS", ""),
|
||||
"mkl_num_threads": os.environ.get("MKL_NUM_THREADS", ""),
|
||||
}
|
||||
rank_infos = MPI.COMM_WORLD.gather(rank_info, root=0)
|
||||
if rank == 0:
|
||||
print("mpi_rank_map")
|
||||
for item in sorted(rank_infos, key=lambda row: row["rank"]):
|
||||
print(
|
||||
"rank={rank} size={size} host={host} pid={pid} "
|
||||
"torch_threads={torch_threads} "
|
||||
"OMP_NUM_THREADS={omp_num_threads} "
|
||||
"MKL_NUM_THREADS={mkl_num_threads}".format(**item)
|
||||
)
|
||||
|
||||
circuit = build_circuit(args.nqubits, args.nlayers, args.seed)
|
||||
observable = build_observable(args.nqubits)
|
||||
exact = None
|
||||
if args.reference_file:
|
||||
with open(args.reference_file, "r", encoding="utf-8") as f:
|
||||
exact = float(json.load(f)["expectation"])
|
||||
elif args.exact:
|
||||
if args.nqubits > args.exact_max_qubits:
|
||||
raise ValueError(
|
||||
f"--exact is limited to {args.exact_max_qubits} qubits by default."
|
||||
)
|
||||
exact = exact_expectation(circuit, args.nqubits)
|
||||
|
||||
if rank == 0:
|
||||
if args.mpi_ct and args.executor in ("vidal", "vidal-mpi"):
|
||||
mpi_label = f"VidalSegment/{size}"
|
||||
else:
|
||||
mpi_label = f"MPIMPS/{size}" if args.mpi_ct else "SR"
|
||||
print(
|
||||
f"nqubits={args.nqubits} nlayers={args.nlayers} "
|
||||
f"bond={format_optional(args.bond)} "
|
||||
f"cut_ratio={format_optional(args.cut_ratio)} seed={args.seed} "
|
||||
f"tensor_module={args.tensor_module} svd_control=E! "
|
||||
f"compile_circuit=True mpi={mpi_label} executor={args.executor}"
|
||||
)
|
||||
if exact is not None:
|
||||
print(f"exact={exact:.16e}")
|
||||
print("expval abs_error rel_error seconds")
|
||||
|
||||
start = time.perf_counter()
|
||||
timings = None
|
||||
if args.executor in ("vidal", "vidal-mpi"):
|
||||
if args.executor == "vidal-mpi" and not args.mpi_ct:
|
||||
raise ValueError("--executor vidal-mpi requires --mpi-ct.")
|
||||
if args.mpi_ct:
|
||||
from qibotn.backends.vidal_mpi_segment import run_segment_vidal_mpi_ring_xz
|
||||
|
||||
value, timings = run_segment_vidal_mpi_ring_xz(
|
||||
circuit,
|
||||
max_bond=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module=args.tensor_module,
|
||||
comm=MPI.COMM_WORLD,
|
||||
)
|
||||
else:
|
||||
value = run_vidal_ring_xz(
|
||||
circuit,
|
||||
max_bond=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module=args.tensor_module,
|
||||
)
|
||||
else:
|
||||
backend = QMatchaTeaBackend()
|
||||
backend.configure_tn_simulation(
|
||||
ansatz="MPS",
|
||||
max_bond_dimension=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
svd_control="E!",
|
||||
tensor_module=args.tensor_module,
|
||||
compile_circuit=True,
|
||||
track_memory=False,
|
||||
mpi_approach="CT" if args.mpi_ct else "SR",
|
||||
mpi_num_procs=size,
|
||||
mpi_where_barriers=args.mpi_barriers if args.mpi_ct else -1,
|
||||
mpi_isometrization=args.mpi_isometrization,
|
||||
)
|
||||
value = backend.expectation(
|
||||
circuit,
|
||||
observable,
|
||||
preprocess=False,
|
||||
compile_circuit=True,
|
||||
)
|
||||
max_timings = None
|
||||
if timings:
|
||||
max_timings = {
|
||||
key: MPI.COMM_WORLD.reduce(local_value, op=MPI.MAX, root=0)
|
||||
for key, local_value in timings.items()
|
||||
}
|
||||
if rank != 0:
|
||||
return
|
||||
value = float(np.real(value))
|
||||
elapsed = time.perf_counter() - start
|
||||
abs_error = float("nan") if exact is None else abs(value - exact)
|
||||
rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
|
||||
print(f"{value:.16e} {abs_error:.6e} {rel_error:.6e} {elapsed:.3f}")
|
||||
if max_timings:
|
||||
print("timing_section max_seconds")
|
||||
for key, max_value in max_timings.items():
|
||||
print(f"{key} {max_value:.6f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,56 +0,0 @@
|
||||
"""MPI parallel sliced contraction using pre-sliced tree."""
|
||||
import time, pickle, os
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
|
||||
NQUBITS, NLAYERS, NCORES = 25, 10, 48
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
rank, size = comm.Get_rank(), comm.Get_size()
|
||||
|
||||
os.environ['OMP_NUM_THREADS'] = str(NCORES)
|
||||
os.environ['MKL_NUM_THREADS'] = str(NCORES)
|
||||
|
||||
import torch
|
||||
import qibo, quimb as qu
|
||||
from qibotn.observables import build_random_circuit
|
||||
|
||||
torch.set_num_threads(NCORES)
|
||||
|
||||
circuit = build_random_circuit(NQUBITS, NLAYERS)
|
||||
qibo.set_backend("qibotn", platform="quimb")
|
||||
backend = qibo.get_backend()
|
||||
backend.configure_tn_simulation(ansatz="tn")
|
||||
qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz)
|
||||
tn = qc.local_expectation(qu.pauli('x') & qu.pauli('z'), (0, 1), rehearse='tn')
|
||||
|
||||
if rank == 0:
|
||||
with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'rb') as f:
|
||||
tree = pickle.load(f)
|
||||
else:
|
||||
tree = None
|
||||
tree = comm.bcast(tree, root=0)
|
||||
|
||||
arrays = [torch.from_numpy(np.asarray(t._data)) for t in tn.tensors]
|
||||
n_slices = tree.multiplicity
|
||||
|
||||
if rank == 0:
|
||||
print(f"Slices: {n_slices}, Ranks: {size}, "
|
||||
f"Peak: {tree.max_size() * 16 / 1e9:.2f} GB, "
|
||||
f"Threads/rank: {NCORES}, Backend: torch")
|
||||
|
||||
t0 = time.time()
|
||||
result = None
|
||||
for i in range(rank, n_slices, size):
|
||||
val = tree.contract_slice(arrays, i, backend='torch')
|
||||
val_np = val.cpu().numpy().reshape(-1)
|
||||
result = val_np if result is None else result + val_np
|
||||
|
||||
if result is None:
|
||||
result = np.zeros(1, dtype=np.complex128)
|
||||
|
||||
total = np.zeros_like(result) if rank == 0 else None
|
||||
comm.Reduce(result, total, root=0)
|
||||
|
||||
if rank == 0:
|
||||
print(f"Contract: {time.time() - t0:.4f}s Expectation: {0.5 * total[0].real:.10f}")
|
||||
@@ -1,34 +0,0 @@
|
||||
"""Search contraction path and save."""
|
||||
import time, os, pickle
|
||||
from qibotn.parallel import parallel_path_search
|
||||
from qibotn.observables import build_random_circuit
|
||||
import qibo, quimb as qu
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
NQUBITS, NLAYERS, WORKERS = 20, 10, 96
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
rank, size = comm.Get_rank(), comm.Get_size()
|
||||
method = 'mpi' if size > 1 else 'processpool'
|
||||
|
||||
circuit = build_random_circuit(NQUBITS, NLAYERS)
|
||||
qibo.set_backend("qibotn", platform="quimb")
|
||||
backend = qibo.get_backend()
|
||||
backend.configure_tn_simulation(ansatz="tn")
|
||||
qc = backend._qibo_circuit_to_quimb(circuit, backend.circuit_ansatz)
|
||||
tn = qc.local_expectation(qu.pauli('x') & qu.pauli('z'), (0, 1), rehearse='tn')
|
||||
|
||||
if rank == 0:
|
||||
print(f"Searching {NQUBITS}q {NLAYERS}l, method={method}, ranks={size}, workers/rank={WORKERS}...")
|
||||
t0 = time.time()
|
||||
tree = parallel_path_search(tn, tn.outer_inds(), method=method,
|
||||
total_repeats=1024, max_time=300, n_workers=WORKERS,trial_timeout=60)
|
||||
t_search = time.time() - t0
|
||||
|
||||
if rank == 0:
|
||||
os.makedirs('data', exist_ok=True)
|
||||
path = f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl"
|
||||
with open(path, 'wb') as f:
|
||||
pickle.dump(tree, f)
|
||||
print(f"Search: {t_search:.2f}s Peak: {tree.max_size() * 16 / 1e9:.2f} GB Saved: {path}")
|
||||
@@ -1,16 +0,0 @@
|
||||
"""Slice saved tree and save."""
|
||||
import pickle
|
||||
|
||||
NQUBITS, NLAYERS = 25, 10
|
||||
|
||||
with open(f"data/tree_q{NQUBITS}_l{NLAYERS}.pkl", 'rb') as f:
|
||||
tree = pickle.load(f)
|
||||
|
||||
print(f"Original peak: {tree.max_size() * 16 / 1e9:.2f} GB")
|
||||
|
||||
tree_sliced = tree.slice_and_reconfigure(target_size=2**28)
|
||||
|
||||
with open(f"data/tree_q{NQUBITS}_l{NLAYERS}_sliced.pkl", 'wb') as f:
|
||||
pickle.dump(tree_sliced, f)
|
||||
|
||||
print(f"Sliced peak: {tree_sliced.max_size() * 16 / 1e9:.2f} GB Slices: {tree_sliced.multiplicity}")
|
||||
@@ -1,378 +0,0 @@
|
||||
"""MPI-parallel TN benchmark: path search + contraction via MPI."""
|
||||
import json
|
||||
import pickle
|
||||
import time
|
||||
import argparse
|
||||
import numpy as np
|
||||
import cotengra as ctg
|
||||
import qibo
|
||||
from qibo import Circuit, gates
|
||||
from mpi4py import MPI
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
from qibotn.observables import check_observable, extract_gates_and_qubits
|
||||
|
||||
|
||||
def _load_observable(observable_file=None, observable_json=None):
|
||||
if observable_file:
|
||||
with open(observable_file, "r", encoding="utf8") as f:
|
||||
return json.load(f)
|
||||
if observable_json:
|
||||
return json.loads(observable_json)
|
||||
return None
|
||||
|
||||
|
||||
def _term_to_quimb_operator(term):
|
||||
"""Convert one extracted Hamiltonian term to a quimb operator."""
|
||||
import quimb as qu
|
||||
|
||||
coeff = complex(term[0][2]) if term else 1.0
|
||||
op = None
|
||||
where = []
|
||||
|
||||
for qubit, gate_name, _ in term:
|
||||
qubit = int(qubit)
|
||||
gate_name = str(gate_name).upper()
|
||||
if gate_name == "I":
|
||||
continue
|
||||
where.append(qubit)
|
||||
op = qu.pauli(gate_name.lower()) if op is None else op & qu.pauli(gate_name.lower())
|
||||
|
||||
return complex(coeff), op, tuple(where)
|
||||
|
||||
|
||||
def _run_serial_search(tn_bytes, output_inds, repeats, seed, num_slices, n_ranks, max_time):
|
||||
import pickle, cotengra as ctg, random
|
||||
random.seed(seed)
|
||||
tn = pickle.loads(tn_bytes)
|
||||
opt = ctg.HyperOptimizer(
|
||||
methods=['kahypar', 'kahypar-agglom', 'spinglass'],
|
||||
max_repeats=repeats,
|
||||
parallel=False,
|
||||
minimize='combo-256',
|
||||
max_time=max_time,
|
||||
optlib="random",
|
||||
slicing_opts={'target_size': 2**29, 'allow_outer': True},
|
||||
progbar=False,
|
||||
)
|
||||
tree = tn.contraction_tree(optimize=opt, output_inds=output_inds)
|
||||
return tree.combo_cost(factor=256), tree
|
||||
|
||||
|
||||
def parallel_search(tn, output_inds, total_repeats, n_workers, num_slices, n_ranks,
|
||||
timeout):
|
||||
import pickle, os, signal
|
||||
from concurrent.futures import ProcessPoolExecutor, as_completed
|
||||
tn_bytes = pickle.dumps(tn)
|
||||
if n_workers <= 1:
|
||||
return _run_serial_search(
|
||||
tn_bytes, output_inds, total_repeats, 0, num_slices, n_ranks, timeout
|
||||
)[1]
|
||||
repeats_per = max(1, total_repeats // n_workers)
|
||||
best_cost, best_tree = float('inf'), None
|
||||
|
||||
pool = ProcessPoolExecutor(max_workers=n_workers)
|
||||
futures = [
|
||||
pool.submit(_run_serial_search, tn_bytes, output_inds,
|
||||
repeats_per, seed, num_slices, n_ranks, timeout)
|
||||
for seed in range(n_workers)
|
||||
]
|
||||
try:
|
||||
for fut in as_completed(futures, timeout=timeout + 5):
|
||||
try:
|
||||
cost, tree = fut.result()
|
||||
if cost < best_cost:
|
||||
best_cost, best_tree = cost, tree
|
||||
except Exception as e:
|
||||
print(f" [worker failed] {e}")
|
||||
except TimeoutError:
|
||||
pass
|
||||
finally:
|
||||
for fut in futures:
|
||||
fut.cancel()
|
||||
for pid in list(pool._processes.keys()):
|
||||
try:
|
||||
os.kill(pid, signal.SIGKILL)
|
||||
except ProcessLookupError:
|
||||
pass
|
||||
pool.shutdown(wait=False)
|
||||
|
||||
return best_tree
|
||||
|
||||
|
||||
def make_circuit(circuit_type, nqubits, nlayers=1):
|
||||
c = Circuit(nqubits)
|
||||
if circuit_type == "qft":
|
||||
from qibo.models import QFT
|
||||
return QFT(nqubits)
|
||||
elif circuit_type == "variational":
|
||||
for layer in range(nlayers):
|
||||
for q in range(nqubits):
|
||||
c.add(gates.RY(q, theta=np.random.uniform(0, 2 * np.pi)))
|
||||
offset = layer % 2
|
||||
for q in range(offset, nqubits - 1, 2):
|
||||
c.add(gates.CZ(q, q + 1))
|
||||
elif circuit_type == "ghz":
|
||||
c.add(gates.H(0))
|
||||
for q in range(nqubits - 1):
|
||||
c.add(gates.CNOT(q, q + 1))
|
||||
elif circuit_type == "brickwork":
|
||||
for q in range(nqubits):
|
||||
c.add(gates.H(q))
|
||||
for layer in range(nlayers):
|
||||
offset = layer % 2
|
||||
for q in range(offset, nqubits - 1, 2):
|
||||
c.add(gates.CNOT(q, q + 1))
|
||||
c.add(gates.RZ(q, theta=np.random.uniform(0, 2 * np.pi)))
|
||||
c.add(gates.RZ(q + 1, theta=np.random.uniform(0, 2 * np.pi)))
|
||||
else:
|
||||
raise ValueError(f"Unknown circuit: {circuit_type}")
|
||||
return c
|
||||
|
||||
|
||||
def _contract_mpi(tree, arrays, comm, root=0):
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
is_torch = type(arrays[0]).__module__.startswith("torch")
|
||||
|
||||
result_np = None
|
||||
for i in range(rank, tree.multiplicity, size):
|
||||
x = tree.contract_slice(arrays, i)
|
||||
x_np = np.asfortranarray(x.detach().cpu().numpy() if is_torch else np.asarray(x))
|
||||
result_np = x_np if result_np is None else result_np + x_np
|
||||
|
||||
if result_np is None:
|
||||
result_np = np.zeros(1, dtype=np.complex128)
|
||||
|
||||
result = np.zeros_like(result_np) if rank == root else None
|
||||
comm.Reduce(result_np, result, root=root)
|
||||
|
||||
if rank == root:
|
||||
import torch
|
||||
return torch.from_numpy(np.asarray(result)) if is_torch else result
|
||||
return None
|
||||
|
||||
|
||||
def run_mpi(circuit, nqubits, num_slices, total_repeats=1024,
|
||||
load_path=None, save_path=None):
|
||||
"""Each MPI rank runs serial path search over total_repeats/size trials,
|
||||
rank 0 picks the global best, then all ranks contract in parallel."""
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
qibo.set_backend("qibotn", platform="quimb")
|
||||
b = qibo.get_backend()
|
||||
b.configure_tn_simulation(ansatz="tn")
|
||||
|
||||
import torch
|
||||
qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
|
||||
gate_opts={"max_bond": None, "cutoff": 1e-10})
|
||||
qc.to_backend = lambda x: torch.from_numpy(x).to(torch.complex128)
|
||||
|
||||
# --- path search: each rank serial, gather best to rank 0 ---
|
||||
if load_path:
|
||||
if rank == 0:
|
||||
with open(load_path, "rb") as f:
|
||||
saved = pickle.load(f)
|
||||
tree, psi, t_search = saved["tree"], saved["psi"], 0.0
|
||||
print(f" [path loaded] {load_path}")
|
||||
else:
|
||||
tree = psi = None
|
||||
t_search = 0.0
|
||||
else:
|
||||
rank_repeats = max(1, total_repeats // size)
|
||||
t0 = time.time()
|
||||
# get TN object first (no contraction), then run parallel search
|
||||
psi_tn = qc.to_dense(rehearse="tn")
|
||||
local_tree = parallel_search(
|
||||
psi_tn, psi_tn.outer_inds(), rank_repeats, n_workers=48,
|
||||
num_slices=num_slices, n_ranks=size, timeout=600,
|
||||
)
|
||||
t_search = time.time() - t0
|
||||
local_psi = psi_tn
|
||||
|
||||
all_results = comm.gather((local_tree.combo_cost(factor=256), local_tree, local_psi), root=0)
|
||||
if rank == 0:
|
||||
_, tree, psi = min(all_results, key=lambda x: x[0])
|
||||
print(f" [path search] {t_search:.3f}s "
|
||||
f"flops~2^{tree.contraction_cost(log=2):.2f} "
|
||||
f"size~2^{tree.contraction_width():.2f} "
|
||||
f"slices={tree.multiplicity}")
|
||||
if save_path:
|
||||
with open(save_path, "wb") as f:
|
||||
pickle.dump({"tree": tree, "psi": psi}, f)
|
||||
print(f" [path saved] {save_path}")
|
||||
else:
|
||||
tree = psi = None
|
||||
|
||||
if save_path:
|
||||
t_search = comm.bcast(t_search, root=0)
|
||||
return None, t_search
|
||||
|
||||
tree = comm.bcast(tree, root=0)
|
||||
psi = comm.bcast(psi, root=0)
|
||||
t_search = comm.bcast(t_search, root=0)
|
||||
|
||||
# --- contraction: all ranks work in parallel ---
|
||||
import torch
|
||||
torch.set_num_threads(max(1, 96 // size))
|
||||
arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex128) for a in psi.arrays]
|
||||
t0 = time.time()
|
||||
sv = _contract_mpi(tree, arrays, comm, root=0)
|
||||
t_contract = time.time() - t0
|
||||
|
||||
if rank == 0:
|
||||
print(f" [contraction] {t_contract:.3f}s")
|
||||
return np.array(sv).reshape(-1), t_search + t_contract
|
||||
return None, t_search + t_contract
|
||||
|
||||
|
||||
def run_mpi_expval(
|
||||
circuit,
|
||||
nqubits,
|
||||
observable=None,
|
||||
total_repeats=1024,
|
||||
search_workers=1,
|
||||
search_timeout=300,
|
||||
):
|
||||
"""Compute a Hamiltonian expectation value directly from TN via MPI.
|
||||
MPI parallelizes over Hamiltonian terms; ProcessPool optionally helps
|
||||
path search for each term."""
|
||||
import torch
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
qibo.set_backend("qibotn", platform="quimb")
|
||||
b = qibo.get_backend()
|
||||
b.configure_tn_simulation(ansatz="tn")
|
||||
|
||||
observable = check_observable(observable, nqubits)
|
||||
ham_gate_map = extract_gates_and_qubits(observable)
|
||||
|
||||
qc = b._qibo_circuit_to_quimb(circuit, quimb_circuit_type=b.circuit_ansatz,
|
||||
gate_opts={"max_bond": None, "cutoff": 1e-10})
|
||||
|
||||
my_terms = ham_gate_map[rank::size]
|
||||
torch.set_num_threads(max(1, 96 // size))
|
||||
t0 = time.time()
|
||||
|
||||
my_exp = 0.0 + 0.0j
|
||||
for term in my_terms:
|
||||
coeff, op, where = _term_to_quimb_operator(term)
|
||||
if op is None:
|
||||
my_exp += coeff
|
||||
continue
|
||||
tn = qc.local_expectation_tn(op, where=where)
|
||||
if len(tn.outer_inds()) == 0:
|
||||
val = complex(tn.contract())
|
||||
else:
|
||||
tree = parallel_search(
|
||||
tn,
|
||||
tn.outer_inds(),
|
||||
total_repeats,
|
||||
n_workers=search_workers,
|
||||
num_slices=1,
|
||||
n_ranks=size,
|
||||
timeout=search_timeout,
|
||||
)
|
||||
if tree is None:
|
||||
raise RuntimeError("Failed to find a contraction tree for expectation TN.")
|
||||
arrays = [torch.from_numpy(np.asarray(a)).to(torch.complex128) for a in tn.arrays]
|
||||
acc = sum(tree.contract_slice(arrays, i) for i in range(tree.multiplicity))
|
||||
val = complex(acc.item() if hasattr(acc, 'item') else acc)
|
||||
my_exp += coeff * val
|
||||
|
||||
t_total = time.time() - t0
|
||||
|
||||
all_results = comm.gather(my_exp, root=0)
|
||||
if rank == 0:
|
||||
total_exp = sum(all_results)
|
||||
print(f"\n[TN expval] time={t_total:.4f}s expval={total_exp.real:.12f}")
|
||||
return np.real_if_close(total_exp), t_total
|
||||
return None, t_total
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--nqubits", type=int, default=30)
|
||||
parser.add_argument("--circuit", type=str, default="qft",
|
||||
choices=["qft", "variational", "ghz", "brickwork"])
|
||||
parser.add_argument("--nlayers", type=int, default=3)
|
||||
parser.add_argument("--num-slices", type=int, default=1)
|
||||
parser.add_argument("--total-repeats", type=int, default=1024)
|
||||
parser.add_argument("--search-workers", type=int, default=1)
|
||||
parser.add_argument("--search-timeout", type=int, default=300)
|
||||
parser.add_argument("--observable-file", type=str, default=None)
|
||||
parser.add_argument("--observable-json", type=str, default=None)
|
||||
parser.add_argument("--save-path", type=str, default=None)
|
||||
parser.add_argument("--load-path", type=str, default=None)
|
||||
parser.add_argument("--no-compare", action="store_true")
|
||||
parser.add_argument("--mode", type=str, default="sv", choices=["sv", "expval"])
|
||||
args = parser.parse_args()
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
|
||||
if rank == 0:
|
||||
print(f"Circuit: {args.circuit}, nqubits={args.nqubits}, "
|
||||
f"nlayers={args.nlayers}, ranks={comm.Get_size()}")
|
||||
|
||||
np.random.seed(42)
|
||||
circuit = make_circuit(args.circuit, args.nqubits, args.nlayers)
|
||||
observable = _load_observable(args.observable_file, args.observable_json)
|
||||
|
||||
if args.mode == "expval":
|
||||
try:
|
||||
expval, t_total = run_mpi_expval(
|
||||
circuit,
|
||||
args.nqubits,
|
||||
observable=observable,
|
||||
total_repeats=args.total_repeats,
|
||||
search_workers=args.search_workers,
|
||||
search_timeout=args.search_timeout,
|
||||
)
|
||||
except Exception as e:
|
||||
if rank == 0:
|
||||
print(f"[FAILED] {e}")
|
||||
raise
|
||||
if rank == 0:
|
||||
np.save(f"data/expval_tn_{args.circuit}{args.nqubits}.npy", np.asarray(expval))
|
||||
if not args.no_compare:
|
||||
print("No built-in reference comparison for arbitrary observables.")
|
||||
return
|
||||
|
||||
try:
|
||||
sv, t_total = run_mpi(circuit, args.nqubits, args.num_slices,
|
||||
total_repeats=args.total_repeats,
|
||||
load_path=args.load_path, save_path=args.save_path)
|
||||
except Exception as e:
|
||||
if rank == 0:
|
||||
print(f"[FAILED] {e}")
|
||||
raise
|
||||
|
||||
if rank == 0 and sv is not None:
|
||||
print(f"\n[quimb TN MPI] time={t_total:.4f}s shape={sv.shape}")
|
||||
np.save(f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy", sv)
|
||||
|
||||
if not args.no_compare:
|
||||
from qibotn.bak.benchmark_tn import run_qibojit
|
||||
import gc
|
||||
np.random.seed(42)
|
||||
circuit_ref = make_circuit(args.circuit, args.nqubits, args.nlayers)
|
||||
sv_ref, t_ref = run_qibojit(circuit_ref)
|
||||
np.save(f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy", sv_ref)
|
||||
print(f"[qibojit] time={t_ref:.4f}s")
|
||||
# free memory before loading via mmap for expval comparison
|
||||
del sv, sv_ref
|
||||
gc.collect()
|
||||
from compare_jit_tn_quimb import check_results
|
||||
ref_path = f"data/sv_qibojit_{args.circuit}{args.nqubits}.npy"
|
||||
tn_path = f"data/sv_tn_{args.circuit}{args.nqubits}_mpi.npy"
|
||||
check_results(ref_path, tn_path, args.nqubits)
|
||||
if t_total > 0:
|
||||
print(f"Speedup : {t_ref/t_total:.2f}x")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,25 +0,0 @@
|
||||
"""Check contraction tree statistics."""
|
||||
import pickle, sys
|
||||
|
||||
path = sys.argv[1] if len(sys.argv) > 1 else "data/tree_q25_l10.pkl"
|
||||
with open(path, 'rb') as f:
|
||||
tree = pickle.load(f)
|
||||
|
||||
# Intel 8558P: 96 cores, 2.1GHz, AVX-512 (16 FP64/cycle), FMA x2
|
||||
# complex128 multiply-add = 6 real FLOPs
|
||||
CORES = 96
|
||||
FREQ = 2.1e9
|
||||
AVX512_FP64 = 16
|
||||
TFLOPS = CORES * FREQ * AVX512_FP64 * 2 / 1e12 # ~6.45 TFLOPS real FP64
|
||||
COMPLEX_FLOPS = TFLOPS / 6 # complex128 effective
|
||||
|
||||
flops = tree.total_flops()
|
||||
slices = tree.multiplicity
|
||||
est_seconds = flops * slices / (COMPLEX_FLOPS * 1e12)
|
||||
|
||||
print(f"File: {path}")
|
||||
print(f"Peak memory (GB): {tree.max_size() * 16 / 1e9:.2f}")
|
||||
print(f"Total FLOPs: {flops:.2e} x{slices} slices = {flops*slices:.2e}")
|
||||
print(f"Contraction width: {tree.contraction_width()}")
|
||||
print(f"Multiplicity (slices): {slices}")
|
||||
print(f"Estimated time (96 cores): {est_seconds:.1f}s ({est_seconds/3600:.2f}h)")
|
||||
@@ -1,137 +0,0 @@
|
||||
"""Compare QMatchaTeaBackend with the VidalBackend fast path."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.qmatchatea import QMatchaTeaBackend
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed, kind):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
for layer in range(nlayers):
|
||||
for q in range(nqubits):
|
||||
circuit.add(gates.RY(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
if kind == "brickwall":
|
||||
for q in range(0, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q, q + 1))
|
||||
for q in range(1, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q, q + 1))
|
||||
elif kind == "shifted-cz":
|
||||
for q in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.CZ(q, q + 1))
|
||||
elif kind == "reversed-cnot":
|
||||
for q in range(0, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q + 1, q))
|
||||
for q in range(1, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q, q + 1))
|
||||
else:
|
||||
raise ValueError(f"Unknown circuit kind {kind!r}.")
|
||||
return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits, kind):
|
||||
form = 0
|
||||
if kind == "ring-xz":
|
||||
for q in range(nqubits):
|
||||
form += 0.5 * X(q) * Z((q + 1) % nqubits)
|
||||
elif kind == "open-zz":
|
||||
for q in range(nqubits - 1):
|
||||
form += Z(q) * Z(q + 1) / (nqubits - 1)
|
||||
elif kind == "mixed":
|
||||
form += 0.25 * X(0) - 0.5 * Z(nqubits - 1)
|
||||
for q in range(0, nqubits - 1, 3):
|
||||
form += 0.125 * Y(q) * Y(q + 1)
|
||||
else:
|
||||
raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def run_backend(backend, circuit, observable):
|
||||
start = time.perf_counter()
|
||||
value = backend.expectation(circuit, observable, preprocess=False, compile_circuit=True)
|
||||
return float(np.real(value)), time.perf_counter() - start
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--nqubits", type=int, default=34)
|
||||
parser.add_argument("--nlayers", type=int, default=20)
|
||||
parser.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--tensor-module", choices=("torch", "numpy"), default="torch")
|
||||
parser.add_argument("--torch-threads", type=int, default=32)
|
||||
parser.add_argument(
|
||||
"--circuit-kind",
|
||||
choices=("brickwall", "shifted-cz", "reversed-cnot"),
|
||||
default="brickwall",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--observable-kind",
|
||||
choices=("ring-xz", "open-zz", "mixed"),
|
||||
default="ring-xz",
|
||||
)
|
||||
parser.add_argument("--reference-file")
|
||||
parser.add_argument("--skip-qmatchatea", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
torch.set_num_threads(args.torch_threads)
|
||||
circuit = build_circuit(args.nqubits, args.nlayers, args.seed, args.circuit_kind)
|
||||
observable = build_observable(args.nqubits, args.observable_kind)
|
||||
|
||||
exact = None
|
||||
if args.reference_file:
|
||||
with open(args.reference_file, "r", encoding="utf-8") as f:
|
||||
exact = float(json.load(f)["expectation"])
|
||||
|
||||
print(
|
||||
f"nqubits={args.nqubits} nlayers={args.nlayers} bond={args.bond} "
|
||||
f"circuit={args.circuit_kind} observable={args.observable_kind} "
|
||||
f"tensor_module={args.tensor_module} torch_threads={args.torch_threads}"
|
||||
)
|
||||
if exact is not None:
|
||||
print(f"exact={exact:.16e}")
|
||||
print("backend value abs_error seconds")
|
||||
|
||||
if not args.skip_qmatchatea:
|
||||
qmt = QMatchaTeaBackend()
|
||||
qmt.configure_tn_simulation(
|
||||
ansatz="MPS",
|
||||
max_bond_dimension=args.bond,
|
||||
cut_ratio=1e-12,
|
||||
svd_control="E!",
|
||||
tensor_module=args.tensor_module,
|
||||
compile_circuit=True,
|
||||
track_memory=False,
|
||||
)
|
||||
value, seconds = run_backend(qmt, circuit, observable)
|
||||
error = float("nan") if exact is None else abs(value - exact)
|
||||
print(f"qmatchatea {value:.16e} {error:.6e} {seconds:.3f}")
|
||||
|
||||
vidal = VidalBackend()
|
||||
vidal.configure_tn_simulation(
|
||||
ansatz="MPS",
|
||||
max_bond_dimension=args.bond,
|
||||
cut_ratio=1e-12,
|
||||
tensor_module=args.tensor_module,
|
||||
compile_circuit=True,
|
||||
fallback=True,
|
||||
)
|
||||
value, seconds = run_backend(vidal, circuit, observable)
|
||||
error = float("nan") if exact is None else abs(value - exact)
|
||||
print(f"vidal {value:.16e} {error:.6e} {seconds:.3f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,33 +0,0 @@
|
||||
"""Example custom case for tools/run_tn_custom.py."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
for layer in range(nlayers):
|
||||
for qubit in range(nqubits):
|
||||
circuit.add(gates.RY(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.7, 0.7)))
|
||||
circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.7, 0.7)))
|
||||
return circuit
|
||||
|
||||
|
||||
def build_observable(nqubits, seed):
|
||||
return {
|
||||
"terms": [
|
||||
{
|
||||
"coefficient": 1.0 / max(1, nqubits - 1),
|
||||
"operators": [("Z", site), ("Z", site + 1)],
|
||||
}
|
||||
for site in range(nqubits - 1)
|
||||
]
|
||||
}
|
||||
@@ -1,208 +0,0 @@
|
||||
"""Inspect cotengra contraction trees for dominant torch matmul shapes."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import importlib
|
||||
import math
|
||||
import pickle
|
||||
from collections import Counter, defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def _prod(values):
|
||||
out = 1
|
||||
for value in values:
|
||||
out *= int(value)
|
||||
return out
|
||||
|
||||
|
||||
def _broadcast_batch(a_batch, b_batch):
|
||||
if a_batch == b_batch:
|
||||
return _prod(a_batch)
|
||||
if not a_batch:
|
||||
return _prod(b_batch)
|
||||
if not b_batch:
|
||||
return _prod(a_batch)
|
||||
|
||||
ndim = max(len(a_batch), len(b_batch))
|
||||
a_batch = (1,) * (ndim - len(a_batch)) + tuple(a_batch)
|
||||
b_batch = (1,) * (ndim - len(b_batch)) + tuple(b_batch)
|
||||
return _prod(max(a, b) for a, b in zip(a_batch, b_batch))
|
||||
|
||||
|
||||
def _load_tree(path, index):
|
||||
with Path(path).open("rb") as f:
|
||||
payload = pickle.load(f)
|
||||
trees = payload["trees"] if isinstance(payload, dict) else payload
|
||||
if not isinstance(trees, (list, tuple)):
|
||||
trees = [trees]
|
||||
return trees[index]
|
||||
|
||||
|
||||
def _analyze_tree(tree):
|
||||
contract_mod = importlib.import_module("cotengra.contract")
|
||||
contractions = contract_mod.extract_contractions(tree)
|
||||
size_dict = tree.size_dict
|
||||
ops = []
|
||||
counts = Counter()
|
||||
|
||||
for op_index, (parent, left, right, tdot, arg, perm) in enumerate(contractions):
|
||||
if left is None and right is None:
|
||||
counts["preprocess"] += 1
|
||||
continue
|
||||
|
||||
left_inds = tree.get_inds(left)
|
||||
right_inds = tree.get_inds(right)
|
||||
parent_inds = tree.get_inds(parent)
|
||||
left_shape = tuple(size_dict[ix] for ix in left_inds)
|
||||
right_shape = tuple(size_dict[ix] for ix in right_inds)
|
||||
|
||||
if tdot:
|
||||
parsed = contract_mod._parse_tensordot_axes_to_matmul(
|
||||
arg,
|
||||
left_shape,
|
||||
right_shape,
|
||||
)
|
||||
else:
|
||||
parsed = contract_mod._parse_eq_to_batch_matmul(
|
||||
arg,
|
||||
left_shape,
|
||||
right_shape,
|
||||
)
|
||||
|
||||
(
|
||||
_eq_a,
|
||||
_eq_b,
|
||||
new_shape_a,
|
||||
new_shape_b,
|
||||
_new_shape_ab,
|
||||
_perm_ab,
|
||||
pure_multiplication,
|
||||
) = parsed
|
||||
|
||||
matmul_shape = None
|
||||
matmul_flops = 0
|
||||
if pure_multiplication:
|
||||
kind = "mul"
|
||||
else:
|
||||
a_shape = tuple(new_shape_a or left_shape)
|
||||
b_shape = tuple(new_shape_b or right_shape)
|
||||
batch = _broadcast_batch(a_shape[:-2], b_shape[:-2])
|
||||
m, k, n = int(a_shape[-2]), int(a_shape[-1]), int(b_shape[-1])
|
||||
kind = "mm" if batch == 1 else "bmm"
|
||||
matmul_shape = (batch, m, k, n)
|
||||
matmul_flops = batch * m * k * n
|
||||
|
||||
tree_flops = int(tree.get_flops(parent))
|
||||
out_size = int(tree.get_size(parent))
|
||||
ops.append(
|
||||
{
|
||||
"index": op_index,
|
||||
"kind": kind,
|
||||
"matmul_shape": matmul_shape,
|
||||
"matmul_flops": matmul_flops,
|
||||
"tree_flops": tree_flops,
|
||||
"out_size": out_size,
|
||||
"left_shape": left_shape,
|
||||
"right_shape": right_shape,
|
||||
"left_rank": len(left_inds),
|
||||
"right_rank": len(right_inds),
|
||||
"out_rank": len(parent_inds),
|
||||
"perm": perm,
|
||||
}
|
||||
)
|
||||
counts[kind] += 1
|
||||
|
||||
return contractions, ops, counts
|
||||
|
||||
|
||||
def _format_log(value, base):
|
||||
return "-inf" if value <= 0 else f"{math.log(value, base):.3f}"
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("tree", help="Pickle file containing one tree or {'trees': [...]}.")
|
||||
parser.add_argument("--index", type=int, default=0, help="Tree index in the file.")
|
||||
parser.add_argument("--top", type=int, default=20, help="Number of top ops to print.")
|
||||
parser.add_argument(
|
||||
"--dtype-bytes",
|
||||
type=int,
|
||||
default=8,
|
||||
help="Bytes per element for memory estimates, for example 8 for complex64.",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
tree = _load_tree(args.tree, args.index)
|
||||
contractions, ops, counts = _analyze_tree(tree)
|
||||
nslices = int(getattr(tree, "multiplicity", 1))
|
||||
per_slice_flops = sum(op["tree_flops"] for op in ops)
|
||||
per_slice_write = sum(op["out_size"] for op in ops)
|
||||
max_out = max((op["out_size"] for op in ops), default=0)
|
||||
all_flops = per_slice_flops * nslices
|
||||
all_write = per_slice_write * nslices
|
||||
|
||||
print(f"tree={args.tree} index={args.index}")
|
||||
print(
|
||||
"summary "
|
||||
f"slices={nslices} contractions={len(contractions)} "
|
||||
f"counts={dict(counts)}"
|
||||
)
|
||||
print(
|
||||
"per_slice "
|
||||
f"log10_flops={_format_log(per_slice_flops, 10)} "
|
||||
f"log10_write={_format_log(per_slice_write, 10)} "
|
||||
f"log2_max_output={_format_log(max_out, 2)} "
|
||||
f"max_output_gib={max_out * args.dtype_bytes / 1024**3:.6g}"
|
||||
)
|
||||
print(
|
||||
"all_slices "
|
||||
f"log10_flops={_format_log(all_flops, 10)} "
|
||||
f"log10_write={_format_log(all_write, 10)}"
|
||||
)
|
||||
|
||||
print(f"\ntop_{args.top}_ops_by_flops")
|
||||
for op in sorted(ops, key=lambda item: item["tree_flops"], reverse=True)[: args.top]:
|
||||
print(
|
||||
f"op={op['index']} kind={op['kind']} "
|
||||
f"flops={op['tree_flops']:.6e} out={op['out_size']:.6e} "
|
||||
f"matmul={op['matmul_shape']} "
|
||||
f"ranks=({op['left_rank']},{op['right_rank']}->{op['out_rank']}) "
|
||||
f"lhs={op['left_shape']} rhs={op['right_shape']}"
|
||||
)
|
||||
|
||||
by_shape = defaultdict(lambda: [0, 0, 0])
|
||||
for op in ops:
|
||||
shape = op["matmul_shape"]
|
||||
if shape is None:
|
||||
continue
|
||||
by_shape[shape][0] += 1
|
||||
by_shape[shape][1] += op["tree_flops"]
|
||||
by_shape[shape][2] += op["out_size"]
|
||||
|
||||
print(f"\ntop_{args.top}_matmul_shapes_by_flops")
|
||||
for shape, (count, flops, out_size) in sorted(
|
||||
by_shape.items(),
|
||||
key=lambda item: item[1][1],
|
||||
reverse=True,
|
||||
)[: args.top]:
|
||||
print(
|
||||
f"shape={shape} count={count} "
|
||||
f"flops={flops:.6e} output={out_size:.6e}"
|
||||
)
|
||||
|
||||
print(f"\ntop_{args.top}_matmul_shapes_by_count")
|
||||
for shape, (count, flops, out_size) in sorted(
|
||||
by_shape.items(),
|
||||
key=lambda item: item[1][0],
|
||||
reverse=True,
|
||||
)[: args.top]:
|
||||
print(
|
||||
f"shape={shape} count={count} "
|
||||
f"flops={flops:.6e} output={out_size:.6e}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,223 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Manage the dask cluster used by TN path search.
|
||||
#
|
||||
# Defaults target two servers:
|
||||
# scheduler: 10.20.1.103:8786
|
||||
# workers: 10.20.1.103, 10.20.6.101
|
||||
#
|
||||
# Usage:
|
||||
# tools/manage_tn_dask_cluster.sh start
|
||||
# tools/manage_tn_dask_cluster.sh status
|
||||
# tools/manage_tn_dask_cluster.sh stop
|
||||
#
|
||||
# Common overrides:
|
||||
# SCHEDULER_HOST=10.20.1.103
|
||||
# WORKER_HOSTS="10.20.1.103 10.20.6.101"
|
||||
# NWORKERS=48
|
||||
# NTHREADS=1
|
||||
# ROOT_DIR=/home/yx/qibotn
|
||||
# PYTHON_BIN=.venv/bin/python
|
||||
|
||||
ROOT_DIR="${ROOT_DIR:-/home/yx/qibotn}"
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
SCHEDULER_HOST="${SCHEDULER_HOST:-10.20.1.103}"
|
||||
SCHEDULER_PORT="${SCHEDULER_PORT:-8786}"
|
||||
DASHBOARD_ADDRESS="${DASHBOARD_ADDRESS:-:8787}"
|
||||
WORKER_HOSTS="${WORKER_HOSTS:-10.20.1.103 10.20.6.101}"
|
||||
NWORKERS="${NWORKERS:-84}"
|
||||
NTHREADS="${NTHREADS:-1}"
|
||||
MEMORY_LIMIT="${MEMORY_LIMIT:-0}"
|
||||
LOCAL_DIRECTORY="${LOCAL_DIRECTORY:-/tmp/qibotn-dask}"
|
||||
LOG_DIR="${LOG_DIR:-$ROOT_DIR/logs/dask}"
|
||||
SSH_BIN="${SSH_BIN:-ssh}"
|
||||
DASK_WORKER_TTL="${DASK_WORKER_TTL:-24 hours}"
|
||||
DASK_TICK_LIMIT="${DASK_TICK_LIMIT:-30 minutes}"
|
||||
DASK_LOST_WORKER_TIMEOUT="${DASK_LOST_WORKER_TIMEOUT:-30 minutes}"
|
||||
|
||||
SCHEDULER_ADDR="tcp://${SCHEDULER_HOST}:${SCHEDULER_PORT}"
|
||||
|
||||
is_local_host() {
|
||||
local host="$1"
|
||||
[[ "$host" == "localhost" || "$host" == "127.0.0.1" ]] && return 0
|
||||
[[ "$host" == "$(hostname)" ]] && return 0
|
||||
[[ "$host" == "$(hostname -f 2>/dev/null || true)" ]] && return 0
|
||||
hostname -I 2>/dev/null | tr ' ' '\n' | grep -qx "$host"
|
||||
}
|
||||
|
||||
run_on_host() {
|
||||
local host="$1"
|
||||
shift
|
||||
local cmd="$*"
|
||||
if is_local_host "$host"; then
|
||||
bash -lc "$cmd"
|
||||
else
|
||||
"$SSH_BIN" "$host" "bash -lc $(printf '%q' "$cmd")"
|
||||
fi
|
||||
}
|
||||
|
||||
start_scheduler() {
|
||||
local host="$SCHEDULER_HOST"
|
||||
local log="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.log"
|
||||
local pid_file="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.pid"
|
||||
run_on_host "$host" "
|
||||
set -euo pipefail
|
||||
cd '$ROOT_DIR'
|
||||
mkdir -p '$LOG_DIR'
|
||||
if [[ -s '$pid_file' ]]; then
|
||||
pid=\$(cat '$pid_file')
|
||||
if kill -0 \"\$pid\" 2>/dev/null; then
|
||||
echo \"scheduler already running on $host pid=\$pid\"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL='$DASK_WORKER_TTL' \
|
||||
DASK_DISTRIBUTED__ADMIN__TICK__LIMIT='$DASK_TICK_LIMIT' \
|
||||
DASK_DISTRIBUTED__DEPLOY__LOST_WORKER_TIMEOUT='$DASK_LOST_WORKER_TIMEOUT' \
|
||||
setsid '$PYTHON_BIN' -m distributed.cli.dask_scheduler \
|
||||
--host '$SCHEDULER_HOST' \
|
||||
--port '$SCHEDULER_PORT' \
|
||||
--dashboard-address '$DASHBOARD_ADDRESS' \
|
||||
> '$log' 2>&1 < /dev/null &
|
||||
pid=\$!
|
||||
echo \"\$pid\" > '$pid_file'
|
||||
echo \"scheduler host=$host pid=\$pid addr=$SCHEDULER_ADDR log=$log\"
|
||||
"
|
||||
}
|
||||
|
||||
start_worker() {
|
||||
local host="$1"
|
||||
local log="$LOG_DIR/worker_${host}.log"
|
||||
local pid_file="$LOG_DIR/worker_${host}.pid"
|
||||
run_on_host "$host" "
|
||||
set -euo pipefail
|
||||
cd '$ROOT_DIR'
|
||||
mkdir -p '$LOG_DIR' '$LOCAL_DIRECTORY'
|
||||
if [[ -s '$pid_file' ]]; then
|
||||
pid=\$(cat '$pid_file')
|
||||
if kill -0 \"\$pid\" 2>/dev/null; then
|
||||
echo \"worker already running on $host pid=\$pid\"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
TCM_ENABLE=1 \
|
||||
DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL='$DASK_WORKER_TTL' \
|
||||
DASK_DISTRIBUTED__ADMIN__TICK__LIMIT='$DASK_TICK_LIMIT' \
|
||||
DASK_DISTRIBUTED__DEPLOY__LOST_WORKER_TIMEOUT='$DASK_LOST_WORKER_TIMEOUT' \
|
||||
setsid '$PYTHON_BIN' -m distributed.cli.dask_worker \
|
||||
'$SCHEDULER_ADDR' \
|
||||
--host '$host' \
|
||||
--nworkers '$NWORKERS' \
|
||||
--nthreads '$NTHREADS' \
|
||||
--memory-limit '$MEMORY_LIMIT' \
|
||||
--local-directory '$LOCAL_DIRECTORY' \
|
||||
> '$log' 2>&1 < /dev/null &
|
||||
pid=\$!
|
||||
echo \"\$pid\" > '$pid_file'
|
||||
echo \"worker host=$host pid=\$pid scheduler=$SCHEDULER_ADDR log=$log\"
|
||||
"
|
||||
}
|
||||
|
||||
stop_host() {
|
||||
local host="$1"
|
||||
local scheduler_pid_file="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.pid"
|
||||
local worker_pid_file="$LOG_DIR/worker_${host}.pid"
|
||||
run_on_host "$host" "
|
||||
set +e
|
||||
for pid_file in '$worker_pid_file' '$scheduler_pid_file'; do
|
||||
[[ -f \"\$pid_file\" ]] || continue
|
||||
if [[ \"\$pid_file\" == '$scheduler_pid_file' && '$host' != '$SCHEDULER_HOST' ]]; then
|
||||
continue
|
||||
fi
|
||||
pid=\$(cat \"\$pid_file\")
|
||||
kill \"\$pid\" 2>/dev/null || true
|
||||
rm -f \"\$pid_file\"
|
||||
done
|
||||
pkill -f '[d]istributed.cli.dask_worker.*$SCHEDULER_ADDR'
|
||||
pkill -f '[d]istributed.cli.dask_scheduler.*--port $SCHEDULER_PORT'
|
||||
true
|
||||
"
|
||||
}
|
||||
|
||||
status_host() {
|
||||
local host="$1"
|
||||
local scheduler_pid_file="$LOG_DIR/scheduler_${SCHEDULER_HOST}_${SCHEDULER_PORT}.pid"
|
||||
local worker_pid_file="$LOG_DIR/worker_${host}.pid"
|
||||
echo "--------------------------------------------------------------------------------"
|
||||
echo "host=$host"
|
||||
run_on_host "$host" "
|
||||
set +e
|
||||
for pid_file in '$worker_pid_file' '$scheduler_pid_file'; do
|
||||
[[ -f \"\$pid_file\" ]] || continue
|
||||
if [[ \"\$pid_file\" == '$scheduler_pid_file' && '$host' != '$SCHEDULER_HOST' ]]; then
|
||||
continue
|
||||
fi
|
||||
pid=\$(cat \"\$pid_file\")
|
||||
if kill -0 \"\$pid\" 2>/dev/null; then
|
||||
ps -p \"\$pid\" -o pid,ppid,stat,etime,cmd --no-headers
|
||||
else
|
||||
echo \"stale pid_file=\$pid_file pid=\$pid\"
|
||||
fi
|
||||
done
|
||||
pgrep -af '[d]istributed.cli.dask' || true
|
||||
"
|
||||
}
|
||||
|
||||
case "${1:-help}" in
|
||||
start)
|
||||
start_scheduler
|
||||
sleep 2
|
||||
for host in $WORKER_HOSTS; do
|
||||
start_worker "$host"
|
||||
done
|
||||
echo
|
||||
echo "Dask scheduler: $SCHEDULER_ADDR"
|
||||
echo "Dashboard: http://$SCHEDULER_HOST$DASHBOARD_ADDRESS"
|
||||
;;
|
||||
stop)
|
||||
for host in $WORKER_HOSTS; do
|
||||
stop_host "$host"
|
||||
done
|
||||
stop_host "$SCHEDULER_HOST"
|
||||
;;
|
||||
status)
|
||||
status_host "$SCHEDULER_HOST"
|
||||
for host in $WORKER_HOSTS; do
|
||||
[[ "$host" == "$SCHEDULER_HOST" ]] && continue
|
||||
status_host "$host"
|
||||
done
|
||||
;;
|
||||
restart)
|
||||
"$0" stop
|
||||
sleep 2
|
||||
"$0" start
|
||||
;;
|
||||
help|*)
|
||||
cat <<EOF
|
||||
Usage: tools/manage_tn_dask_cluster.sh [start|stop|restart|status]
|
||||
|
||||
Defaults:
|
||||
SCHEDULER_HOST=$SCHEDULER_HOST
|
||||
SCHEDULER_PORT=$SCHEDULER_PORT
|
||||
WORKER_HOSTS="$WORKER_HOSTS"
|
||||
NWORKERS=$NWORKERS
|
||||
NTHREADS=$NTHREADS
|
||||
ROOT_DIR=$ROOT_DIR
|
||||
PYTHON_BIN=$PYTHON_BIN
|
||||
DASK_WORKER_TTL="$DASK_WORKER_TTL"
|
||||
DASK_TICK_LIMIT=$DASK_TICK_LIMIT
|
||||
DASK_LOST_WORKER_TIMEOUT=$DASK_LOST_WORKER_TIMEOUT
|
||||
|
||||
Search command after start:
|
||||
TCM_ENABLE=1 python -u tools/tn_contest_runner.py search \\
|
||||
--case main1 \\
|
||||
--dask-address $SCHEDULER_ADDR \\
|
||||
--torch-threads 48 \\
|
||||
--dtype complex64 \\
|
||||
--tn-search-repeats 2048 \\
|
||||
--tn-search-time 300
|
||||
EOF
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
@@ -1,313 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""Contest-style multi-node Vidal/MPS expectation runner."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import sys
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC = ROOT / "src"
|
||||
if str(SRC) not in sys.path:
|
||||
sys.path.insert(0, str(SRC))
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend # noqa: E402
|
||||
from qibotn.expectation_runner import exact_for_observable # noqa: E402
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CaseSpec:
|
||||
circuit_kind: str
|
||||
observables: tuple[str, ...]
|
||||
nqubits: int
|
||||
nlayers: int
|
||||
bond: int | None
|
||||
seed: int
|
||||
|
||||
|
||||
CASES = {
|
||||
"main1": CaseSpec(
|
||||
circuit_kind="reversed_cnot",
|
||||
observables=("ring_xz",),
|
||||
nqubits=128,
|
||||
nlayers=24,
|
||||
bond=512,
|
||||
seed=31001,
|
||||
),
|
||||
"main2": CaseSpec(
|
||||
circuit_kind="rxx_rzz",
|
||||
observables=("open_zz", "range2_xx", "mixed_local"),
|
||||
nqubits=128,
|
||||
nlayers=32,
|
||||
bond=1024,
|
||||
seed=31002,
|
||||
),
|
||||
"strong": CaseSpec(
|
||||
circuit_kind="scramble",
|
||||
observables=("ring_xz", "long_z_string", "dense3_spread"),
|
||||
nqubits=256,
|
||||
nlayers=48,
|
||||
bond=2048,
|
||||
seed=41001,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def optional_int(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
|
||||
return "None" if value is None else format(value, fmt)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
|
||||
try:
|
||||
import torch
|
||||
|
||||
torch.set_num_threads(nthreads)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def add_single_qubit_layer(circuit, nqubits, rng, include_rx=False):
|
||||
for qubit in range(nqubits):
|
||||
circuit.add(gates.RY(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
if include_rx:
|
||||
circuit.add(gates.RX(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
|
||||
for layer in range(nlayers):
|
||||
if kind == "reversed_cnot":
|
||||
add_single_qubit_layer(circuit, nqubits, rng)
|
||||
for qubit in range(0, nqubits - 1, 2):
|
||||
gate = gates.CNOT(qubit + 1, qubit) if layer % 2 else gates.CNOT(qubit, qubit + 1)
|
||||
circuit.add(gate)
|
||||
for qubit in range(1, nqubits - 1, 2):
|
||||
gate = gates.CNOT(qubit + 1, qubit) if layer % 2 == 0 else gates.CNOT(qubit, qubit + 1)
|
||||
circuit.add(gate)
|
||||
|
||||
elif kind == "rxx_rzz":
|
||||
add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.9, 0.9)))
|
||||
circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.9, 0.9)))
|
||||
|
||||
elif kind == "scramble":
|
||||
add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.8, 0.8)))
|
||||
circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.8, 0.8)))
|
||||
if layer % 5 == 4:
|
||||
circuit.add(gates.SWAP(qubit, qubit + 1))
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown circuit kind {kind!r}.")
|
||||
|
||||
return circuit
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
|
||||
del nqubits
|
||||
rng = np.random.default_rng(seed)
|
||||
raw = rng.normal(size=(dim, dim)) + 1j * rng.normal(size=(dim, dim))
|
||||
matrix = (raw + raw.conj().T) / 2.0
|
||||
matrix = matrix / np.linalg.norm(matrix)
|
||||
return {"matrix": matrix, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def observable(kind, nqubits, seed):
|
||||
q1 = nqubits // 4
|
||||
q2 = nqubits // 2
|
||||
q3 = (3 * nqubits) // 4
|
||||
last = nqubits - 1
|
||||
|
||||
if kind == "boundary_ZZ_q1":
|
||||
return hamiltonians.SymbolicHamiltonian(form=Z(q1 - 1) * Z(q1))
|
||||
if kind == "boundary_ZZ_q2":
|
||||
return hamiltonians.SymbolicHamiltonian(form=Z(q2 - 1) * Z(q2))
|
||||
if kind == "boundary_ZZ_q3":
|
||||
return hamiltonians.SymbolicHamiltonian(form=Z(q3 - 1) * Z(q3))
|
||||
if kind == "long_Z_5_sites":
|
||||
return hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(q1) * Z(q2) * Z(q3) * Z(last))
|
||||
if kind == "mixed_XZYZX":
|
||||
return hamiltonians.SymbolicHamiltonian(form=X(0) * Z(q1) * Y(q2) * Z(q3) * X(last))
|
||||
if kind == "ring_xz":
|
||||
form = 0
|
||||
for qubit in range(nqubits):
|
||||
form += 0.5 * X(qubit) * Z((qubit + 1) % nqubits)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
if kind == "open_zz":
|
||||
form = 0
|
||||
for qubit in range(nqubits - 1):
|
||||
form += (1.0 / max(1, nqubits - 1)) * Z(qubit) * Z(qubit + 1)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
if kind == "range2_xx":
|
||||
form = 0
|
||||
for qubit in range(nqubits - 2):
|
||||
form += (1.0 / max(1, nqubits - 2)) * X(qubit) * X(qubit + 2)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
if kind == "mixed_local":
|
||||
form = 0.25 * X(0) - 0.5 * Z(last) + 0.125 * X(q1) * Z(q2) * Y(q3)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
if kind == "complex_iZ0":
|
||||
return hamiltonians.SymbolicHamiltonian(form=1.0j * Z(0))
|
||||
if kind == "dense2_mid":
|
||||
return dense_observable(nqubits, (q2 - 1, q2), seed + 101, 4)
|
||||
if kind == "dense3_spread":
|
||||
return dense_observable(nqubits, (q1, q2, q3), seed + 202, 8)
|
||||
raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def selected_observables(args, case):
|
||||
if args.observables:
|
||||
return tuple(args.observables)
|
||||
if args.obs_filter:
|
||||
return tuple(x.strip() for x in args.obs_filter.split(",") if x.strip())
|
||||
return case.observables
|
||||
|
||||
|
||||
def apply_case_defaults(args):
|
||||
case = CASES[args.case]
|
||||
if args.nqubits is None:
|
||||
args.nqubits = case.nqubits
|
||||
if args.nlayers is None:
|
||||
args.nlayers = case.nlayers
|
||||
if args.bond == "case-default":
|
||||
args.bond = case.bond
|
||||
if args.seed is None:
|
||||
args.seed = case.seed
|
||||
args.observables = selected_observables(args, case)
|
||||
|
||||
|
||||
def run_case(args):
|
||||
set_torch_threads(args.torch_threads)
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
case = CASES[args.case]
|
||||
circuit = build_circuit(case.circuit_kind, args.nqubits, args.nlayers, args.seed)
|
||||
|
||||
if rank == 0:
|
||||
print("=" * 88, flush=True)
|
||||
print(
|
||||
"backend=vidal_mps "
|
||||
f"case={args.case} circuit={case.circuit_kind} ranks={size} "
|
||||
f"nqubits={args.nqubits} nlayers={args.nlayers} gates={len(circuit.queue)} "
|
||||
f"bond={format_optional(args.bond)} cut_ratio={format_optional(args.cut_ratio)} "
|
||||
f"torch_threads={args.torch_threads} seed={args.seed} "
|
||||
f"observables={','.join(args.observables)}",
|
||||
flush=True,
|
||||
)
|
||||
print("observable exact value abs_error rel_error seconds trunc_sum trunc_max status", flush=True)
|
||||
|
||||
for obs_name in args.observables:
|
||||
obs = observable(obs_name, args.nqubits, args.seed)
|
||||
exact = None
|
||||
if args.exact and rank == 0:
|
||||
if args.nqubits > args.exact_max_qubits:
|
||||
raise ValueError(
|
||||
f"--exact is limited to {args.exact_max_qubits} qubits by default."
|
||||
)
|
||||
exact = exact_for_observable(circuit, obs, args.nqubits)
|
||||
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module="torch",
|
||||
mpi_approach="CT",
|
||||
mpi_num_procs=size,
|
||||
fallback=False,
|
||||
)
|
||||
|
||||
comm.Barrier()
|
||||
start = time.perf_counter()
|
||||
try:
|
||||
value = backend.expectation(
|
||||
circuit,
|
||||
obs,
|
||||
preprocess=True,
|
||||
compile_circuit=False,
|
||||
)
|
||||
status = "ok"
|
||||
except Exception as exc:
|
||||
value = np.nan
|
||||
status = type(exc).__name__ + ":" + str(exc).split("\n", 1)[0]
|
||||
seconds = time.perf_counter() - start
|
||||
|
||||
if rank == 0:
|
||||
abs_error = float("nan") if exact is None else abs(value - exact)
|
||||
rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
|
||||
exact_text = "nan" if exact is None else f"{exact:.16e}"
|
||||
print(
|
||||
f"{obs_name} {exact_text} {value!r} "
|
||||
f"{abs_error:.6e} {rel_error:.6e} {seconds:.3f} "
|
||||
f"{backend.last_truncation_error:.6e} "
|
||||
f"{backend.last_max_truncation_error:.6e} {status}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("mode", choices=("run", "validate", "list"))
|
||||
parser.add_argument("--case", choices=sorted(CASES), default="main1")
|
||||
parser.add_argument("--observables", nargs="+")
|
||||
parser.add_argument("--obs-filter", default="")
|
||||
parser.add_argument("--nqubits", type=int)
|
||||
parser.add_argument("--nlayers", type=int)
|
||||
parser.add_argument("--bond", "--bonds", dest="bond", default="case-default")
|
||||
parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
|
||||
parser.add_argument("--seed", type=int)
|
||||
parser.add_argument("--torch-threads", type=int, default=8)
|
||||
parser.add_argument("--exact", action="store_true")
|
||||
parser.add_argument("--exact-max-qubits", type=int, default=24)
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.mode == "list":
|
||||
for name, case in CASES.items():
|
||||
print(
|
||||
f"{name}: circuit={case.circuit_kind} "
|
||||
f"observables={','.join(case.observables)} "
|
||||
f"nqubits={case.nqubits} nlayers={case.nlayers} "
|
||||
f"bond={case.bond} seed={case.seed}"
|
||||
)
|
||||
return
|
||||
|
||||
apply_case_defaults(args)
|
||||
if isinstance(args.bond, str):
|
||||
args.bond = optional_int(args.bond)
|
||||
|
||||
if args.mode == "validate":
|
||||
args.exact = True
|
||||
args.nqubits = min(args.nqubits, args.exact_max_qubits)
|
||||
|
||||
run_case(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,72 +0,0 @@
|
||||
"""Chrome trace profiler for the VidalBackend fast path."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
import torch
|
||||
from torch.profiler import ProfilerActivity, profile
|
||||
|
||||
from qibotn.benchmark_cases import build_circuit, terms_to_dict, observable_terms
|
||||
from qibotn.expectation_runner import ExpectationConfig, run_cpu_expectation
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--nqubits", type=int, default=34)
|
||||
parser.add_argument("--nlayers", type=int, default=20)
|
||||
parser.add_argument("--bond", type=int, default=512)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--torch-threads", type=int, default=32)
|
||||
parser.add_argument("--cut-ratio", type=float, default=1e-12)
|
||||
parser.add_argument("--profile-memory", action="store_true")
|
||||
parser.add_argument("--rows", type=int, default=60)
|
||||
args = parser.parse_args()
|
||||
|
||||
torch.set_num_threads(args.torch_threads)
|
||||
|
||||
prefix = f"profiles/vidal_n{args.nqubits}_l{args.nlayers}_b{args.bond}_t{args.torch_threads}"
|
||||
trace_path = Path(f"{prefix}.json")
|
||||
table_path = Path(f"{prefix}.txt")
|
||||
trace_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
circuit = build_circuit("brickwall_cnot", args.nqubits, args.nlayers, args.seed)
|
||||
observable = terms_to_dict(observable_terms("ring_xz", args.nqubits))
|
||||
config = ExpectationConfig(
|
||||
ansatz="mps",
|
||||
bond=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module="torch",
|
||||
torch_threads=args.torch_threads,
|
||||
)
|
||||
|
||||
print(
|
||||
f"profile vidal nqubits={args.nqubits} nlayers={args.nlayers} "
|
||||
f"bond={args.bond} threads={args.torch_threads}"
|
||||
)
|
||||
|
||||
with profile(
|
||||
activities=[ProfilerActivity.CPU],
|
||||
record_shapes=args.profile_memory,
|
||||
profile_memory=args.profile_memory,
|
||||
with_stack=args.profile_memory,
|
||||
) as prof:
|
||||
result = run_cpu_expectation(circuit, observable, config)
|
||||
|
||||
table = (
|
||||
f"expval={result.value:.16e}\n\n"
|
||||
f"# sorted by self_cpu_time_total\n"
|
||||
f"{prof.key_averages().table(sort_by='self_cpu_time_total', row_limit=args.rows)}\n\n"
|
||||
f"# sorted by cpu_time_total\n"
|
||||
f"{prof.key_averages().table(sort_by='cpu_time_total', row_limit=args.rows)}\n"
|
||||
)
|
||||
|
||||
print(table, end="")
|
||||
table_path.write_text(table, encoding="utf-8")
|
||||
prof.export_chrome_trace(str(trace_path))
|
||||
print(f"trace={trace_path}\ntable={table_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,109 +0,0 @@
|
||||
"""Compute and cache a qibojit state-vector reference for the ring-XZ observable."""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import math
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import numpy as np
|
||||
import qibo
|
||||
from qibo import Circuit, gates
|
||||
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
for _ in range(nlayers):
|
||||
for qubit in range(nqubits):
|
||||
circuit.add(gates.RY(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
for qubit in range(0, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(qubit, qubit + 1))
|
||||
for qubit in range(1, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(qubit, qubit + 1))
|
||||
return circuit
|
||||
|
||||
|
||||
def ring_xz_expectation(state, nqubits, chunk_size):
|
||||
value = 0.0
|
||||
for qubit in range(nqubits):
|
||||
next_qubit = (qubit + 1) % nqubits
|
||||
x_flip = 1 << (nqubits - 1 - qubit)
|
||||
z_shift = nqubits - 1 - next_qubit
|
||||
term = 0.0
|
||||
for start in range(0, state.size, chunk_size):
|
||||
stop = min(start + chunk_size, state.size)
|
||||
indices = np.arange(start, stop, dtype=np.int64)
|
||||
z_bit = (indices >> z_shift) & 1
|
||||
z_phase = 1 - 2 * z_bit
|
||||
term += np.vdot(state[indices ^ x_flip], z_phase * state[start:stop]).real
|
||||
value += 0.5 * term
|
||||
return float(value)
|
||||
|
||||
|
||||
def default_output_path(nqubits, nlayers, seed):
|
||||
return Path("references") / (
|
||||
f"qibojit_ring_xz_n{nqubits}_l{nlayers}_seed{seed}.json"
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--nqubits", type=int, default=32)
|
||||
parser.add_argument("--nlayers", type=int, default=3)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--output")
|
||||
parser.add_argument("--force", action="store_true")
|
||||
parser.add_argument("--allow-large", action="store_true")
|
||||
parser.add_argument("--max-state-gb", type=float, default=32.0)
|
||||
parser.add_argument("--chunk-size", type=int, default=1 << 20)
|
||||
args = parser.parse_args()
|
||||
|
||||
output = Path(args.output) if args.output else default_output_path(
|
||||
args.nqubits, args.nlayers, args.seed
|
||||
)
|
||||
if output.exists() and not args.force:
|
||||
with open(output, "r", encoding="utf-8") as f:
|
||||
data = json.load(f)
|
||||
print(f"loaded {output}")
|
||||
print(f"expectation={float(data['expectation']):.16e}")
|
||||
return
|
||||
|
||||
state_gb = (2**args.nqubits) * np.dtype(np.complex128).itemsize / (1024**3)
|
||||
if state_gb > args.max_state_gb and not args.allow_large:
|
||||
raise MemoryError(
|
||||
f"Estimated state vector alone is {state_gb:.1f} GiB. "
|
||||
"Pass --allow-large after confirming the node has enough memory."
|
||||
)
|
||||
|
||||
qibo.set_backend("qibojit")
|
||||
circuit = build_circuit(args.nqubits, args.nlayers, args.seed)
|
||||
|
||||
start = time.perf_counter()
|
||||
state = circuit().state(numpy=True).reshape(-1)
|
||||
expectation = ring_xz_expectation(state, args.nqubits, args.chunk_size)
|
||||
elapsed = time.perf_counter() - start
|
||||
|
||||
data = {
|
||||
"backend": "qibojit",
|
||||
"observable": "0.5 * sum_i X_i Z_((i+1) mod n)",
|
||||
"nqubits": args.nqubits,
|
||||
"nlayers": args.nlayers,
|
||||
"seed": args.seed,
|
||||
"expectation": expectation,
|
||||
"seconds": elapsed,
|
||||
"state_vector_gib_estimate": state_gb,
|
||||
}
|
||||
output.parent.mkdir(parents=True, exist_ok=True)
|
||||
with open(output, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, indent=2, sort_keys=True)
|
||||
f.write("\n")
|
||||
|
||||
print(f"saved {output}")
|
||||
print(f"expectation={expectation:.16e}")
|
||||
print(f"seconds={elapsed:.3f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,127 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Large CPU expectation benchmarks for two-server runs.
|
||||
#
|
||||
# Defaults assume two Intel Xeon Platinum 8558P servers with about 500 GiB RAM
|
||||
# each. Override HOSTFILE, PYTHON_BIN, MPIEXEC, or the per-case knobs below as
|
||||
# needed.
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
MPIEXEC="${MPIEXEC:-mpiexec}"
|
||||
HOSTFILE="${HOSTFILE:-hostfile}"
|
||||
|
||||
MPS_RANKS="${MPS_RANKS:-8}"
|
||||
MPS_THREADS="${MPS_THREADS:-12}"
|
||||
TN_RANKS="${TN_RANKS:-12}"
|
||||
TN_THREADS="${TN_THREADS:-8}"
|
||||
|
||||
export OMP_NUM_THREADS="${OMP_NUM_THREADS:-1}"
|
||||
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-1}"
|
||||
|
||||
run_mpi() {
|
||||
local ranks="$1"
|
||||
shift
|
||||
"$MPIEXEC" -hostfile "$HOSTFILE" -n "$ranks" "$PYTHON_BIN" "$@"
|
||||
}
|
||||
|
||||
run_case() {
|
||||
local title="$1"
|
||||
shift
|
||||
echo
|
||||
echo "================================================================================"
|
||||
echo "$title"
|
||||
echo "================================================================================"
|
||||
echo "HOSTFILE=$HOSTFILE PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC"
|
||||
echo "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS"
|
||||
echo "$*"
|
||||
"$@"
|
||||
}
|
||||
|
||||
case "${1:-help}" in
|
||||
smoke)
|
||||
run_case "MPS MPI smoke: n=40 layers=30 bond=2048" \
|
||||
run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
|
||||
--mpi --mps \
|
||||
--nqubits "${MPS_SMOKE_NQ:-40}" \
|
||||
--nlayers "${MPS_SMOKE_LAYERS:-30}" \
|
||||
--bond "${MPS_SMOKE_BOND:-2048}" \
|
||||
--torch-threads "$MPS_THREADS" \
|
||||
--circuits brickwall_cnot reversed_cnot shifted_cz \
|
||||
--observables ring_xz open_zz range2_xx
|
||||
|
||||
run_case "TN MPI smoke: n=32 layers=16 target_slices=12" \
|
||||
run_mpi "$TN_RANKS" benchmark_cpu_expectation.py \
|
||||
--mpi \
|
||||
--nqubits "${TN_SMOKE_NQ:-32}" \
|
||||
--nlayers "${TN_SMOKE_LAYERS:-16}" \
|
||||
--torch-threads "$TN_THREADS" \
|
||||
--circuits brickwall_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz range2_xx \
|
||||
--tn-target-slices "${TN_SMOKE_SLICES:-12}"
|
||||
;;
|
||||
|
||||
mps-long)
|
||||
run_case "MPS MPI long: n=64 layers=48 bond=4096" \
|
||||
run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
|
||||
--mpi --mps \
|
||||
--nqubits "${MPS_LONG_NQ:-64}" \
|
||||
--nlayers "${MPS_LONG_LAYERS:-48}" \
|
||||
--bond "${MPS_LONG_BOND:-4096}" \
|
||||
--torch-threads "$MPS_THREADS" \
|
||||
--circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz mixed_local range2_xx
|
||||
;;
|
||||
|
||||
mps-pressure)
|
||||
run_case "MPS MPI pressure: n=80 layers=64 bond=4096" \
|
||||
run_mpi "$MPS_RANKS" benchmark_cpu_expectation.py \
|
||||
--mpi --mps \
|
||||
--nqubits "${MPS_PRESSURE_NQ:-80}" \
|
||||
--nlayers "${MPS_PRESSURE_LAYERS:-64}" \
|
||||
--bond "${MPS_PRESSURE_BOND:-4096}" \
|
||||
--torch-threads "$MPS_THREADS" \
|
||||
--circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz swap_scramble \
|
||||
--observables ring_xz open_zz mixed_local range2_xx long_z_string
|
||||
;;
|
||||
|
||||
tn-long)
|
||||
run_case "TN MPI long: n=36 layers=20 target_slices=24" \
|
||||
run_mpi "$TN_RANKS" benchmark_cpu_expectation.py \
|
||||
--mpi \
|
||||
--nqubits "${TN_LONG_NQ:-36}" \
|
||||
--nlayers "${TN_LONG_LAYERS:-20}" \
|
||||
--torch-threads "$TN_THREADS" \
|
||||
--circuits brickwall_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz range2_xx \
|
||||
--tn-target-slices "${TN_LONG_SLICES:-24}"
|
||||
;;
|
||||
|
||||
all)
|
||||
"$0" smoke
|
||||
"$0" mps-long
|
||||
"$0" tn-long
|
||||
;;
|
||||
|
||||
help|*)
|
||||
cat >&2 <<'EOF'
|
||||
Usage: tools/run_cpu_large_cases.sh [smoke|mps-long|mps-pressure|tn-long|all]
|
||||
|
||||
Common overrides:
|
||||
HOSTFILE=hostfile
|
||||
PYTHON_BIN=.venv/bin/python
|
||||
MPIEXEC=mpiexec
|
||||
MPS_RANKS=8 MPS_THREADS=12
|
||||
TN_RANKS=12 TN_THREADS=8
|
||||
|
||||
Scale overrides:
|
||||
MPS_LONG_NQ=64 MPS_LONG_LAYERS=48 MPS_LONG_BOND=4096
|
||||
MPS_PRESSURE_NQ=80 MPS_PRESSURE_LAYERS=64 MPS_PRESSURE_BOND=4096
|
||||
TN_LONG_NQ=36 TN_LONG_LAYERS=20 TN_LONG_SLICES=24
|
||||
EOF
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
@@ -1,148 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Single-node CPU scale probes for expectation benchmarks.
|
||||
#
|
||||
# Intended for one 96-core / ~500 GiB RAM node. The default "probe" mode runs
|
||||
# moderate MPS and TN cases first. Larger modes are available after checking
|
||||
# runtime and memory from the probe output.
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
PYTHON_FLAGS="${PYTHON_FLAGS:--u}"
|
||||
MPIEXEC="${MPIEXEC:-mpiexec}"
|
||||
TIME_BIN="${TIME_BIN:-/usr/bin/time}"
|
||||
|
||||
MPS_RANKS="${MPS_RANKS:-8}"
|
||||
MPS_THREADS="${MPS_THREADS:-12}"
|
||||
TN_RANKS="${TN_RANKS:-8}"
|
||||
TN_THREADS="${TN_THREADS:-12}"
|
||||
|
||||
export OMP_NUM_THREADS="${OMP_NUM_THREADS:-1}"
|
||||
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-1}"
|
||||
|
||||
estimate_mps_memory() {
|
||||
local nqubits="$1"
|
||||
local bond="$2"
|
||||
"$PYTHON_BIN" - "$nqubits" "$bond" "$MPS_RANKS" <<'PY'
|
||||
import sys
|
||||
n = int(sys.argv[1])
|
||||
chi = int(sys.argv[2])
|
||||
ranks = int(sys.argv[3])
|
||||
resident = n * 2 * chi * chi * 16
|
||||
per_rank = resident / ranks
|
||||
print(
|
||||
"MPS rough resident memory: "
|
||||
f"total={resident / 1024**3:.1f} GiB "
|
||||
f"per_rank={per_rank / 1024**3:.1f} GiB "
|
||||
"(temporary eig/SVD workspaces are additional)"
|
||||
)
|
||||
PY
|
||||
}
|
||||
|
||||
run_timed() {
|
||||
echo
|
||||
echo "--------------------------------------------------------------------------------"
|
||||
echo "$*"
|
||||
echo "--------------------------------------------------------------------------------"
|
||||
"$TIME_BIN" -v "$@"
|
||||
}
|
||||
|
||||
run_mps_case() {
|
||||
local label="$1"
|
||||
local nqubits="$2"
|
||||
local nlayers="$3"
|
||||
local bond="$4"
|
||||
shift 4
|
||||
echo
|
||||
echo "================================================================================"
|
||||
echo "$label"
|
||||
echo "================================================================================"
|
||||
echo "PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC"
|
||||
echo "MPS_RANKS=$MPS_RANKS MPS_THREADS=$MPS_THREADS"
|
||||
echo "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS"
|
||||
estimate_mps_memory "$nqubits" "$bond"
|
||||
run_timed "$MPIEXEC" -n "$MPS_RANKS" "$PYTHON_BIN" $PYTHON_FLAGS benchmark_cpu_expectation.py \
|
||||
--mpi --mps \
|
||||
--nqubits "$nqubits" \
|
||||
--nlayers "$nlayers" \
|
||||
--bond "$bond" \
|
||||
--torch-threads "$MPS_THREADS" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
run_tn_case() {
|
||||
local label="$1"
|
||||
local nqubits="$2"
|
||||
local nlayers="$3"
|
||||
shift 3
|
||||
echo
|
||||
echo "================================================================================"
|
||||
echo "$label"
|
||||
echo "================================================================================"
|
||||
echo "PYTHON_BIN=$PYTHON_BIN MPIEXEC=$MPIEXEC"
|
||||
echo "TN_RANKS=$TN_RANKS TN_THREADS=$TN_THREADS"
|
||||
echo "OMP_NUM_THREADS=$OMP_NUM_THREADS MKL_NUM_THREADS=$MKL_NUM_THREADS"
|
||||
echo "TN memory is contraction-tree dependent; increase --tn-target-slices if RSS is high."
|
||||
run_timed "$MPIEXEC" -n "$TN_RANKS" "$PYTHON_BIN" $PYTHON_FLAGS benchmark_cpu_expectation.py \
|
||||
--mpi \
|
||||
--nqubits "$nqubits" \
|
||||
--nlayers "$nlayers" \
|
||||
--torch-threads "$TN_THREADS" \
|
||||
"$@"
|
||||
}
|
||||
|
||||
case "${1:-help}" in
|
||||
probe)
|
||||
run_mps_case "MPS probe: n=40 layers=30 bond=2048" 40 30 2048 \
|
||||
--circuits brickwall_cnot \
|
||||
--observables ring_xz
|
||||
|
||||
run_tn_case "TN probe: n=28 layers=12 target_slices=8" 28 12 \
|
||||
--circuits brickwall_cnot \
|
||||
--observables ring_xz \
|
||||
--tn-target-slices 8
|
||||
;;
|
||||
|
||||
mps-medium)
|
||||
run_mps_case "MPS medium: n=56 layers=40 bond=3072" 56 40 3072 \
|
||||
--circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz mixed_local range2_xx
|
||||
;;
|
||||
|
||||
mps-long)
|
||||
run_mps_case "MPS long: n=64 layers=48 bond=4096" 64 48 4096 \
|
||||
--circuits brickwall_cnot reversed_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz mixed_local range2_xx
|
||||
;;
|
||||
|
||||
tn-medium)
|
||||
run_tn_case "TN medium: n=32 layers=16 target_slices=16" 32 16 \
|
||||
--circuits brickwall_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz range2_xx \
|
||||
--tn-target-slices 16
|
||||
;;
|
||||
|
||||
tn-long)
|
||||
run_tn_case "TN long: n=36 layers=20 target_slices=32" 36 20 \
|
||||
--circuits brickwall_cnot shifted_cz rxx_rzz \
|
||||
--observables ring_xz open_zz range2_xx \
|
||||
--tn-target-slices 32
|
||||
;;
|
||||
|
||||
help|*)
|
||||
cat >&2 <<'EOF'
|
||||
Usage: tools/run_cpu_single_cases.sh [probe|mps-medium|mps-long|tn-medium|tn-long]
|
||||
|
||||
Common overrides:
|
||||
PYTHON_BIN=.venv/bin/python
|
||||
MPIEXEC=mpiexec
|
||||
MPS_RANKS=8 MPS_THREADS=12
|
||||
TN_RANKS=8 TN_THREADS=12
|
||||
OMP_NUM_THREADS=1 MKL_NUM_THREADS=1
|
||||
EOF
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
@@ -1,243 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""Run TN expectation for a user-provided circuit and observable.
|
||||
|
||||
The case module should define:
|
||||
|
||||
def build_circuit(nqubits, nlayers, seed): ...
|
||||
def build_observable(nqubits, seed): ...
|
||||
|
||||
``build_observable`` may return a Qibo SymbolicHamiltonian/form or the qibotn
|
||||
dict form:
|
||||
|
||||
{"terms": [
|
||||
{"coefficient": 1.0, "operators": [("X", 0), ("Z", 1)]},
|
||||
]}
|
||||
|
||||
For a single repeated Pauli string, pass ``--pauli-pattern`` instead of
|
||||
defining ``build_observable``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import importlib.util
|
||||
import inspect
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC = ROOT / "src"
|
||||
if str(SRC) not in sys.path:
|
||||
sys.path.insert(0, str(SRC))
|
||||
|
||||
from qibotn.expectation_runner import ( # noqa: E402
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
def optional_int(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return float(text)
|
||||
|
||||
|
||||
def load_module(path):
|
||||
path = Path(path).resolve()
|
||||
spec = importlib.util.spec_from_file_location(path.stem, path)
|
||||
if spec is None or spec.loader is None:
|
||||
raise RuntimeError(f"Cannot import case module from {path}.")
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
def call_builder(fn, **kwargs):
|
||||
sig = inspect.signature(fn)
|
||||
if any(p.kind == p.VAR_KEYWORD for p in sig.parameters.values()):
|
||||
return fn(**kwargs)
|
||||
accepted = {
|
||||
name: value
|
||||
for name, value in kwargs.items()
|
||||
if name in sig.parameters
|
||||
}
|
||||
return fn(**accepted)
|
||||
|
||||
|
||||
def load_observable(args, module):
|
||||
if args.pauli_pattern:
|
||||
return {"pauli_string_pattern": args.pauli_pattern}
|
||||
if args.observable_json:
|
||||
with Path(args.observable_json).open() as f:
|
||||
return json.load(f)
|
||||
if hasattr(module, "build_observable"):
|
||||
return call_builder(
|
||||
module.build_observable,
|
||||
nqubits=args.nqubits,
|
||||
nlayers=args.nlayers,
|
||||
seed=args.seed,
|
||||
)
|
||||
if hasattr(module, "OBSERVABLE"):
|
||||
return module.OBSERVABLE
|
||||
raise ValueError(
|
||||
"No observable supplied. Define build_observable/OBSERVABLE in the case "
|
||||
"module, or pass --pauli-pattern / --observable-json."
|
||||
)
|
||||
|
||||
|
||||
def build_parallel_opts(args):
|
||||
slicing_opts = {}
|
||||
if args.tn_target_slices is not None:
|
||||
slicing_opts["target_slices"] = args.tn_target_slices
|
||||
if args.tn_target_size is not None:
|
||||
slicing_opts["target_size"] = args.tn_target_size
|
||||
|
||||
opts = {
|
||||
"slicing_opts": slicing_opts or None,
|
||||
"search_workers": args.tn_search_workers or args.torch_threads,
|
||||
"max_repeats": args.tn_search_repeats,
|
||||
"max_time": args.tn_search_time,
|
||||
"print_stats": not args.no_tn_stats,
|
||||
}
|
||||
if args.tn_search_backend is not None:
|
||||
opts["search_backend"] = args.tn_search_backend
|
||||
if args.dask_address is not None:
|
||||
opts["dask_address"] = args.dask_address
|
||||
if args.dask_close_workers:
|
||||
opts["dask_close_workers"] = True
|
||||
if args.tn_save_tree is not None:
|
||||
opts["save_tree_path"] = args.tn_save_tree
|
||||
if args.tn_load_tree is not None:
|
||||
opts["load_tree_path"] = args.tn_load_tree
|
||||
if args.tn_search_only:
|
||||
opts["search_only"] = True
|
||||
return opts
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Run CPU TN expectation for a custom qibo circuit module."
|
||||
)
|
||||
parser.add_argument("case_module", help="Python file defining build_circuit.")
|
||||
parser.add_argument("--nqubits", type=int, required=True)
|
||||
parser.add_argument("--nlayers", type=int, default=0)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--mpi", action="store_true")
|
||||
parser.add_argument("--exact", action="store_true")
|
||||
parser.add_argument("--exact-max-qubits", type=int, default=24)
|
||||
parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
|
||||
parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
|
||||
parser.add_argument("--torch-threads", type=int, default=8)
|
||||
parser.add_argument("--quimb-backend", choices=("numpy", "torch"), default="torch")
|
||||
parser.add_argument("--dtype", choices=("complex128", "complex64"), default="complex128")
|
||||
parser.add_argument("--pauli-pattern")
|
||||
parser.add_argument("--observable-json")
|
||||
parser.add_argument("--tn-target-slices", type=int)
|
||||
parser.add_argument("--tn-target-size", type=int, default=2**32)
|
||||
parser.add_argument("--tn-search-workers", type=int)
|
||||
parser.add_argument("--tn-search-repeats", type=int, default=128)
|
||||
parser.add_argument("--tn-search-time", type=float, default=60.0)
|
||||
parser.add_argument("--tn-search-backend", choices=("processpool", "dask"))
|
||||
parser.add_argument("--dask-address")
|
||||
parser.add_argument("--dask-close-workers", action="store_true")
|
||||
parser.add_argument("--tn-save-tree")
|
||||
parser.add_argument("--tn-load-tree")
|
||||
parser.add_argument("--tn-search-only", action="store_true")
|
||||
parser.add_argument("--no-tn-stats", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
rank = 0
|
||||
if args.mpi:
|
||||
from mpi4py import MPI
|
||||
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
|
||||
module = load_module(args.case_module)
|
||||
if not hasattr(module, "build_circuit"):
|
||||
raise ValueError("case_module must define build_circuit.")
|
||||
|
||||
circuit = call_builder(
|
||||
module.build_circuit,
|
||||
nqubits=args.nqubits,
|
||||
nlayers=args.nlayers,
|
||||
seed=args.seed,
|
||||
)
|
||||
observable = load_observable(args, module)
|
||||
|
||||
config = ExpectationConfig(
|
||||
ansatz="tn",
|
||||
mpi=args.mpi,
|
||||
bond=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module="torch",
|
||||
quimb_backend=args.quimb_backend,
|
||||
dtype=args.dtype,
|
||||
torch_threads=args.torch_threads,
|
||||
parallel_opts=build_parallel_opts(args),
|
||||
)
|
||||
|
||||
if rank == 0:
|
||||
mode = "MPI" if args.mpi else "serial"
|
||||
print(
|
||||
f"backend=cpu ansatz=TN mode={mode} case={Path(args.case_module).name} "
|
||||
f"nqubits={args.nqubits} nlayers={args.nlayers} seed={args.seed} "
|
||||
f"quimb_backend={args.quimb_backend} dtype={args.dtype} "
|
||||
f"torch_threads={args.torch_threads}",
|
||||
flush=True,
|
||||
)
|
||||
print("observable exact value abs_error rel_error seconds", flush=True)
|
||||
|
||||
exact = None
|
||||
if args.exact and rank == 0:
|
||||
if args.nqubits > args.exact_max_qubits:
|
||||
raise ValueError(
|
||||
f"--exact is limited to {args.exact_max_qubits} qubits by default."
|
||||
)
|
||||
exact = exact_for_observable(circuit, observable, args.nqubits)
|
||||
|
||||
result = run_cpu_expectation(circuit, observable, config)
|
||||
if args.mpi and result.rank != 0:
|
||||
return
|
||||
|
||||
abs_error = float("nan") if exact is None else abs(result.value - exact)
|
||||
rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
|
||||
exact_text = "nan" if exact is None else f"{exact:.16e}"
|
||||
print(
|
||||
f"custom {exact_text} {result.value:.16e} "
|
||||
f"{abs_error:.6e} {rel_error:.6e} {result.seconds:.3f}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
for stat in result.parallel_stats or ():
|
||||
cost = stat["path_cost"]
|
||||
search_stats = stat.get("search_stats", {})
|
||||
print(
|
||||
"tn_term_summary "
|
||||
f"term={stat.get('term_index', 0)} "
|
||||
f"search_seconds={stat.get('search_seconds', float('nan')):.3f} "
|
||||
f"contract_seconds={stat.get('contract_seconds', float('nan')):.3f} "
|
||||
f"completed_trials={search_stats.get('completed_trials', 'na')} "
|
||||
f"finite_trials={search_stats.get('finite_trials', 'na')} "
|
||||
f"failed_trials={search_stats.get('failed_trials', 'na')} "
|
||||
f"requested_trials={search_stats.get('requested_trials', 'na')} "
|
||||
f"best_score={search_stats.get('best_score', float('nan')):.6g} "
|
||||
f"slices={cost.get('slices')} "
|
||||
f"log10_flops={cost.get('log10_flops', float('nan')):.3f} "
|
||||
f"log10_write={cost.get('log10_write', float('nan')):.3f} "
|
||||
f"log2_size={cost.get('log2_size', float('nan')):.3f} "
|
||||
f"peak_memory_gib={cost.get('peak_memory_gib', float('nan')):.3g} "
|
||||
f"rank_slices={stat.get('rank_slices')}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,93 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
CASE="${CASE:-main1}"
|
||||
OBSERVABLES="${OBSERVABLES:-long_z_string}"
|
||||
NQUBITS="${NQUBITS:-34}"
|
||||
NLAYERS="${NLAYERS:-20}"
|
||||
TORCH_THREADS="${TORCH_THREADS:-48}"
|
||||
SEARCH_REPEATS="${SEARCH_REPEATS:-2048}"
|
||||
SEARCH_TIME="${SEARCH_TIME:-300}"
|
||||
TN_TARGET_SIZE="${TN_TARGET_SIZE:-8589934592}"
|
||||
TN_TARGET_SLICES="${TN_TARGET_SLICES:-}"
|
||||
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
DTYPE="${DTYPE:-complex64}"
|
||||
TREE_DIR="${TREE_DIR:-trees/contest_tn}"
|
||||
DASK_ADDRESS="${DASK_ADDRESS:-tcp://10.20.1.103:8786}"
|
||||
MPIEXEC_FULL="${MPIEXEC_FULL:-mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2}"
|
||||
SYNC_TREES="${SYNC_TREES:-1}"
|
||||
SYNC_HOSTS="${SYNC_HOSTS:-${WORKER_HOSTS:-}}"
|
||||
SSH_BIN="${SSH_BIN:-ssh}"
|
||||
|
||||
export TCM_ENABLE="${TCM_ENABLE:-1}"
|
||||
|
||||
tn_slice_args=(--tn-target-size "$TN_TARGET_SIZE")
|
||||
if [[ -n "$TN_TARGET_SLICES" ]]; then
|
||||
tn_slice_args+=(--tn-target-slices "$TN_TARGET_SLICES")
|
||||
fi
|
||||
|
||||
is_local_host() {
|
||||
local host="$1"
|
||||
[[ "$host" == "localhost" || "$host" == "127.0.0.1" ]] && return 0
|
||||
[[ "$host" == "$(hostname)" ]] && return 0
|
||||
[[ "$host" == "$(hostname -f 2>/dev/null || true)" ]] && return 0
|
||||
hostname -I 2>/dev/null | tr ' ' '\n' | grep -qx "$host"
|
||||
}
|
||||
|
||||
sync_trees_to_hosts() {
|
||||
[[ "$SYNC_TREES" == "1" ]] || return 0
|
||||
[[ -n "$SYNC_HOSTS" ]] || return 0
|
||||
|
||||
local src_dir="$TREE_DIR"
|
||||
local dst_dir="$TREE_DIR"
|
||||
if [[ "$TREE_DIR" != /* ]]; then
|
||||
src_dir="$ROOT_DIR/$TREE_DIR"
|
||||
dst_dir="$ROOT_DIR/$TREE_DIR"
|
||||
fi
|
||||
|
||||
for host in $SYNC_HOSTS; do
|
||||
is_local_host "$host" && continue
|
||||
echo "Sync tree dir to $host:$dst_dir"
|
||||
"$SSH_BIN" "$host" "mkdir -p $(printf '%q' "$dst_dir")"
|
||||
if command -v rsync >/dev/null 2>&1; then
|
||||
rsync -a "$src_dir/" "$host:$dst_dir/"
|
||||
else
|
||||
scp -q "$src_dir"/*.pkl "$host:$dst_dir/"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
tools/manage_tn_dask_cluster.sh start
|
||||
|
||||
echo "Search with dask: $DASK_ADDRESS"
|
||||
"$PYTHON_BIN" -u tools/tn_contest_runner.py search \
|
||||
--case "$CASE" \
|
||||
--nqubits "$NQUBITS" \
|
||||
--nlayers "$NLAYERS" \
|
||||
--observables $OBSERVABLES \
|
||||
--tree-dir "$TREE_DIR" \
|
||||
--dask-address "$DASK_ADDRESS" \
|
||||
--torch-threads "$TORCH_THREADS" \
|
||||
--dtype "$DTYPE" \
|
||||
--tn-search-repeats "$SEARCH_REPEATS" \
|
||||
--tn-search-time "$SEARCH_TIME" \
|
||||
"${tn_slice_args[@]}"
|
||||
|
||||
sync_trees_to_hosts
|
||||
|
||||
echo "Contract with MPI: $MPIEXEC_FULL"
|
||||
read -r -a mpi_prefix <<< "$MPIEXEC_FULL"
|
||||
"${mpi_prefix[@]}" "$PYTHON_BIN" -u tools/tn_contest_runner.py contract \
|
||||
--mpi \
|
||||
--case "$CASE" \
|
||||
--nqubits "$NQUBITS" \
|
||||
--nlayers "$NLAYERS" \
|
||||
--observables $OBSERVABLES \
|
||||
--tree-dir "$TREE_DIR" \
|
||||
--torch-threads "$TORCH_THREADS" \
|
||||
--dtype "$DTYPE" \
|
||||
"${tn_slice_args[@]}"
|
||||
@@ -1,340 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
# Contest-style Vidal/MPI MPS cases.
|
||||
#
|
||||
# Usage:
|
||||
# tools/run_vidal_mpi_contest_cases.sh main1
|
||||
# tools/run_vidal_mpi_contest_cases.sh main2
|
||||
# tools/run_vidal_mpi_contest_cases.sh strong
|
||||
# tools/run_vidal_mpi_contest_cases.sh all
|
||||
#
|
||||
# Common overrides:
|
||||
# PYTHON_BIN=.venv/bin/python
|
||||
# MPIEXEC=mpiexec
|
||||
# MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2"
|
||||
# HOSTFILE=hostfile # optional; used only if the file exists
|
||||
# RANKS=8
|
||||
# TORCH_THREADS=8
|
||||
# CUT_RATIO=1e-12
|
||||
# OBS_FILTER="boundary_ZZ_q2 ring_xz dense3_spread complex_iZ0"
|
||||
#
|
||||
# Per-case overrides:
|
||||
# MAIN1_NQ=128 MAIN1_LAYERS=50 MAIN1_BOND=1024 MAIN1_SEED=31001
|
||||
# MAIN2_NQ=128 MAIN2_LAYERS=64 MAIN2_BOND=2048 MAIN2_SEED=31002
|
||||
# STRONG_NQ=256 STRONG_LAYERS=64 STRONG_BOND=2048 STRONG_SEED=41001
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
MPIEXEC="${MPIEXEC:-mpiexec}"
|
||||
HOSTFILE="${HOSTFILE:-}"
|
||||
RANKS="${RANKS:-4}"
|
||||
TORCH_THREADS="${TORCH_THREADS:-1}"
|
||||
CUT_RATIO="${CUT_RATIO:-1e-12}"
|
||||
OBS_FILTER="${OBS_FILTER:-}"
|
||||
|
||||
RUNNER_DIR="$ROOT_DIR/.tmp"
|
||||
mkdir -p "$RUNNER_DIR"
|
||||
RUNNER="$(mktemp "$RUNNER_DIR/qibotn_vidal_contest.XXXXXX.py")"
|
||||
cleanup() {
|
||||
rm -f "$RUNNER"
|
||||
}
|
||||
trap cleanup EXIT
|
||||
|
||||
cat > "$RUNNER" <<'PY'
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
|
||||
try:
|
||||
import torch
|
||||
|
||||
torch.set_num_threads(nthreads)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
|
||||
for layer in range(nlayers):
|
||||
for q in range(nqubits):
|
||||
circuit.add(gates.RY(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
if kind in ("rxx_rzz", "scramble"):
|
||||
circuit.add(gates.RX(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
if kind == "reversed_cnot":
|
||||
for q in range(0, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q + 1, q) if layer % 2 else gates.CNOT(q, q + 1))
|
||||
for q in range(1, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q + 1, q) if layer % 2 == 0 else gates.CNOT(q, q + 1))
|
||||
elif kind == "rxx_rzz":
|
||||
for q in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(q, q + 1, theta=rng.uniform(-0.9, 0.9)))
|
||||
circuit.add(gates.RZZ(q, q + 1, theta=rng.uniform(-0.9, 0.9)))
|
||||
elif kind == "scramble":
|
||||
for q in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(q, q + 1, theta=rng.uniform(-0.8, 0.8)))
|
||||
circuit.add(gates.RZZ(q, q + 1, theta=rng.uniform(-0.8, 0.8)))
|
||||
if layer % 5 == 4:
|
||||
circuit.add(gates.SWAP(q, q + 1))
|
||||
else:
|
||||
raise ValueError(f"Unknown circuit kind {kind!r}.")
|
||||
|
||||
return circuit
|
||||
|
||||
|
||||
def ring_xz(nqubits):
|
||||
form = 0
|
||||
for q in range(nqubits):
|
||||
form += 0.5 * X(q) * Z((q + 1) % nqubits)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def open_zz(nqubits):
|
||||
form = 0
|
||||
for q in range(nqubits - 1):
|
||||
form += (1.0 / (nqubits - 1)) * Z(q) * Z(q + 1)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def range2_xx(nqubits):
|
||||
form = 0
|
||||
for q in range(nqubits - 2):
|
||||
form += (1.0 / (nqubits - 2)) * X(q) * X(q + 2)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
|
||||
rng = np.random.default_rng(seed)
|
||||
raw = rng.normal(size=(dim, dim)) + 1j * rng.normal(size=(dim, dim))
|
||||
matrix = (raw + raw.conj().T) / 2.0
|
||||
matrix = matrix / np.linalg.norm(matrix)
|
||||
return {"matrix": matrix, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def observables_for_case(nqubits, seed):
|
||||
q1 = nqubits // 4
|
||||
q2 = nqubits // 2
|
||||
q3 = (3 * nqubits) // 4
|
||||
last = nqubits - 1
|
||||
|
||||
return [
|
||||
("boundary_ZZ_q1", hamiltonians.SymbolicHamiltonian(form=Z(q1 - 1) * Z(q1))),
|
||||
("boundary_ZZ_q2", hamiltonians.SymbolicHamiltonian(form=Z(q2 - 1) * Z(q2))),
|
||||
("boundary_ZZ_q3", hamiltonians.SymbolicHamiltonian(form=Z(q3 - 1) * Z(q3))),
|
||||
(
|
||||
"long_Z_5_sites",
|
||||
hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(q1) * Z(q2) * Z(q3) * Z(last)),
|
||||
),
|
||||
(
|
||||
"mixed_XZYZX",
|
||||
hamiltonians.SymbolicHamiltonian(form=X(0) * Z(q1) * Y(q2) * Z(q3) * X(last)),
|
||||
),
|
||||
("ring_xz", ring_xz(nqubits)),
|
||||
("open_zz", open_zz(nqubits)),
|
||||
("range2_xx", range2_xx(nqubits)),
|
||||
("complex_iZ0", hamiltonians.SymbolicHamiltonian(form=1.0j * Z(0))),
|
||||
("dense2_mid", dense_observable(nqubits, (q2 - 1, q2), seed + 101, 4)),
|
||||
("dense3_spread", dense_observable(nqubits, (q1, q2, q3), seed + 202, 8)),
|
||||
]
|
||||
|
||||
|
||||
def run_case(args):
|
||||
set_torch_threads(args.torch_threads)
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
circuit = build_circuit(args.kind, args.nqubits, args.nlayers, args.seed)
|
||||
observables = observables_for_case(args.nqubits, args.seed)
|
||||
if args.obs_filter:
|
||||
wanted = set(args.obs_filter.split(","))
|
||||
observables = [(name, obs) for name, obs in observables if name in wanted]
|
||||
if not observables:
|
||||
raise ValueError(f"OBS_FILTER matched no observables: {args.obs_filter!r}")
|
||||
|
||||
if rank == 0:
|
||||
print("=" * 88, flush=True)
|
||||
print(
|
||||
"case "
|
||||
f"label={args.label} kind={args.kind} ranks={size} "
|
||||
f"nqubits={args.nqubits} nlayers={args.nlayers} gates={len(circuit.queue)} "
|
||||
f"bond={args.bond} cut_ratio={args.cut_ratio:g} "
|
||||
f"torch_threads={args.torch_threads} seed={args.seed} "
|
||||
f"obs_filter={args.obs_filter or 'all'}",
|
||||
flush=True,
|
||||
)
|
||||
print(
|
||||
"observable value seconds trunc_sum trunc_max status",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
for obs_name, observable in observables:
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module="torch",
|
||||
mpi_approach="CT",
|
||||
mpi_num_procs=size,
|
||||
fallback=False,
|
||||
)
|
||||
|
||||
comm.Barrier()
|
||||
start = time.perf_counter()
|
||||
try:
|
||||
value = backend.expectation(
|
||||
circuit,
|
||||
observable,
|
||||
preprocess=True,
|
||||
compile_circuit=False,
|
||||
)
|
||||
status = "ok"
|
||||
except Exception as exc: # pragma: no cover - printed for manual runs
|
||||
value = np.nan
|
||||
status = type(exc).__name__ + ":" + str(exc).split("\n", 1)[0]
|
||||
seconds = time.perf_counter() - start
|
||||
|
||||
if rank == 0:
|
||||
print(
|
||||
f"{obs_name} {value!r} {seconds:.3f} "
|
||||
f"{backend.last_truncation_error:.6e} "
|
||||
f"{backend.last_max_truncation_error:.6e} {status}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--label", required=True)
|
||||
parser.add_argument("--kind", choices=("reversed_cnot", "rxx_rzz", "scramble"), required=True)
|
||||
parser.add_argument("--nqubits", type=int, required=True)
|
||||
parser.add_argument("--nlayers", type=int, required=True)
|
||||
parser.add_argument("--bond", type=int, required=True)
|
||||
parser.add_argument("--cut-ratio", type=float, required=True)
|
||||
parser.add_argument("--seed", type=int, required=True)
|
||||
parser.add_argument("--torch-threads", type=int, required=True)
|
||||
parser.add_argument("--obs-filter", default="")
|
||||
run_case(parser.parse_args())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
PY
|
||||
|
||||
if [[ -n "${MPIEXEC_FULL:-}" ]]; then
|
||||
read -r -a mpi_prefix <<< "$MPIEXEC_FULL"
|
||||
else
|
||||
mpi_prefix=("$MPIEXEC")
|
||||
if [[ -n "$HOSTFILE" && -f "$HOSTFILE" ]]; then
|
||||
mpi_prefix+=("-hostfile" "$HOSTFILE")
|
||||
fi
|
||||
mpi_prefix+=("-n" "$RANKS")
|
||||
fi
|
||||
|
||||
run_case() {
|
||||
local label="$1"
|
||||
local kind="$2"
|
||||
local nq="$3"
|
||||
local layers="$4"
|
||||
local bond="$5"
|
||||
local seed="$6"
|
||||
|
||||
echo
|
||||
echo "Running $label: kind=$kind nqubits=$nq layers=$layers bond=$bond seed=$seed"
|
||||
echo "MPI: ${mpi_prefix[*]}"
|
||||
"${mpi_prefix[@]}" "$PYTHON_BIN" -u "$ROOT_DIR/tools/vidal_mpi_contest_runner.py" \
|
||||
--label "$label" \
|
||||
--kind "$kind" \
|
||||
--nqubits "$nq" \
|
||||
--nlayers "$layers" \
|
||||
--bond "$bond" \
|
||||
--cut-ratio "$CUT_RATIO" \
|
||||
--seed "$seed" \
|
||||
--torch-threads "$TORCH_THREADS" \
|
||||
--obs-filter "$(tr ' ' ',' <<< "$OBS_FILTER")"
|
||||
}
|
||||
|
||||
case "${1:-help}" in
|
||||
main1)
|
||||
run_case \
|
||||
"main1-reversed-cnot" \
|
||||
"reversed_cnot" \
|
||||
"${MAIN1_NQ:-128}" \
|
||||
"${MAIN1_LAYERS:-50}" \
|
||||
"${MAIN1_BOND:-1024}" \
|
||||
"${MAIN1_SEED:-31001}"
|
||||
;;
|
||||
main2)
|
||||
run_case \
|
||||
"main2-rxx-rzz" \
|
||||
"rxx_rzz" \
|
||||
"${MAIN2_NQ:-128}" \
|
||||
"${MAIN2_LAYERS:-64}" \
|
||||
"${MAIN2_BOND:-2048}" \
|
||||
"${MAIN2_SEED:-31002}"
|
||||
;;
|
||||
strong)
|
||||
run_case \
|
||||
"strong-scramble" \
|
||||
"scramble" \
|
||||
"${STRONG_NQ:-256}" \
|
||||
"${STRONG_LAYERS:-64}" \
|
||||
"${STRONG_BOND:-2048}" \
|
||||
"${STRONG_SEED:-41001}"
|
||||
;;
|
||||
all)
|
||||
"$0" main1
|
||||
"$0" main2
|
||||
"$0" strong
|
||||
;;
|
||||
smoke)
|
||||
MAIN1_NQ="${MAIN1_NQ:-32}" \
|
||||
MAIN1_LAYERS="${MAIN1_LAYERS:-6}" \
|
||||
MAIN1_BOND="${MAIN1_BOND:-128}" \
|
||||
"$0" main1
|
||||
;;
|
||||
help|*)
|
||||
cat >&2 <<'EOF'
|
||||
Usage: tools/run_vidal_mpi_contest_cases.sh [main1|main2|strong|all|smoke]
|
||||
|
||||
Cases:
|
||||
main1 128 qubits, 50 layers, reversed-CNOT brickwall, chi=1024
|
||||
main2 128 qubits, 64 layers, RXX/RZZ brickwall, chi=2048
|
||||
strong 256 qubits, 64 layers, RXX/RZZ + periodic SWAP scramble, chi=2048
|
||||
smoke Small syntax/runtime check of main1
|
||||
|
||||
Common overrides:
|
||||
PYTHON_BIN=.venv/bin/python
|
||||
MPIEXEC=mpiexec
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2"
|
||||
HOSTFILE=hostfile
|
||||
RANKS=8
|
||||
TORCH_THREADS=8
|
||||
CUT_RATIO=1e-12
|
||||
OBS_FILTER="boundary_ZZ_q2 ring_xz dense3_spread complex_iZ0"
|
||||
|
||||
Per-case overrides:
|
||||
MAIN1_NQ=128 MAIN1_LAYERS=50 MAIN1_BOND=1024 MAIN1_SEED=31001
|
||||
MAIN2_NQ=128 MAIN2_LAYERS=64 MAIN2_BOND=2048 MAIN2_SEED=31002
|
||||
STRONG_NQ=256 STRONG_LAYERS=64 STRONG_BOND=2048 STRONG_SEED=41001
|
||||
EOF
|
||||
exit 2
|
||||
;;
|
||||
esac
|
||||
@@ -1,70 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
NQ="${NQ:-34}"
|
||||
LAYERS="${LAYERS:-20}"
|
||||
BOND="${BOND:-512}"
|
||||
SEED="${SEED:-42}"
|
||||
RANKS="${RANKS:-1 2 4}"
|
||||
THREADS="${THREADS:-32 32 16}"
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
MPIEXEC="${MPIEXEC:-mpiexec}"
|
||||
CIRCUIT="${CIRCUIT:-brickwall_cnot}"
|
||||
OBSERVABLE="${OBSERVABLE:-ring_xz}"
|
||||
EXACT="${EXACT:-0}"
|
||||
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
if [[ "${1:-help}" != "run" ]]; then
|
||||
cat >&2 <<'EOF'
|
||||
Usage: tools/run_vidal_segment_mpi_scan.sh run
|
||||
|
||||
Overrides:
|
||||
NQ=34 LAYERS=20 BOND=512 SEED=42
|
||||
RANKS="1 2 4" THREADS="32 32 16"
|
||||
CIRCUIT=brickwall_cnot OBSERVABLE=ring_xz
|
||||
EXACT=1
|
||||
PYTHON_BIN=.venv/bin/python MPIEXEC=mpiexec
|
||||
EOF
|
||||
if [[ "${1:-help}" == "help" ]]; then
|
||||
exit 0
|
||||
fi
|
||||
exit 2
|
||||
fi
|
||||
|
||||
read -r -a ranks <<< "$RANKS"
|
||||
read -r -a threads <<< "$THREADS"
|
||||
|
||||
if [[ "${#ranks[@]}" != "${#threads[@]}" ]]; then
|
||||
echo "RANKS and THREADS must have the same number of entries." >&2
|
||||
exit 2
|
||||
fi
|
||||
|
||||
common=(
|
||||
--nqubits "$NQ"
|
||||
--nlayers "$LAYERS"
|
||||
--bond "$BOND"
|
||||
--seed "$SEED"
|
||||
--mps
|
||||
--circuits "$CIRCUIT"
|
||||
--observables "$OBSERVABLE"
|
||||
)
|
||||
|
||||
if [[ "$EXACT" == "1" ]]; then
|
||||
common+=(--exact)
|
||||
fi
|
||||
|
||||
for idx in "${!ranks[@]}"; do
|
||||
nrank="${ranks[$idx]}"
|
||||
nthr="${threads[$idx]}"
|
||||
if [[ "$nrank" == "1" ]]; then
|
||||
echo "== Vidal serial ranks=1 torch_threads=$nthr =="
|
||||
"$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
"${common[@]}" --torch-threads "$nthr"
|
||||
else
|
||||
echo "== Vidal segmented MPI ranks=$nrank torch_threads=$nthr =="
|
||||
"$MPIEXEC" -n "$nrank" "$PYTHON_BIN" -u benchmark_cpu_expectation.py \
|
||||
"${common[@]}" --torch-threads "$nthr" --mpi
|
||||
fi
|
||||
done
|
||||
@@ -1,59 +0,0 @@
|
||||
"""Slice an existing saved cotengra tree without re-running path search."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import pickle
|
||||
from pathlib import Path
|
||||
|
||||
from qibotn.parallel import contraction_tree_costs
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("input", help="Input pickle saved by --tn-save-tree.")
|
||||
parser.add_argument("output", help="Output pickle path.")
|
||||
parser.add_argument("--term", type=int, default=0)
|
||||
parser.add_argument("--target-slices", type=int, default=2)
|
||||
parser.add_argument("--max-repeats", type=int, default=64)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
args = parser.parse_args()
|
||||
|
||||
input_path = Path(args.input)
|
||||
output_path = Path(args.output)
|
||||
with input_path.open("rb") as f:
|
||||
payload = pickle.load(f)
|
||||
|
||||
trees = payload["trees"] if isinstance(payload, dict) else payload
|
||||
if not isinstance(trees, (list, tuple)):
|
||||
trees = [trees]
|
||||
tree = trees[args.term]
|
||||
|
||||
print("original", contraction_tree_costs(tree), flush=True)
|
||||
sliced = tree.slice(
|
||||
target_slices=args.target_slices,
|
||||
max_repeats=args.max_repeats,
|
||||
seed=args.seed,
|
||||
)
|
||||
print("sliced", contraction_tree_costs(sliced), flush=True)
|
||||
print(f"sliced_inds={sliced.sliced_inds}", flush=True)
|
||||
|
||||
new_trees = list(trees)
|
||||
new_trees[args.term] = sliced
|
||||
|
||||
if isinstance(payload, dict):
|
||||
out_payload = dict(payload)
|
||||
out_payload["trees"] = new_trees
|
||||
out_payload["costs"] = [contraction_tree_costs(t) for t in new_trees]
|
||||
out_payload["nterms"] = len(new_trees)
|
||||
else:
|
||||
out_payload = new_trees
|
||||
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
with output_path.open("wb") as f:
|
||||
pickle.dump(out_payload, f)
|
||||
print(f"saved {output_path}", flush=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,440 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
"""Contest-style CPU TN path search and contraction runner.
|
||||
|
||||
This file is intentionally self-contained: define contest circuits and
|
||||
observables here, run path search once, then load the saved trees for repeated
|
||||
MPI contractions.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import numpy as np
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC = ROOT / "src"
|
||||
if str(SRC) not in sys.path:
|
||||
sys.path.insert(0, str(SRC))
|
||||
|
||||
from qibotn.expectation_runner import ( # noqa: E402
|
||||
ExpectationConfig,
|
||||
exact_for_observable,
|
||||
run_cpu_expectation,
|
||||
)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class CaseSpec:
|
||||
circuit_kind: str
|
||||
observables: tuple[str, ...]
|
||||
nqubits: int
|
||||
nlayers: int
|
||||
seed: int
|
||||
target_slices: int | None = None
|
||||
|
||||
|
||||
CASES = {
|
||||
"main1": CaseSpec(
|
||||
circuit_kind="rxx_rzz_chain",
|
||||
observables=("ring_xz",),
|
||||
nqubits=34,
|
||||
nlayers=20,
|
||||
seed=31001,
|
||||
target_slices=None,
|
||||
),
|
||||
"main2": CaseSpec(
|
||||
circuit_kind="scramble_chain",
|
||||
observables=("open_zz", "range2_xx"),
|
||||
nqubits=36,
|
||||
nlayers=18,
|
||||
seed=31002,
|
||||
target_slices=None,
|
||||
),
|
||||
"strong": CaseSpec(
|
||||
circuit_kind="reversed_cnot",
|
||||
observables=("ring_xz", "long_z_string"),
|
||||
nqubits=40,
|
||||
nlayers=24,
|
||||
seed=41001,
|
||||
target_slices=None,
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def optional_int(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return float(text)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
|
||||
try:
|
||||
import torch
|
||||
|
||||
torch.set_num_threads(nthreads)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def add_single_qubit_layer(circuit, nqubits, rng, include_rx=False):
|
||||
for qubit in range(nqubits):
|
||||
circuit.add(gates.RY(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
if include_rx:
|
||||
circuit.add(gates.RX(qubit, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
|
||||
"""Define contest circuits here."""
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
|
||||
for layer in range(nlayers):
|
||||
if kind == "rxx_rzz_chain":
|
||||
add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.9, 0.9)))
|
||||
circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.9, 0.9)))
|
||||
|
||||
elif kind == "scramble_chain":
|
||||
add_single_qubit_layer(circuit, nqubits, rng, include_rx=True)
|
||||
for qubit in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(qubit, qubit + 1, theta=rng.uniform(-0.8, 0.8)))
|
||||
circuit.add(gates.RZZ(qubit, qubit + 1, theta=rng.uniform(-0.8, 0.8)))
|
||||
if layer % 5 == 4:
|
||||
circuit.add(gates.SWAP(qubit, qubit + 1))
|
||||
|
||||
elif kind == "reversed_cnot":
|
||||
add_single_qubit_layer(circuit, nqubits, rng)
|
||||
for qubit in range(0, nqubits - 1, 2):
|
||||
gate = gates.CNOT(qubit + 1, qubit) if layer % 2 else gates.CNOT(qubit, qubit + 1)
|
||||
circuit.add(gate)
|
||||
for qubit in range(1, nqubits - 1, 2):
|
||||
gate = gates.CNOT(qubit + 1, qubit) if layer % 2 == 0 else gates.CNOT(qubit, qubit + 1)
|
||||
circuit.add(gate)
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown circuit kind {kind!r}.")
|
||||
|
||||
return circuit
|
||||
|
||||
|
||||
def pauli_sum_observable(kind, nqubits, seed):
|
||||
"""Define contest observables here.
|
||||
|
||||
TN path currently expects Pauli products / SymbolicHamiltonian terms.
|
||||
Keep production contest observables Hermitian unless complex output is
|
||||
explicitly required by the scoring rule.
|
||||
"""
|
||||
del seed
|
||||
if kind == "ring_xz":
|
||||
form = 0
|
||||
for qubit in range(nqubits):
|
||||
form += 0.5 * X(qubit) * Z((qubit + 1) % nqubits)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
if kind == "open_zz":
|
||||
form = 0
|
||||
for qubit in range(nqubits - 1):
|
||||
form += (1.0 / max(1, nqubits - 1)) * Z(qubit) * Z(qubit + 1)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
if kind == "range2_xx":
|
||||
form = 0
|
||||
for qubit in range(nqubits - 2):
|
||||
form += (1.0 / max(1, nqubits - 2)) * X(qubit) * X(qubit + 2)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
if kind == "long_z_string":
|
||||
stride = max(1, nqubits // 16)
|
||||
form = None
|
||||
for qubit in range(0, nqubits, stride):
|
||||
form = Z(qubit) if form is None else form * Z(qubit)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
if kind == "mixed_local":
|
||||
q1 = nqubits // 4
|
||||
q2 = nqubits // 2
|
||||
q3 = (3 * nqubits) // 4
|
||||
form = 0.25 * X(0) - 0.5 * Z(nqubits - 1)
|
||||
form += 0.125 * X(q1) * Z(q2) * Y(q3)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def tree_path(tree_dir, case_name, obs_name, nqubits, nlayers, target_slices):
|
||||
slice_label = "auto" if target_slices is None else f"s{target_slices}"
|
||||
return (
|
||||
Path(tree_dir)
|
||||
/ f"{case_name}_{obs_name}_{nqubits}q{nlayers}l_{slice_label}.pkl"
|
||||
)
|
||||
|
||||
|
||||
def build_parallel_opts(args, tree_file=None, search_only=False):
|
||||
slicing_opts = {}
|
||||
if args.tn_target_slices is not None:
|
||||
slicing_opts["target_slices"] = args.tn_target_slices
|
||||
if args.tn_target_size is not None:
|
||||
slicing_opts["target_size"] = args.tn_target_size
|
||||
|
||||
opts = {
|
||||
"slicing_opts": slicing_opts or None,
|
||||
"search_workers": args.tn_search_workers or args.torch_threads,
|
||||
"max_repeats": args.tn_search_repeats,
|
||||
"max_time": args.tn_search_time,
|
||||
"print_stats": False,
|
||||
}
|
||||
if args.tn_search_backend is not None:
|
||||
opts["search_backend"] = args.tn_search_backend
|
||||
if args.dask_address is not None:
|
||||
opts["dask_address"] = args.dask_address
|
||||
if args.dask_close_workers:
|
||||
opts["dask_close_workers"] = True
|
||||
if args.tn_debug_trials:
|
||||
opts["debug_trials"] = True
|
||||
if search_only:
|
||||
opts["search_only"] = True
|
||||
opts["save_tree_path"] = str(tree_file)
|
||||
elif tree_file is not None:
|
||||
opts["load_tree_path"] = str(tree_file)
|
||||
return opts
|
||||
|
||||
|
||||
def run_one(args, case_name, obs_name, mode):
|
||||
case = CASES[case_name]
|
||||
circuit = build_circuit(case.circuit_kind, args.nqubits, args.nlayers, args.seed)
|
||||
observable = pauli_sum_observable(obs_name, args.nqubits, args.seed)
|
||||
path = tree_path(
|
||||
args.tree_dir,
|
||||
case_name,
|
||||
obs_name,
|
||||
args.nqubits,
|
||||
args.nlayers,
|
||||
args.tn_target_slices,
|
||||
)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
rank = 0
|
||||
if args.mpi:
|
||||
from mpi4py import MPI
|
||||
|
||||
rank = MPI.COMM_WORLD.Get_rank()
|
||||
|
||||
if rank == 0:
|
||||
print("=" * 88, flush=True)
|
||||
print(
|
||||
f"mode={mode} case={case_name} circuit={case.circuit_kind} "
|
||||
f"observable={obs_name} nqubits={args.nqubits} nlayers={args.nlayers} "
|
||||
f"seed={args.seed} gates={len(circuit.queue)} tree={path}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
if mode == "contract" and not path.exists():
|
||||
raise FileNotFoundError(f"Missing tree file: {path}. Run search first.")
|
||||
|
||||
exact = None
|
||||
if args.exact and rank == 0 and mode != "search":
|
||||
if args.nqubits > args.exact_max_qubits:
|
||||
raise ValueError(
|
||||
f"--exact is limited to {args.exact_max_qubits} qubits by default."
|
||||
)
|
||||
exact = exact_for_observable(circuit, observable, args.nqubits)
|
||||
|
||||
config = ExpectationConfig(
|
||||
ansatz="tn",
|
||||
mpi=args.mpi,
|
||||
bond=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module="torch",
|
||||
quimb_backend=args.quimb_backend,
|
||||
dtype=args.dtype,
|
||||
torch_threads=args.torch_threads,
|
||||
parallel_opts=build_parallel_opts(
|
||||
args,
|
||||
tree_file=path,
|
||||
search_only=(mode == "search"),
|
||||
),
|
||||
)
|
||||
result = run_cpu_expectation(circuit, observable, config)
|
||||
if args.mpi and result.rank != 0:
|
||||
return
|
||||
|
||||
if mode == "search":
|
||||
print(f"searched observable={obs_name} tree={path}", flush=True)
|
||||
else:
|
||||
abs_error = float("nan") if exact is None else abs(result.value - exact)
|
||||
rel_error = float("nan") if exact is None else abs_error / max(abs(exact), 1e-15)
|
||||
exact_text = "nan" if exact is None else f"{exact:.16e}"
|
||||
print(
|
||||
f"result observable={obs_name} exact={exact_text} "
|
||||
f"value={result.value:.16e} abs_error={abs_error:.6e} "
|
||||
f"rel_error={rel_error:.6e} seconds={result.seconds:.3f}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
for stat in result.parallel_stats or ():
|
||||
cost = stat["path_cost"]
|
||||
search_stats = stat.get("search_stats", {})
|
||||
print(
|
||||
"tn_term_summary "
|
||||
f"observable={obs_name} "
|
||||
f"term={stat.get('term_index', 0)} "
|
||||
f"search_seconds={stat.get('search_seconds', float('nan')):.3f} "
|
||||
f"contract_seconds={stat.get('contract_seconds', float('nan')):.3f} "
|
||||
f"completed_trials={search_stats.get('completed_trials', 'na')} "
|
||||
f"finite_trials={search_stats.get('finite_trials', 'na')} "
|
||||
f"failed_trials={search_stats.get('failed_trials', 'na')} "
|
||||
f"requested_trials={search_stats.get('requested_trials', 'na')} "
|
||||
f"best_score={search_stats.get('best_score', float('nan')):.6g} "
|
||||
f"slices={cost.get('nslices')} "
|
||||
f"log10_flops={cost.get('log10_flops', float('nan')):.3f} "
|
||||
f"log10_write={cost.get('log10_write', float('nan')):.3f} "
|
||||
f"log2_size={cost.get('log2_size', float('nan')):.3f} "
|
||||
f"peak_memory_gib={cost.get('peak_memory_gib', float('nan')):.3g} "
|
||||
f"rank_slices={stat.get('rank_slices')}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
|
||||
def selected_observables(args, case):
|
||||
if args.observables:
|
||||
return tuple(args.observables)
|
||||
if args.obs_filter:
|
||||
return tuple(x.strip() for x in args.obs_filter.split(",") if x.strip())
|
||||
return case.observables
|
||||
|
||||
|
||||
def apply_case_defaults(args):
|
||||
case = CASES[args.case]
|
||||
if args.nqubits is None:
|
||||
args.nqubits = case.nqubits
|
||||
if args.nlayers is None:
|
||||
args.nlayers = case.nlayers
|
||||
if args.seed is None:
|
||||
args.seed = case.seed
|
||||
if args.tn_target_slices is None:
|
||||
args.tn_target_slices = case.target_slices
|
||||
args.observables = selected_observables(args, case)
|
||||
|
||||
|
||||
def stop_dask_cluster(args):
|
||||
if args.keep_dask or args.tn_search_backend != "dask" or not args.dask_address:
|
||||
return
|
||||
if args.mpi:
|
||||
from mpi4py import MPI
|
||||
|
||||
if MPI.COMM_WORLD.Get_rank() != 0:
|
||||
return
|
||||
script = ROOT / "tools" / "manage_tn_dask_cluster.sh"
|
||||
if not script.exists():
|
||||
print(f"dask_stop_skipped reason=missing_script path={script}", flush=True)
|
||||
return
|
||||
|
||||
env = os.environ.copy()
|
||||
parsed = urlparse(args.dask_address)
|
||||
if parsed.hostname:
|
||||
env.setdefault("SCHEDULER_HOST", parsed.hostname)
|
||||
if parsed.port:
|
||||
env.setdefault("SCHEDULER_PORT", str(parsed.port))
|
||||
|
||||
print("dask_stop_after_search start", flush=True)
|
||||
subprocess.run([str(script), "stop"], cwd=str(ROOT), env=env, check=False)
|
||||
print("dask_stop_after_search done", flush=True)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("mode", choices=("search", "contract", "all", "validate", "list"))
|
||||
parser.add_argument("--case", choices=sorted(CASES), default="main1")
|
||||
parser.add_argument("--observables", nargs="+")
|
||||
parser.add_argument("--obs-filter", default="")
|
||||
parser.add_argument("--tree-dir", default="trees/contest_tn")
|
||||
parser.add_argument("--nqubits", type=int)
|
||||
parser.add_argument("--nlayers", type=int)
|
||||
parser.add_argument("--seed", type=int)
|
||||
parser.add_argument("--mpi", action="store_true")
|
||||
parser.add_argument("--exact", action="store_true")
|
||||
parser.add_argument("--exact-max-qubits", type=int, default=24)
|
||||
parser.add_argument("--bond", "--bonds", dest="bond", type=optional_int, default=1024)
|
||||
parser.add_argument("--cut-ratio", type=optional_float, default=1e-12)
|
||||
parser.add_argument("--torch-threads", type=int, default=8)
|
||||
parser.add_argument("--quimb-backend", choices=("numpy", "torch"), default="torch")
|
||||
parser.add_argument("--dtype", choices=("complex128", "complex64"), default="complex64")
|
||||
parser.add_argument("--tn-target-slices", type=int)
|
||||
parser.add_argument("--tn-target-size", type=int, default=2**32)
|
||||
parser.add_argument("--tn-search-workers", type=int)
|
||||
parser.add_argument("--tn-search-repeats", type=int, default=2048)
|
||||
parser.add_argument("--tn-search-time", type=float, default=300.0)
|
||||
parser.add_argument(
|
||||
"--tn-search-backend",
|
||||
choices=("processpool", "dask"),
|
||||
default="dask",
|
||||
help=(
|
||||
"Path-search backend. Defaults to dask. Without --dask-address, "
|
||||
"non-MPI search starts a local dask cluster."
|
||||
),
|
||||
)
|
||||
parser.add_argument("--dask-address")
|
||||
parser.add_argument("--dask-close-workers", action="store_true")
|
||||
parser.add_argument(
|
||||
"--keep-dask",
|
||||
action="store_true",
|
||||
help=(
|
||||
"Keep an external dask cluster running after search. By default, "
|
||||
"tools/manage_tn_dask_cluster.sh stop is called after search when "
|
||||
"--dask-address is used."
|
||||
),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tn-debug-trials",
|
||||
action="store_true",
|
||||
help="Print dask worker summary and per-trial start/done logs.",
|
||||
)
|
||||
parser.add_argument("--no-tn-stats", action="store_true")
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.mode == "list":
|
||||
for name, case in CASES.items():
|
||||
print(
|
||||
f"{name}: circuit={case.circuit_kind} "
|
||||
f"observables={','.join(case.observables)} "
|
||||
f"nqubits={case.nqubits} nlayers={case.nlayers} "
|
||||
f"seed={case.seed} target_slices={case.target_slices}"
|
||||
)
|
||||
return
|
||||
|
||||
apply_case_defaults(args)
|
||||
set_torch_threads(args.torch_threads)
|
||||
|
||||
modes = ("search", "contract") if args.mode == "all" else (args.mode,)
|
||||
if args.mode == "validate":
|
||||
args.exact = True
|
||||
args.nqubits = min(args.nqubits, args.exact_max_qubits)
|
||||
modes = ("search", "contract")
|
||||
|
||||
for mode in modes:
|
||||
for obs_name in args.observables:
|
||||
run_one(args, args.case, obs_name, mode)
|
||||
if mode == "search":
|
||||
stop_dask_cluster(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,114 +0,0 @@
|
||||
"""Run the 34q/20L TN complex64 benchmark under torch.profiler briefly."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import signal
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from mpi4py import MPI
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--seconds", type=float, default=30.0)
|
||||
parser.add_argument("--out-dir", default="torch_profiles/tn_complex64")
|
||||
parser.add_argument("--torch-threads", type=int, default=48)
|
||||
args = parser.parse_args()
|
||||
|
||||
repo_root = Path(__file__).resolve().parents[1]
|
||||
os.chdir(repo_root)
|
||||
sys.path.insert(0, str(repo_root))
|
||||
|
||||
import torch
|
||||
from torch.profiler import ProfilerActivity, profile
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
out_dir = Path(args.out_dir)
|
||||
if rank == 0:
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
comm.Barrier()
|
||||
|
||||
torch.set_num_threads(args.torch_threads)
|
||||
|
||||
def run_benchmark():
|
||||
import benchmark_cpu_expectation
|
||||
|
||||
sys.argv = [
|
||||
"benchmark_cpu_expectation.py",
|
||||
"--mpi",
|
||||
"--ansatz",
|
||||
"tn",
|
||||
"--nqubits",
|
||||
"34",
|
||||
"--nlayers",
|
||||
"20",
|
||||
"--circuits",
|
||||
"rxx_rzz",
|
||||
"--pauli-pattern",
|
||||
"XZ",
|
||||
"--tn-load-tree",
|
||||
"trees/rxx_rzz_34q20l_s4.pkl",
|
||||
"--quimb-backend",
|
||||
"torch",
|
||||
"--torch-threads",
|
||||
str(args.torch_threads),
|
||||
"--dtype",
|
||||
"complex64",
|
||||
]
|
||||
benchmark_cpu_expectation.main()
|
||||
|
||||
trace_path = out_dir / f"rank{rank}_trace.json"
|
||||
stacks_path = out_dir / f"rank{rank}_stacks.txt"
|
||||
summary_path = out_dir / f"rank{rank}_summary.txt"
|
||||
|
||||
prof = profile(
|
||||
activities=[ProfilerActivity.CPU],
|
||||
record_shapes=True,
|
||||
profile_memory=True,
|
||||
with_stack=True,
|
||||
)
|
||||
|
||||
class ProfileTimeout(Exception):
|
||||
pass
|
||||
|
||||
def alarm_handler(signum, frame):
|
||||
raise ProfileTimeout()
|
||||
|
||||
old_handler = signal.signal(signal.SIGALRM, alarm_handler)
|
||||
signal.setitimer(signal.ITIMER_REAL, args.seconds)
|
||||
try:
|
||||
with prof:
|
||||
try:
|
||||
run_benchmark()
|
||||
except ProfileTimeout:
|
||||
pass
|
||||
finally:
|
||||
signal.setitimer(signal.ITIMER_REAL, 0)
|
||||
signal.signal(signal.SIGALRM, old_handler)
|
||||
|
||||
prof.export_chrome_trace(str(trace_path))
|
||||
try:
|
||||
prof.export_stacks(str(stacks_path), "self_cpu_time_total")
|
||||
except Exception as exc: # pragma: no cover - diagnostic only
|
||||
stacks_path.write_text(f"export_stacks failed: {exc}\n", encoding="utf-8")
|
||||
|
||||
summary = prof.key_averages(group_by_stack_n=5).table(
|
||||
sort_by="self_cpu_time_total",
|
||||
row_limit=40,
|
||||
)
|
||||
summary_path.write_text(summary, encoding="utf-8")
|
||||
|
||||
print(
|
||||
f"torch_profile_done rank={rank}/{size} "
|
||||
f"trace={trace_path} summary={summary_path}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,202 +0,0 @@
|
||||
"""Correctness checks for the Vidal/TEBD MPS fast path.
|
||||
|
||||
The cases here intentionally cover more than the benchmark ring-XZ observable:
|
||||
different nearest-neighbor gate orientations and several Pauli-sum observables.
|
||||
Run serially to compare qibojit/statevector vs Vidal, or under MPI to compare
|
||||
the segmented Vidal executor.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
from qibo import Circuit, gates
|
||||
|
||||
from qibotn.backends.vidal_mpi_segment import SegmentVidalMPIExecutor
|
||||
from qibotn.backends.vidal_tebd import VidalTEBDExecutor
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
for layer in range(nlayers):
|
||||
for q in range(nqubits):
|
||||
circuit.add(gates.RY(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
if kind == "rx_ry_cz":
|
||||
circuit.add(gates.RX(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
if kind in ("brickwall", "reversed_cnot"):
|
||||
for q in range(0, nqubits - 1, 2):
|
||||
if kind == "reversed_cnot" and (layer % 2):
|
||||
circuit.add(gates.CNOT(q + 1, q))
|
||||
else:
|
||||
circuit.add(gates.CNOT(q, q + 1))
|
||||
for q in range(1, nqubits - 1, 2):
|
||||
if kind == "reversed_cnot" and not (layer % 2):
|
||||
circuit.add(gates.CNOT(q + 1, q))
|
||||
else:
|
||||
circuit.add(gates.CNOT(q, q + 1))
|
||||
elif kind == "rx_ry_cz":
|
||||
for q in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.CZ(q, q + 1))
|
||||
else:
|
||||
raise ValueError(f"Unknown circuit kind {kind!r}.")
|
||||
return circuit
|
||||
|
||||
|
||||
def observable_terms(kind, nqubits):
|
||||
if kind == "ring_xz":
|
||||
return [
|
||||
(0.5, (("X", site), ("Z", (site + 1) % nqubits)))
|
||||
for site in range(nqubits)
|
||||
]
|
||||
if kind == "open_zz":
|
||||
return [
|
||||
(1.0 / (nqubits - 1), (("Z", site), ("Z", site + 1)))
|
||||
for site in range(nqubits - 1)
|
||||
]
|
||||
if kind == "mixed_local":
|
||||
terms = [(0.25, (("X", 0),)), (-0.5, (("Z", nqubits - 1),))]
|
||||
terms += [
|
||||
(0.125, (("Y", site), ("Y", site + 1)))
|
||||
for site in range(0, nqubits - 1, 3)
|
||||
]
|
||||
return terms
|
||||
raise ValueError(f"Unknown observable kind {kind!r}.")
|
||||
|
||||
|
||||
def exact_pauli_sum(circuit, terms, nqubits):
|
||||
state = circuit().state(numpy=True).reshape(-1)
|
||||
indices = np.arange(state.size, dtype=np.int64)
|
||||
value = 0.0 + 0.0j
|
||||
for coeff, ops in terms:
|
||||
flipped = indices.copy()
|
||||
phase = np.ones(state.size, dtype=np.complex128)
|
||||
for name, site in ops:
|
||||
shift = nqubits - 1 - site
|
||||
bit = (indices >> shift) & 1
|
||||
name = name.upper()
|
||||
if name == "X":
|
||||
flipped ^= 1 << shift
|
||||
elif name == "Y":
|
||||
flipped ^= 1 << shift
|
||||
phase *= 1j * (1 - 2 * bit)
|
||||
elif name == "Z":
|
||||
phase *= 1 - 2 * bit
|
||||
elif name != "I":
|
||||
raise ValueError(f"Unsupported Pauli {name!r}.")
|
||||
value += coeff * np.vdot(state[flipped], phase * state)
|
||||
return float(value.real)
|
||||
|
||||
|
||||
def run_vidal(circuit, terms, nqubits, bond, tensor_module):
|
||||
executor = VidalTEBDExecutor(
|
||||
nqubits=nqubits,
|
||||
max_bond=bond,
|
||||
cut_ratio=1e-12,
|
||||
tensor_module=tensor_module,
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
return float(executor.expectation_pauli_sum(terms))
|
||||
|
||||
|
||||
def run_segment_mpi(circuit, terms, nqubits, bond, tensor_module, comm):
|
||||
executor = SegmentVidalMPIExecutor(
|
||||
nqubits=nqubits,
|
||||
max_bond=bond,
|
||||
cut_ratio=1e-12,
|
||||
tensor_module=tensor_module,
|
||||
comm=comm,
|
||||
)
|
||||
executor.run_circuit(circuit)
|
||||
return executor.expectation_pauli_sum_root(terms)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--nqubits", type=int, default=16)
|
||||
parser.add_argument("--nlayers", type=int, default=6)
|
||||
parser.add_argument("--bond", "--bonds", dest="bond", type=int, default=512)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--tensor-module", choices=("torch", "numpy"), default="torch")
|
||||
parser.add_argument("--torch-threads", type=int, default=32)
|
||||
parser.add_argument("--mpi", action="store_true")
|
||||
parser.add_argument(
|
||||
"--circuits",
|
||||
nargs="+",
|
||||
default=("brickwall", "reversed_cnot", "rx_ry_cz"),
|
||||
)
|
||||
parser.add_argument(
|
||||
"--observables",
|
||||
nargs="+",
|
||||
default=("ring_xz", "open_zz", "mixed_local"),
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
torch.set_num_threads(args.torch_threads)
|
||||
comm = None
|
||||
rank = 0
|
||||
size = 1
|
||||
if args.mpi:
|
||||
from mpi4py import MPI
|
||||
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
if rank == 0:
|
||||
mode = f"vidal-segment-mpi/{size}" if args.mpi else "vidal"
|
||||
print(
|
||||
f"mode={mode} nqubits={args.nqubits} nlayers={args.nlayers} "
|
||||
f"bond={args.bond} tensor_module={args.tensor_module}"
|
||||
)
|
||||
print("circuit observable exact value abs_error seconds")
|
||||
|
||||
for circuit_kind in args.circuits:
|
||||
circuit = build_circuit(circuit_kind, args.nqubits, args.nlayers, args.seed)
|
||||
exact = None
|
||||
if rank == 0:
|
||||
exact_values = {
|
||||
obs: exact_pauli_sum(
|
||||
circuit, observable_terms(obs, args.nqubits), args.nqubits
|
||||
)
|
||||
for obs in args.observables
|
||||
}
|
||||
else:
|
||||
exact_values = None
|
||||
if comm is not None:
|
||||
exact_values = comm.bcast(exact_values, root=0)
|
||||
|
||||
for obs_kind in args.observables:
|
||||
terms = observable_terms(obs_kind, args.nqubits)
|
||||
start = time.perf_counter()
|
||||
if args.mpi:
|
||||
value = run_segment_mpi(
|
||||
circuit,
|
||||
terms,
|
||||
args.nqubits,
|
||||
args.bond,
|
||||
args.tensor_module,
|
||||
comm,
|
||||
)
|
||||
else:
|
||||
value = run_vidal(
|
||||
circuit, terms, args.nqubits, args.bond, args.tensor_module
|
||||
)
|
||||
if rank != 0:
|
||||
continue
|
||||
elapsed = time.perf_counter() - start
|
||||
exact = exact_values[obs_kind]
|
||||
print(
|
||||
f"{circuit_kind} {obs_kind} {exact:.16e} {value:.16e} "
|
||||
f"{abs(value - exact):.6e} {elapsed:.3f}"
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,209 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import math
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
from mpi4py import MPI
|
||||
from qibo import Circuit, gates, hamiltonians
|
||||
from qibo.symbols import X, Y, Z
|
||||
|
||||
from qibotn.backends.vidal import VidalBackend
|
||||
|
||||
|
||||
def optional_int(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return int(text)
|
||||
|
||||
|
||||
def optional_float(text):
|
||||
if isinstance(text, str) and text.lower() in {"none", "null", "inf", "unlimited"}:
|
||||
return None
|
||||
return float(text)
|
||||
|
||||
|
||||
def format_optional(value, fmt="g"):
|
||||
return "None" if value is None else format(value, fmt)
|
||||
|
||||
|
||||
def set_torch_threads(nthreads):
|
||||
try:
|
||||
import torch
|
||||
|
||||
torch.set_num_threads(nthreads)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def build_circuit(kind, nqubits, nlayers, seed):
|
||||
rng = np.random.default_rng(seed)
|
||||
circuit = Circuit(nqubits)
|
||||
|
||||
for layer in range(nlayers):
|
||||
for q in range(nqubits):
|
||||
circuit.add(gates.RY(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
circuit.add(gates.RZ(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
if kind in ("rxx_rzz", "scramble"):
|
||||
circuit.add(gates.RX(q, theta=rng.uniform(-math.pi, math.pi)))
|
||||
|
||||
if kind == "reversed_cnot":
|
||||
for q in range(0, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q + 1, q) if layer % 2 else gates.CNOT(q, q + 1))
|
||||
for q in range(1, nqubits - 1, 2):
|
||||
circuit.add(gates.CNOT(q + 1, q) if layer % 2 == 0 else gates.CNOT(q, q + 1))
|
||||
elif kind == "rxx_rzz":
|
||||
for q in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(q, q + 1, theta=rng.uniform(-0.9, 0.9)))
|
||||
circuit.add(gates.RZZ(q, q + 1, theta=rng.uniform(-0.9, 0.9)))
|
||||
elif kind == "scramble":
|
||||
for q in range(layer % 2, nqubits - 1, 2):
|
||||
circuit.add(gates.RXX(q, q + 1, theta=rng.uniform(-0.8, 0.8)))
|
||||
circuit.add(gates.RZZ(q, q + 1, theta=rng.uniform(-0.8, 0.8)))
|
||||
if layer % 5 == 4:
|
||||
circuit.add(gates.SWAP(q, q + 1))
|
||||
else:
|
||||
raise ValueError(f"Unknown circuit kind {kind!r}.")
|
||||
|
||||
return circuit
|
||||
|
||||
|
||||
def ring_xz(nqubits):
|
||||
form = 0
|
||||
for q in range(nqubits):
|
||||
form += 0.5 * X(q) * Z((q + 1) % nqubits)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def open_zz(nqubits):
|
||||
form = 0
|
||||
for q in range(nqubits - 1):
|
||||
form += (1.0 / (nqubits - 1)) * Z(q) * Z(q + 1)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def range2_xx(nqubits):
|
||||
form = 0
|
||||
for q in range(nqubits - 2):
|
||||
form += (1.0 / (nqubits - 2)) * X(q) * X(q + 2)
|
||||
return hamiltonians.SymbolicHamiltonian(form=form)
|
||||
|
||||
|
||||
def dense_observable(nqubits, qubits, seed, dim):
|
||||
rng = np.random.default_rng(seed)
|
||||
raw = rng.normal(size=(dim, dim)) + 1j * rng.normal(size=(dim, dim))
|
||||
matrix = (raw + raw.conj().T) / 2.0
|
||||
matrix = matrix / np.linalg.norm(matrix)
|
||||
return {"matrix": matrix, "qubits": list(qubits)}
|
||||
|
||||
|
||||
def observables_for_case(nqubits, seed):
|
||||
q1 = nqubits // 4
|
||||
q2 = nqubits // 2
|
||||
q3 = (3 * nqubits) // 4
|
||||
last = nqubits - 1
|
||||
|
||||
return [
|
||||
("boundary_ZZ_q1", hamiltonians.SymbolicHamiltonian(form=Z(q1 - 1) * Z(q1))),
|
||||
("boundary_ZZ_q2", hamiltonians.SymbolicHamiltonian(form=Z(q2 - 1) * Z(q2))),
|
||||
("boundary_ZZ_q3", hamiltonians.SymbolicHamiltonian(form=Z(q3 - 1) * Z(q3))),
|
||||
(
|
||||
"long_Z_5_sites",
|
||||
hamiltonians.SymbolicHamiltonian(form=Z(0) * Z(q1) * Z(q2) * Z(q3) * Z(last)),
|
||||
),
|
||||
(
|
||||
"mixed_XZYZX",
|
||||
hamiltonians.SymbolicHamiltonian(form=X(0) * Z(q1) * Y(q2) * Z(q3) * X(last)),
|
||||
),
|
||||
("ring_xz", ring_xz(nqubits)),
|
||||
("open_zz", open_zz(nqubits)),
|
||||
("range2_xx", range2_xx(nqubits)),
|
||||
("complex_iZ0", hamiltonians.SymbolicHamiltonian(form=1.0j * Z(0))),
|
||||
("dense2_mid", dense_observable(nqubits, (q2 - 1, q2), seed + 101, 4)),
|
||||
("dense3_spread", dense_observable(nqubits, (q1, q2, q3), seed + 202, 8)),
|
||||
]
|
||||
|
||||
|
||||
def run_case(args):
|
||||
set_torch_threads(args.torch_threads)
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
size = comm.Get_size()
|
||||
|
||||
circuit = build_circuit(args.kind, args.nqubits, args.nlayers, args.seed)
|
||||
observables = observables_for_case(args.nqubits, args.seed)
|
||||
if args.obs_filter:
|
||||
wanted = set(args.obs_filter.split(","))
|
||||
observables = [(name, obs) for name, obs in observables if name in wanted]
|
||||
if not observables:
|
||||
raise ValueError(f"OBS_FILTER matched no observables: {args.obs_filter!r}")
|
||||
|
||||
if rank == 0:
|
||||
print("=" * 88, flush=True)
|
||||
print(
|
||||
"case "
|
||||
f"label={args.label} kind={args.kind} ranks={size} "
|
||||
f"nqubits={args.nqubits} nlayers={args.nlayers} gates={len(circuit.queue)} "
|
||||
f"bond={format_optional(args.bond)} "
|
||||
f"cut_ratio={format_optional(args.cut_ratio)} "
|
||||
f"torch_threads={args.torch_threads} seed={args.seed} "
|
||||
f"obs_filter={args.obs_filter or 'all'}",
|
||||
flush=True,
|
||||
)
|
||||
print(
|
||||
"observable value seconds trunc_sum trunc_max status",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
for obs_name, observable in observables:
|
||||
backend = VidalBackend()
|
||||
backend.configure_tn_simulation(
|
||||
max_bond_dimension=args.bond,
|
||||
cut_ratio=args.cut_ratio,
|
||||
tensor_module="torch",
|
||||
mpi_approach="CT",
|
||||
mpi_num_procs=size,
|
||||
fallback=False,
|
||||
)
|
||||
|
||||
comm.Barrier()
|
||||
start = time.perf_counter()
|
||||
try:
|
||||
value = backend.expectation(
|
||||
circuit,
|
||||
observable,
|
||||
preprocess=True,
|
||||
compile_circuit=False,
|
||||
)
|
||||
status = "ok"
|
||||
except Exception as exc: # pragma: no cover - printed for manual runs
|
||||
value = np.nan
|
||||
status = type(exc).__name__ + ":" + str(exc).split("\n", 1)[0]
|
||||
seconds = time.perf_counter() - start
|
||||
|
||||
if rank == 0:
|
||||
print(
|
||||
f"{obs_name} {value!r} {seconds:.3f} "
|
||||
f"{backend.last_truncation_error:.6e} "
|
||||
f"{backend.last_max_truncation_error:.6e} {status}",
|
||||
flush=True,
|
||||
)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--label", required=True)
|
||||
parser.add_argument("--kind", choices=("reversed_cnot", "rxx_rzz", "scramble"), required=True)
|
||||
parser.add_argument("--nqubits", type=int, required=True)
|
||||
parser.add_argument("--nlayers", type=int, required=True)
|
||||
parser.add_argument("--bond", type=optional_int, required=True)
|
||||
parser.add_argument("--cut-ratio", type=optional_float, required=True)
|
||||
parser.add_argument("--seed", type=int, required=True)
|
||||
parser.add_argument("--torch-threads", type=int, required=True)
|
||||
parser.add_argument("--obs-filter", default="")
|
||||
run_case(parser.parse_args())
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Reference in New Issue
Block a user