Files
qibotn/tools/torch_profile_tn_complex64.py
jaunatisblue 915c24dc7b
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
赛前稳定版
2026-05-15 09:32:26 +08:00

115 lines
3.0 KiB
Python

"""Run the 34q/20L TN complex64 benchmark under torch.profiler briefly."""
from __future__ import annotations
import argparse
import os
import signal
import sys
from pathlib import Path
from mpi4py import MPI
def main() -> None:
    """Profile the 34-qubit / 20-layer TN complex64 benchmark for a bounded time.

    Parses ``--seconds``, ``--out-dir`` and ``--torch-threads``, changes the
    working directory to the directory one level above this script's folder,
    and runs ``benchmark_cpu_expectation.main()`` under a CPU-only
    ``torch.profiler`` session.  A one-shot ``SIGALRM`` interval timer
    interrupts the benchmark after ``--seconds`` seconds; whatever was
    recorded up to that point is then exported, per MPI rank, into
    ``--out-dir`` as:

    * ``rank{N}_trace.json``  - Chrome trace,
    * ``rank{N}_stacks.txt``  - stack export (or the failure message if the
      export raised),
    * ``rank{N}_summary.txt`` - top-40 table sorted by self CPU time.

    Notes:
        Relies on ``signal.SIGALRM`` / ``signal.setitimer``, so it is
        Unix-only and must be called from the main thread.  A relative
        ``--out-dir`` is resolved after the ``chdir`` performed here.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--seconds",
        type=float,
        default=30.0,
        help="profiling time budget in seconds; 0 disables the timeout",
    )
    parser.add_argument(
        "--out-dir",
        default="torch_profiles/tn_complex64",
        help="directory receiving the per-rank trace/stacks/summary files",
    )
    parser.add_argument(
        "--torch-threads",
        type=int,
        default=48,
        help="value passed to torch.set_num_threads and to the benchmark",
    )
    args = parser.parse_args()
    if args.seconds < 0:
        # setitimer() rejects negative delays; fail early with a clear
        # message instead of after torch/MPI start-up.
        parser.error("--seconds must be >= 0")

    # The benchmark module is imported from, and run inside, the directory
    # above this script's folder (presumably the "trees/..." argument below
    # is resolved relative to it - confirm against the benchmark).
    repo_root = Path(__file__).resolve().parents[1]
    os.chdir(repo_root)
    sys.path.insert(0, str(repo_root))

    # Imported only after the path/cwd setup above, as in the original flow.
    import torch
    from torch.profiler import ProfilerActivity, profile

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    out_dir = Path(args.out_dir)
    if rank == 0:
        out_dir.mkdir(parents=True, exist_ok=True)
    # Hold every rank until rank 0 has created the output directory.
    comm.Barrier()

    torch.set_num_threads(args.torch_threads)

    def run_benchmark():
        """Invoke the benchmark's CLI entry point with a fixed argument list."""
        import benchmark_cpu_expectation

        # The benchmark is driven through sys.argv; put the caller's argv
        # back afterwards so the override does not leak out of this helper.
        saved_argv = sys.argv
        sys.argv = [
            "benchmark_cpu_expectation.py",
            "--mpi",
            "--ansatz",
            "tn",
            "--nqubits",
            "34",
            "--nlayers",
            "20",
            "--circuits",
            "rxx_rzz",
            "--pauli-pattern",
            "XZ",
            "--tn-load-tree",
            "trees/rxx_rzz_34q20l_s4.pkl",
            "--quimb-backend",
            "torch",
            "--torch-threads",
            str(args.torch_threads),
            "--dtype",
            "complex64",
        ]
        try:
            benchmark_cpu_expectation.main()
        finally:
            sys.argv = saved_argv

    trace_path = out_dir / f"rank{rank}_trace.json"
    stacks_path = out_dir / f"rank{rank}_stacks.txt"
    summary_path = out_dir / f"rank{rank}_summary.txt"

    prof = profile(
        activities=[ProfilerActivity.CPU],
        record_shapes=True,
        profile_memory=True,
        with_stack=True,
    )

    class ProfileTimeout(BaseException):
        """Raised by the SIGALRM handler to cut the benchmark short.

        Derives from BaseException so that a broad ``except Exception`` inside
        the benchmark cannot swallow the (one-shot) timeout.
        """

    def alarm_handler(signum, frame):
        raise ProfileTimeout()

    old_handler = signal.signal(signal.SIGALRM, alarm_handler)
    try:
        with prof:
            try:
                # Arm the timer only once the profiler is running, inside the
                # try, so an immediate alarm is still caught below.
                signal.setitimer(signal.ITIMER_REAL, args.seconds)
                run_benchmark()
            except ProfileTimeout:
                pass
            finally:
                # Disarm BEFORE the profiler's __exit__ runs: an alarm firing
                # during profiler teardown would otherwise escape and abort
                # the script before any result is exported.
                signal.setitimer(signal.ITIMER_REAL, 0)
    except ProfileTimeout:
        # The alarm landed in the narrow window between the benchmark
        # finishing and the timer being disarmed; treat it as a normal stop.
        pass
    finally:
        # signal.signal() returns None when the previous handler was not
        # installed from Python; passing None back would raise TypeError.
        if old_handler is not None:
            signal.signal(signal.SIGALRM, old_handler)

    prof.export_chrome_trace(str(trace_path))
    try:
        prof.export_stacks(str(stacks_path), "self_cpu_time_total")
    except Exception as exc:  # pragma: no cover - diagnostic only
        # Best effort: record why the stack export failed instead of aborting.
        stacks_path.write_text(f"export_stacks failed: {exc}\n", encoding="utf-8")

    summary = prof.key_averages(group_by_stack_n=5).table(
        sort_by="self_cpu_time_total",
        row_limit=40,
    )
    summary_path.write_text(summary, encoding="utf-8")

    print(
        f"torch_profile_done rank={rank}/{size} "
        f"trace={trace_path} summary={summary_path}",
        flush=True,
    )
# Run the profiling driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()