Files
qibotn/tools/run_tn_dask_mpi_all.sh
jaunatisblue 4c7a10d026
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
补充
2026-05-15 11:11:20 +08:00

94 lines
2.7 KiB
Bash
Executable File

#!/usr/bin/env bash
# Runs tools/tn_contest_runner.py in two stages: a "search" stage that talks to
# a Dask scheduler, then a "contract" stage launched under MPI.
#
# Every setting below may be overridden from the environment; the value after
# ":=" is applied only when the variable is unset or empty.
set -euo pipefail

# Work from the parent of the directory that holds this script, so the relative
# paths used by this script (tools/..., .venv/..., trees/...) resolve from there.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
cd "$ROOT_DIR"

# Problem selection (forwarded as --case / --observables / --nqubits / --nlayers).
: "${CASE:=main1}"
: "${OBSERVABLES:=long_z_string}"
: "${NQUBITS:=34}"
: "${NLAYERS:=20}"

# Runner tuning (forwarded as --torch-threads / --dtype / --tn-search-repeats /
# --tn-search-time).
: "${TORCH_THREADS:=48}"
: "${DTYPE:=complex64}"
: "${SEARCH_REPEATS:=2048}"
: "${SEARCH_TIME:=300}"

# Slicing targets. The size default is 2^33; the slice count is optional.
: "${TN_TARGET_SIZE:=8589934592}"
: "${TN_TARGET_SLICES:=}"

# Interpreter and tree directory (relative values resolve against ROOT_DIR
# because of the cd above).
: "${PYTHON_BIN:=.venv/bin/python}"
: "${TREE_DIR:=trees/contest_tn}"

# Dask scheduler address and the MPI launcher command line.
: "${DASK_ADDRESS:=tcp://10.20.1.103:8786}"
: "${MPIEXEC_FULL:=mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2}"

# Tree synchronisation: enabled when SYNC_TREES is "1"; the host list falls
# back to WORKER_HOSTS when SYNC_HOSTS is not provided.
: "${SYNC_TREES:=1}"
: "${SYNC_HOSTS:=${WORKER_HOSTS:-}}"
: "${SSH_BIN:=ssh}"

# Exported so that child processes inherit it (the consumer is not visible in
# this script).
: "${TCM_ENABLE:=1}"
export TCM_ENABLE

# Slicing arguments shared by both runner invocations; --tn-target-slices is
# appended only when a value was supplied.
tn_slice_args=(--tn-target-size "$TN_TARGET_SIZE")
[[ -z "$TN_TARGET_SLICES" ]] || tn_slice_args+=(--tn-target-slices "$TN_TARGET_SLICES")
#######################################
# Report whether a host name or address refers to the machine running this
# script.
# Arguments: $1 - host name or IP address to check
# Returns:   0 when $1 is "localhost", "127.0.0.1", the output of `hostname`,
#            the output of `hostname -f`, or one of the addresses printed by
#            `hostname -I`; 1 otherwise, including for an empty $1.
#######################################
is_local_host() {
  local host="$1"
  local addr
  local -a addrs=()

  # An empty argument is never a host. Without this guard it would compare
  # equal to the empty output of a failed `hostname -f`.
  [[ -n "$host" ]] || return 1

  [[ "$host" == "localhost" || "$host" == "127.0.0.1" ]] && return 0
  [[ "$host" == "$(hostname)" ]] && return 0
  # `hostname -f` may fail; treat that as "no fully qualified name".
  [[ "$host" == "$(hostname -f 2>/dev/null || true)" ]] && return 0

  # Split the address list on whitespace and compare each entry literally.
  # A regex match (grep without -F) would let the dots in an address match any
  # character. Doing this in the shell also avoids a `... | grep -q` pipeline,
  # whose early exit can surface as a SIGPIPE failure under `pipefail`.
  # `read -d ''` returns non-zero at end of input, hence the `|| true`.
  IFS=$' \t\n' read -r -d '' -a addrs <<< "$(hostname -I 2>/dev/null || true)" || true
  for addr in ${addrs[@]+"${addrs[@]}"}; do
    [[ "$addr" == "$host" ]] && return 0
  done
  return 1
}
#######################################
# Copy the tree directory (TREE_DIR) to every remote host in SYNC_HOSTS.
# Globals:   SYNC_TREES, SYNC_HOSTS, TREE_DIR, ROOT_DIR, SSH_BIN (all read)
# Arguments: none
# Outputs:   one "Sync tree dir to ..." line per remote host on stdout
# Returns:   0 immediately when SYNC_TREES is not "1" or SYNC_HOSTS is empty;
#            otherwise the status of the last command run in the loop.
# Notes:     Hosts recognised by is_local_host are skipped. With rsync the
#            whole directory is mirrored; the scp fallback copies only the
#            top-level *.pkl files.
#######################################
sync_trees_to_hosts() {
  [[ "$SYNC_TREES" == "1" ]] || return 0
  [[ -n "$SYNC_HOSTS" ]] || return 0

  local host
  local -a hosts=() rsync_ssh=() scp_ssh=()

  # The same absolute path is used locally and remotely; a relative TREE_DIR
  # is anchored at ROOT_DIR.
  local tree_path="$TREE_DIR"
  if [[ "$TREE_DIR" != /* ]]; then
    tree_path="$ROOT_DIR/$TREE_DIR"
  fi

  # Split the host list on whitespace (spaces, tabs, newlines) without glob
  # expansion. `read -d ''` returns non-zero at end of input, hence `|| true`.
  IFS=$' \t\n' read -r -d '' -a hosts <<< "$SYNC_HOSTS" || true

  # When SSH_BIN was customised, make rsync/scp use the same client as the
  # mkdir step. With the default "ssh" nothing is passed, so rsync and scp
  # keep their own defaults.
  if [[ "$SSH_BIN" != "ssh" ]]; then
    rsync_ssh=(-e "$SSH_BIN")
    scp_ssh=(-S "$SSH_BIN")
  fi

  for host in ${hosts[@]+"${hosts[@]}"}; do
    is_local_host "$host" && continue
    echo "Sync tree dir to $host:$tree_path"
    # %q quotes the path for the remote shell that runs the command string.
    "$SSH_BIN" "$host" "mkdir -p $(printf '%q' "$tree_path")"
    if command -v rsync >/dev/null 2>&1; then
      rsync -a ${rsync_ssh[@]+"${rsync_ssh[@]}"} "$tree_path/" "$host:$tree_path/"
    else
      scp -q ${scp_ssh[@]+"${scp_ssh[@]}"} "$tree_path"/*.pkl "$host:$tree_path/"
    fi
  done
}
# ---------------------------------------------------------------------------
# Pipeline: start the Dask cluster, run the search stage, copy the tree
# directory to the remote hosts, then run the contract stage under MPI.
# ---------------------------------------------------------------------------

# NOTE(review): the cluster is started here but never stopped by this script —
# confirm whether it should be shut down before the MPI stage begins.
tools/manage_tn_dask_cluster.sh start

# OBSERVABLES may list several whitespace-separated names. Split it into an
# array so each name is passed as its own argument without glob expansion.
# `read -d ''` returns non-zero at end of input, hence the `|| true`.
IFS=$' \t\n' read -r -d '' -a observable_args <<< "$OBSERVABLES" || true

# Arguments shared by both runner stages, kept in the order each stage
# originally passed them.
problem_args=(
  --case "$CASE"
  --nqubits "$NQUBITS"
  --nlayers "$NLAYERS"
  --observables ${observable_args[@]+"${observable_args[@]}"}
  --tree-dir "$TREE_DIR"
)
runtime_args=(
  --torch-threads "$TORCH_THREADS"
  --dtype "$DTYPE"
)

echo "Search with dask: $DASK_ADDRESS"
"$PYTHON_BIN" -u tools/tn_contest_runner.py search \
  "${problem_args[@]}" \
  --dask-address "$DASK_ADDRESS" \
  "${runtime_args[@]}" \
  --tn-search-repeats "$SEARCH_REPEATS" \
  --tn-search-time "$SEARCH_TIME" \
  "${tn_slice_args[@]}"

# Copy the tree directory to the remote hosts before the MPI stage starts.
sync_trees_to_hosts

echo "Contract with MPI: $MPIEXEC_FULL"
# MPIEXEC_FULL is a whole command line; split it into words to use as the
# launcher prefix.
read -r -a mpi_prefix <<< "$MPIEXEC_FULL"
"${mpi_prefix[@]}" "$PYTHON_BIN" -u tools/tn_contest_runner.py contract \
  --mpi \
  "${problem_args[@]}" \
  "${runtime_args[@]}" \
  "${tn_slice_args[@]}"