决赛现场脚本
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
Some checks failed
Build wheels / build (ubuntu-latest, 3.11) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.12) (push) Has been cancelled
Build wheels / build (ubuntu-latest, 3.13) (push) Has been cancelled
Tests / check (push) Has been cancelled
Tests / build (ubuntu-latest, 3.11) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.12) (push) Has been cancelled
Tests / build (ubuntu-latest, 3.13) (push) Has been cancelled
This commit is contained in:
@@ -11,10 +11,15 @@ set -euo pipefail
|
||||
#
|
||||
# Common overrides:
|
||||
# PYTHON_BIN=.venv/bin/python
|
||||
# MPIEXEC=mpiexec
|
||||
# MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2"
|
||||
# MPIEXEC=mpirun
|
||||
# MPI_HOSTS="node-1:2,node-2:2,node-3:2,node-0:2"
|
||||
# MPI_RANKS=8
|
||||
# MPI_PE=128
|
||||
# MPI_MAP_BY=ppr:1:numa:PE=128
|
||||
# MPI_BIND_TO=core
|
||||
# MPIEXEC_FULL="mpirun --map-by ppr:1:numa:PE=128 --bind-to core -np 8 -host node-1:2,node-2:2,node-3:2,node-0:2"
|
||||
# HOSTFILE=hostfile # optional; used only if the file exists
|
||||
# RANKS=8
|
||||
# RANKS=8 # fallback if MPI_RANKS is not set
|
||||
# TORCH_THREADS=8
|
||||
# CUT_RATIO=1e-12
|
||||
# OBS_FILTER="boundary_ZZ_q2 ring_xz dense3_spread complex_iZ0"
|
||||
@@ -28,12 +33,23 @@ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
cd "$ROOT_DIR"
|
||||
|
||||
PYTHON_BIN="${PYTHON_BIN:-.venv/bin/python}"
|
||||
MPIEXEC="${MPIEXEC:-mpiexec}"
|
||||
HOSTFILE="${HOSTFILE:-}"
|
||||
MPIEXEC="${MPIEXEC:-mpirun}"
|
||||
MPIEXEC_FULL="${MPIEXEC_FULL:-}"
|
||||
MPI_HOSTS="${MPI_HOSTS:-}"
|
||||
MPI_HOSTFILE="${MPI_HOSTFILE:-${HOSTFILE:-}}"
|
||||
MPI_RANKS="${MPI_RANKS:-${RANKS:-}}"
|
||||
RANKS="${RANKS:-4}"
|
||||
TORCH_THREADS="${TORCH_THREADS:-1}"
|
||||
MPI_PE="${MPI_PE:-$TORCH_THREADS}"
|
||||
MPI_MAP_BY="${MPI_MAP_BY:-ppr:1:numa:PE=$MPI_PE}"
|
||||
MPI_BIND_TO="${MPI_BIND_TO:-core}"
|
||||
MPI_REPORT_BINDINGS="${MPI_REPORT_BINDINGS:-0}"
|
||||
MPI_EXPORT_ENV="${MPI_EXPORT_ENV:-1}"
|
||||
CUT_RATIO="${CUT_RATIO:-1e-12}"
|
||||
OBS_FILTER="${OBS_FILTER:-}"
|
||||
export OMP_NUM_THREADS="${OMP_NUM_THREADS:-$TORCH_THREADS}"
|
||||
export MKL_NUM_THREADS="${MKL_NUM_THREADS:-$TORCH_THREADS}"
|
||||
source "$ROOT_DIR/tools/qibotn_torch_mt_env.sh"
|
||||
|
||||
RUNNER_DIR="$ROOT_DIR/.tmp"
|
||||
mkdir -p "$RUNNER_DIR"
|
||||
@@ -238,15 +254,68 @@ if __name__ == "__main__":
|
||||
main()
|
||||
PY
|
||||
|
||||
if [[ -n "${MPIEXEC_FULL:-}" ]]; then
|
||||
read -r -a mpi_prefix <<< "$MPIEXEC_FULL"
|
||||
else
|
||||
mpi_prefix=("$MPIEXEC")
|
||||
if [[ -n "$HOSTFILE" && -f "$HOSTFILE" ]]; then
|
||||
mpi_prefix+=("-hostfile" "$HOSTFILE")
|
||||
#######################################
# Sum the slot counts of a comma-separated MPI host list.
# Arguments: $1 - host list such as "node-1:2,node-2:2"; an entry without a
#                 ":slots" suffix counts as one slot, empty entries are
#                 ignored.
# Outputs:   the total slot count on stdout; diagnostics on stderr.
# Returns:   0 on success, 1 if a slot suffix is not a non-negative integer.
#######################################
sum_host_slots() {
  local hosts="$1"
  local total=0
  local item slots
  # Keep the scratch array local so it does not leak into the caller's scope.
  local -a host_items=()

  IFS=',' read -r -a host_items <<< "$hosts"
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on
  # bash releases older than 4.4.
  for item in ${host_items[@]+"${host_items[@]}"}; do
    # Skip empty entries produced by doubled commas.
    [[ -n "$item" ]] || continue
    if [[ "$item" == *:* ]]; then
      slots="${item##*:}"
    else
      slots=1
    fi
    # Validate before doing arithmetic: an empty or non-numeric suffix would
    # otherwise be a syntax error or be evaluated as an expression.
    if [[ ! "$slots" =~ ^[0-9]+$ ]]; then
      printf 'sum_host_slots: invalid slot count in host entry: %s\n' "$item" >&2
      return 1
    fi
    # Force base 10 so a zero-padded count such as "08" is not read as octal.
    total=$((total + 10#$slots))
  done
  echo "$total"
}
|
||||
|
||||
#######################################
# Append "-x NAME=value" launcher arguments for the threading-related
# environment variables to the global mpi_prefix array.
# Globals:   MPI_EXPORT_ENV (read) - nothing is appended unless it is "1".
#            LD_PRELOAD, BLIS_NUM_THREADS, OMP_NUM_THREADS, MKL_NUM_THREADS,
#            OMP_PROC_BIND, OMP_PLACES (read); mpi_prefix (appended to).
# Returns:   0.
# NOTE(review): "-x" is presumably the Open MPI mpirun export flag - confirm
# it is accepted by whatever launcher MPIEXEC / MPIEXEC_FULL names.
#######################################
append_mpi_env_args() {
  if [[ "$MPI_EXPORT_ENV" != "1" ]]; then
    return 0
  fi

  local assignment
  # Every NAME=value word is expanded before the loop body runs, so a failed
  # expansion leaves mpi_prefix untouched.
  for assignment in \
    "LD_PRELOAD=${LD_PRELOAD:-}" \
    "BLIS_NUM_THREADS=$BLIS_NUM_THREADS" \
    "OMP_NUM_THREADS=$OMP_NUM_THREADS" \
    "MKL_NUM_THREADS=$MKL_NUM_THREADS" \
    "OMP_PROC_BIND=$OMP_PROC_BIND" \
    "OMP_PLACES=$OMP_PLACES"; do
    mpi_prefix+=(-x "$assignment")
  done
}
|
||||
|
||||
#######################################
# Populate the global mpi_prefix array with the MPI launcher command line.
# Globals:   MPIEXEC_FULL (read) - when non-empty it is word-split and used
#              as the whole launcher prefix; the other MPI_* settings are
#              then ignored.
#            MPIEXEC, MPI_MAP_BY, MPI_BIND_TO, MPI_RANKS, RANKS, MPI_HOSTS,
#            MPI_HOSTFILE, MPI_REPORT_BINDINGS (read); mpi_prefix (written).
# Calls:     sum_host_slots, append_mpi_env_args.
# Outputs:   a warning on stderr when MPI_HOSTFILE names a missing file.
#######################################
build_mpi_prefix() {
  if [[ -n "$MPIEXEC_FULL" ]]; then
    # Split on whitespace WITHOUT pathname expansion: an unquoted
    # array=($var) assignment would also expand glob characters such as "*".
    # With -d '' read consumes the whole string and reports EOF as a
    # non-zero status even though the array is filled, hence "|| true".
    read -r -d '' -a mpi_prefix <<< "$MPIEXEC_FULL" || true
    append_mpi_env_args
    return
  fi

  # Rank count precedence: MPI_RANKS, then the slot total of MPI_HOSTS,
  # then RANKS.
  local ranks="$MPI_RANKS"
  if [[ -z "$ranks" && -n "$MPI_HOSTS" ]]; then
    ranks="$(sum_host_slots "$MPI_HOSTS")"
  fi
  if [[ -z "$ranks" ]]; then
    ranks="$RANKS"
  fi

  mpi_prefix=(
    "$MPIEXEC"
    --map-by "$MPI_MAP_BY"
    --bind-to "$MPI_BIND_TO"
    -np "$ranks"
  )
  if [[ "$MPI_REPORT_BINDINGS" == "1" ]]; then
    mpi_prefix+=(--report-bindings)
  fi
  append_mpi_env_args
  # An explicit host list takes precedence over a hostfile.
  if [[ -n "$MPI_HOSTS" ]]; then
    mpi_prefix+=(-host "$MPI_HOSTS")
  elif [[ -n "$MPI_HOSTFILE" ]]; then
    # The hostfile is optional: pass it only when the file actually exists.
    if [[ -f "$MPI_HOSTFILE" ]]; then
      mpi_prefix+=(-hostfile "$MPI_HOSTFILE")
    else
      printf 'warning: hostfile %s not found; launching without -hostfile\n' \
        "$MPI_HOSTFILE" >&2
    fi
  fi
}
|
||||
|
||||
build_mpi_prefix
|
||||
|
||||
run_case() {
|
||||
local label="$1"
|
||||
@@ -323,7 +392,12 @@ Cases:
|
||||
Common overrides:
|
||||
PYTHON_BIN=.venv/bin/python
|
||||
MPIEXEC=mpiexec
|
||||
MPIEXEC_FULL="mpirun -np 4 -hostfile /home/yx/qibotn/hostfile -perhost 2"
|
||||
MPI_HOSTS="node-1:2,node-2:2,node-3:2,node-0:2"
|
||||
MPI_RANKS=8
|
||||
MPI_PE=128
|
||||
MPI_MAP_BY=ppr:1:numa:PE=128
|
||||
MPI_BIND_TO=core
|
||||
MPIEXEC_FULL="mpirun --map-by ppr:1:numa:PE=128 --bind-to core -np 8 -host node-1:2,node-2:2,node-3:2,node-0:2"
|
||||
HOSTFILE=hostfile
|
||||
RANKS=8
|
||||
TORCH_THREADS=8
|
||||
|
||||
Reference in New Issue
Block a user