# TN

```bash
# search + contract,Open MPI 多节点:每节点 2 rank,每 rank 绑定 1 个 NUMA。
# MPI_HOSTS 里每个节点写 :2,MPI_RANKS = 节点数 * 2。
# 每个 rank 使用 MPI_PE 个 core;这台 2-NUMA AMD 节点用 MPI_PE=128。
NQUBITS=40 \
TN_DEBUG_TRIALS=1 \
SCHEDULER_HOST=10.20.1.100 \
DASK_ADDRESS=tcp://10.20.1.100:8786 \
WORKER_HOSTS="10.20.1.100 10.20.1.101 10.20.1.102 10.20.1.103" \
CASE=main1 \
OBSERVABLES=long_z_string \
TORCH_THREADS=80 \
MPI_PE=80 \
MPI_MAP_BY=ppr:1:numa:PE=80 \
MPI_BIND_TO=core \
OMP_NUM_THREADS=80 \
MKL_NUM_THREADS=80 \
BLIS_NUM_THREADS=80 \
MPI_HOSTS="node-0:2,node-1:2,node-2:2,node-3:2" \
MPI_RANKS=8 \
NWORKERS=96 \
TN_TARGET_SIZE=17179869184 \
tools/run_tn_dask_mpi_all.sh

# 单独缩并 contract 计算
mpirun --map-by ppr:1:numa:PE=80 --bind-to core --report-bindings \
  -x LD_PRELOAD=/home/aocc/aocl/5.2.0/aocc/lib_LP64/libblis-mt.so.5 \
  -x BLIS_NUM_THREADS=80 \
  -x OMP_NUM_THREADS=80 \
  -x MKL_NUM_THREADS=80 \
  -x OMP_PROC_BIND=close \
  -x OMP_PLACES=cores \
  -np 8 \
  -host node-0:2,node-1:2,node-2:2,node-3:2 \
  .venv/bin/python -u tools/tn_contest_runner.py contract \
  --mpi \
  --case main1 \
  --nqubits 34 \
  --nlayers 20 \
  --observables long_z_string \
  --tree-dir trees/contest_tn \
  --torch-threads 80 \
  --dtype complex64
```

# MPS

```bash
cd /home/qibo/qibotn

MPIEXEC=mpirun \
MPI_HOSTS="node-2:4,node-3:4" \
MPI_RANKS=8 \
MPI_PE=48 \
MPI_MAP_BY=ppr:2:numa:PE=48 \
MPI_BIND_TO=core \
MPI_REPORT_BINDINGS=1 \
TORCH_THREADS=48 \
OMP_NUM_THREADS=48 \
MKL_NUM_THREADS=48 \
BLIS_NUM_THREADS=48 \
OBS_FILTER=ring_xz \
MAIN1_NQ=128 \
MAIN1_LAYERS=24 \
MAIN1_BOND=1024 \
tools/run_vidal_mpi_contest_cases.sh main1

MPIEXEC=mpirun \
MPI_HOSTS="node-2:4" \
MPI_RANKS=4 \
MPI_PE=48 \
MPI_MAP_BY=ppr:2:numa:PE=48 \
MPI_BIND_TO=core \
MPI_REPORT_BINDINGS=1 \
TORCH_THREADS=48 \
OMP_NUM_THREADS=48 \
MKL_NUM_THREADS=48 \
BLIS_NUM_THREADS=48 \
OBS_FILTER=ring_xz \
MAIN1_NQ=128 \
MAIN1_LAYERS=24 \
MAIN1_BOND=1024 \
tools/run_vidal_mpi_contest_cases.sh main1
```