From 65788be1b3589cf2d8107a1ebbb84c1dc34eebd6 Mon Sep 17 00:00:00 2001 From: qhy <2728290997@qq.com> Date: Wed, 18 Feb 2026 19:14:55 +0800 Subject: [PATCH] =?UTF-8?q?=E6=88=90=E5=8A=9F=E7=9A=84=E5=B0=9D=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 + run_all_case.sh | 114 ++++ run_all_cases_20260218_190150.log | 504 ++++++++++++++++++ run_all_psnr.sh | 61 +++ scripts/evaluation/world_model_interaction.py | 5 + scripts/export_trt.py | 87 +++ src/unifolm_wma/modules/networks/wma_model.py | 85 +-- src/unifolm_wma/trt_utils.py | 151 ++++++ unitree_g1_pack_camera/case1/output.log | 179 +++++++ unitree_g1_pack_camera/case1/psnr_result.json | 5 + unitree_g1_pack_camera/case2/output.log | 179 +++++++ unitree_g1_pack_camera/case2/psnr_result.json | 5 + unitree_g1_pack_camera/case3/output.log | 146 +++++ unitree_g1_pack_camera/case3/psnr_result.json | 5 + unitree_g1_pack_camera/case4/psnr_result.json | 5 + .../case2/psnr_result.json | 5 + .../case3/psnr_result.json | 5 + .../case4/psnr_result.json | 5 + .../case1/psnr_result.json | 5 + .../case2/psnr_result.json | 5 + .../case3/psnr_result.json | 5 + .../case4/psnr_result.json | 5 + .../case1/output.log | 34 +- .../case1/psnr_result.json | 6 +- .../case2/psnr_result.json | 5 + .../case3/psnr_result.json | 5 + .../case4/output.log | 179 +++++++ .../case4/psnr_result.json | 5 + unitree_z1_stackbox/case1/psnr_result.json | 5 + unitree_z1_stackbox/case2/psnr_result.json | 5 + unitree_z1_stackbox/case3/psnr_result.json | 5 + unitree_z1_stackbox/case4/psnr_result.json | 5 + 32 files changed, 1772 insertions(+), 50 deletions(-) create mode 100644 run_all_case.sh create mode 100644 run_all_cases_20260218_190150.log create mode 100644 run_all_psnr.sh create mode 100644 scripts/export_trt.py create mode 100644 src/unifolm_wma/trt_utils.py create mode 100644 unitree_g1_pack_camera/case1/output.log create mode 100644 unitree_g1_pack_camera/case1/psnr_result.json create mode 100644 unitree_g1_pack_camera/case2/output.log create mode 100644 unitree_g1_pack_camera/case2/psnr_result.json create mode 100644 unitree_g1_pack_camera/case3/output.log create mode 100644 unitree_g1_pack_camera/case3/psnr_result.json create mode 100644 unitree_g1_pack_camera/case4/psnr_result.json create mode 100644 unitree_z1_dual_arm_cleanup_pencils/case2/psnr_result.json create mode 100644 unitree_z1_dual_arm_cleanup_pencils/case3/psnr_result.json create mode 100644 unitree_z1_dual_arm_cleanup_pencils/case4/psnr_result.json create mode 100644 unitree_z1_dual_arm_stackbox/case1/psnr_result.json create mode 100644 unitree_z1_dual_arm_stackbox/case2/psnr_result.json create mode 100644 unitree_z1_dual_arm_stackbox/case3/psnr_result.json create mode 100644 unitree_z1_dual_arm_stackbox/case4/psnr_result.json create mode 100644 unitree_z1_dual_arm_stackbox_v2/case2/psnr_result.json create mode 100644 unitree_z1_dual_arm_stackbox_v2/case3/psnr_result.json create mode 100644 unitree_z1_dual_arm_stackbox_v2/case4/output.log create mode 100644 unitree_z1_dual_arm_stackbox_v2/case4/psnr_result.json create mode 100644 unitree_z1_stackbox/case1/psnr_result.json create mode 100644 unitree_z1_stackbox/case2/psnr_result.json create mode 100644 unitree_z1_stackbox/case3/psnr_result.json create mode 100644 unitree_z1_stackbox/case4/psnr_result.json diff --git a/.gitignore b/.gitignore index a661276..dc283c2 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,5 @@ Experiment/log *.0 ckpts/unifolm_wma_dual.ckpt.prepared.pt +trt_engines/video_backbone.engine +trt_engines/video_backbone.onnx diff --git a/run_all_case.sh b/run_all_case.sh new file mode 100644 index 0000000..6252554 --- /dev/null +++ b/run_all_case.sh @@ -0,0 +1,114 @@ +#!/bin/bash + +# 自动执行所有场景的所有case +# 总共5个场景,每个场景4个case,共20个case +# 设置环境变量(离线模式) +export HF_HUB_OFFLINE=1 +export TRANSFORMERS_OFFLINE=1 + +# 颜色定义 +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# 定义所有场景 +SCENARIOS=( + "unitree_g1_pack_camera" + "unitree_z1_dual_arm_cleanup_pencils" + "unitree_z1_dual_arm_stackbox" + "unitree_z1_dual_arm_stackbox_v2" + "unitree_z1_stackbox" +) + +# 定义case数量 +CASES=(1 2 3 4) + +# 记录开始时间 +START_TIME=$(date +%s) +LOG_FILE="run_all_cases_$(date +%Y%m%d_%H%M%S).log" + +echo -e "${BLUE}========================================${NC}" +echo -e "${BLUE}开始执行所有场景的case${NC}" +echo -e "${BLUE}总共: ${#SCENARIOS[@]} 个场景 x ${#CASES[@]} 个case = $((${#SCENARIOS[@]} * ${#CASES[@]})) 个任务${NC}" +echo -e "${BLUE}日志文件: ${LOG_FILE}${NC}" +echo -e "${BLUE}========================================${NC}" +echo "" + +# 初始化计数器 +TOTAL_CASES=$((${#SCENARIOS[@]} * ${#CASES[@]})) +CURRENT_CASE=0 +SUCCESS_COUNT=0 +FAIL_COUNT=0 + +# 记录失败的case +declare -a FAILED_CASES + +# 遍历所有场景 +for scenario in "${SCENARIOS[@]}"; do + echo -e "${YELLOW}>>> 场景: ${scenario}${NC}" + + # 遍历所有case + for case_num in "${CASES[@]}"; do + CURRENT_CASE=$((CURRENT_CASE + 1)) + case_dir="${scenario}/case${case_num}" + script_path="${case_dir}/run_world_model_interaction.sh" + + echo -e "${BLUE}[${CURRENT_CASE}/${TOTAL_CASES}] 执行: ${case_dir}${NC}" + + # 检查脚本是否存在 + if [ ! -f "${script_path}" ]; then + echo -e "${RED}错误: 脚本不存在 ${script_path}${NC}" + FAIL_COUNT=$((FAIL_COUNT + 1)) + FAILED_CASES+=("${case_dir} (脚本不存在)") + continue + fi + + # 执行脚本 + echo "开始时间: $(date '+%Y-%m-%d %H:%M:%S')" + + if bash "${script_path}" >> "${LOG_FILE}" 2>&1; then + echo -e "${GREEN}✓ 成功: ${case_dir}${NC}" + SUCCESS_COUNT=$((SUCCESS_COUNT + 1)) + else + echo -e "${RED}✗ 失败: ${case_dir}${NC}" + FAIL_COUNT=$((FAIL_COUNT + 1)) + FAILED_CASES+=("${case_dir}") + fi + + echo "结束时间: $(date '+%Y-%m-%d %H:%M:%S')" + echo "" + done + + echo "" +done + +# 计算总耗时 +END_TIME=$(date +%s) +DURATION=$((END_TIME - START_TIME)) +HOURS=$((DURATION / 3600)) +MINUTES=$(((DURATION % 3600) / 60)) +SECONDS=$((DURATION % 60)) + +# 输出总结 +echo -e "${BLUE}========================================${NC}" +echo -e "${BLUE}执行完成!${NC}" +echo -e "${BLUE}========================================${NC}" +echo -e "总任务数: ${TOTAL_CASES}" +echo -e "${GREEN}成功: ${SUCCESS_COUNT}${NC}" +echo -e "${RED}失败: ${FAIL_COUNT}${NC}" +echo -e "总耗时: ${HOURS}小时 ${MINUTES}分钟 ${SECONDS}秒" +echo -e "详细日志: ${LOG_FILE}" +echo "" + +# 如果有失败的case,列出来 +if [ ${FAIL_COUNT} -gt 0 ]; then + echo -e "${RED}失败的case列表:${NC}" + for failed_case in "${FAILED_CASES[@]}"; do + echo -e "${RED} - ${failed_case}${NC}" + done + echo "" +fi + +echo -e "${BLUE}========================================${NC}" diff --git a/run_all_cases_20260218_190150.log b/run_all_cases_20260218_190150.log new file mode 100644 index 0000000..ebf4a47 --- /dev/null +++ b/run_all_cases_20260218_190150.log @@ -0,0 +1,504 @@ +2026-02-18 19:01:56.891895: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-18 19:01:56.940243: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-18 19:01:56.940285: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-18 19:01:56.941395: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-18 19:01:56.948327: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2026-02-18 19:01:57.870809: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +Global seed set to 123 +>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... +>>> Prepared model loaded. +INFO:root:***** Configing Data ***** +>>> unitree_z1_stackbox: 1 data samples loaded. +>>> unitree_z1_stackbox: data stats loaded. +>>> unitree_z1_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox: data stats loaded. +>>> unitree_z1_dual_arm_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded. +>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated. +>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated. +>>> unitree_g1_pack_camera: 1 data samples loaded. +>>> unitree_g1_pack_camera: data stats loaded. +>>> unitree_g1_pack_camera: normalizer initiated. +>>> Dataset is successfully loaded ... + ✓ KV fused: 66 attention layers + TRT output 'y': [1, 4, 16, 40, 64] torch.float32 + TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32 + TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32 +>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine +>>> Generate 16 frames under each generation ... +DEBUG:h5py._conv:Creating converter from 3 to 5 +DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 +DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 +DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 + 0%| | 0/11 [00:00>> Step 0: generating actions ... +>>> Step 0: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 1: generating actions ... +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin + 18%|█▊ | 2/11 [00:33<02:31, 16.87s/it] 27%|██▋ | 3/11 [00:50<02:14, 16.76s/it] 36%|███▋ | 4/11 [01:07<01:57, 16.81s/it] 45%|████▌ | 5/11 [01:24<01:41, 16.85s/it] 55%|█████▍ | 6/11 [01:41<01:24, 16.82s/it] 64%|██████▎ | 7/11 [01:57<01:07, 16.82s/it] 73%|███████▎ | 8/11 [02:14<00:50, 16.83s/it] 82%|████████▏ | 9/11 [02:31<00:33, 16.80s/it] 91%|█████████ | 10/11 [02:48<00:16, 16.81s/it] 100%|██████████| 11/11 [03:05<00:00, 16.81s/it] 100%|██████████| 11/11 [03:05<00:00, 16.83s/it] +>>> Step 1: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 2: generating actions ... +>>> Step 2: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 3: generating actions ... +>>> Step 3: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 4: generating actions ... +>>> Step 4: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 5: generating actions ... +>>> Step 5: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 6: generating actions ... +>>> Step 6: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 7: generating actions ... +>>> Step 7: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 8: generating actions ... +>>> Step 8: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 9: generating actions ... +>>> Step 9: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 10: generating actions ... +>>> Step 10: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> + +real 3m49.072s +user 4m16.055s +sys 0m44.636s +2026-02-18 19:05:45.956647: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-18 19:05:46.004149: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-18 19:05:46.004193: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-18 19:05:46.005265: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-18 19:05:46.012074: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2026-02-18 19:05:46.932966: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +Global seed set to 123 +>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... +>>> Prepared model loaded. +INFO:root:***** Configing Data ***** +>>> unitree_z1_stackbox: 1 data samples loaded. +>>> unitree_z1_stackbox: data stats loaded. +>>> unitree_z1_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox: data stats loaded. +>>> unitree_z1_dual_arm_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded. +>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated. +>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated. +>>> unitree_g1_pack_camera: 1 data samples loaded. +>>> unitree_g1_pack_camera: data stats loaded. +>>> unitree_g1_pack_camera: normalizer initiated. +>>> Dataset is successfully loaded ... + ✓ KV fused: 66 attention layers + TRT output 'y': [1, 4, 16, 40, 64] torch.float32 + TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32 + TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32 +>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine +>>> Generate 16 frames under each generation ... +DEBUG:h5py._conv:Creating converter from 3 to 5 +DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 +DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 +DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 + 0%| | 0/11 [00:00>> Step 0: generating actions ... +>>> Step 0: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 1: generating actions ... +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin + 18%|█▊ | 2/11 [00:33<02:30, 16.75s/it] 27%|██▋ | 3/11 [00:50<02:15, 16.91s/it] 36%|███▋ | 4/11 [01:07<01:59, 17.02s/it] 45%|████▌ | 5/11 [01:24<01:41, 16.98s/it] 55%|█████▍ | 6/11 [01:41<01:24, 16.94s/it] 64%|██████▎ | 7/11 [01:58<01:07, 16.90s/it] 73%|███████▎ | 8/11 [02:15<00:50, 16.83s/it] 82%|████████▏ | 9/11 [02:31<00:33, 16.80s/it] 91%|█████████ | 10/11 [02:49<00:16, 16.94s/it] 100%|██████████| 11/11 [03:06<00:00, 16.97s/it] 100%|██████████| 11/11 [03:06<00:00, 16.91s/it] +>>> Step 1: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 2: generating actions ... +>>> Step 2: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 3: generating actions ... +>>> Step 3: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 4: generating actions ... +>>> Step 4: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 5: generating actions ... +>>> Step 5: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 6: generating actions ... +>>> Step 6: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 7: generating actions ... +>>> Step 7: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 8: generating actions ... +>>> Step 8: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 9: generating actions ... +>>> Step 9: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 10: generating actions ... +>>> Step 10: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> + +real 3m49.162s +user 4m12.814s +sys 0m45.565s +2026-02-18 19:09:35.113634: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-18 19:09:35.161428: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-18 19:09:35.161474: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-18 19:09:35.162551: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-18 19:09:35.169325: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2026-02-18 19:09:36.089250: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +Global seed set to 123 +>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... +>>> Prepared model loaded. +INFO:root:***** Configing Data ***** +>>> unitree_z1_stackbox: 1 data samples loaded. +>>> unitree_z1_stackbox: data stats loaded. +>>> unitree_z1_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox: data stats loaded. +>>> unitree_z1_dual_arm_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded. +>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated. +>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated. +>>> unitree_g1_pack_camera: 1 data samples loaded. +>>> unitree_g1_pack_camera: data stats loaded. +>>> unitree_g1_pack_camera: normalizer initiated. +>>> Dataset is successfully loaded ... + ✓ KV fused: 66 attention layers + TRT output 'y': [1, 4, 16, 40, 64] torch.float32 + TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32 + TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32 +>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine +>>> Generate 16 frames under each generation ... +DEBUG:h5py._conv:Creating converter from 3 to 5 +DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 +DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 +DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 + 0%| | 0/11 [00:00>> Step 0: generating actions ... +>>> Step 0: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 1: generating actions ... +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin + 18%|█▊ | 2/11 [00:33<02:29, 16.64s/it] 27%|██▋ | 3/11 [00:49<02:13, 16.67s/it] 36%|███▋ | 4/11 [01:07<01:58, 16.94s/it] 45%|████▌ | 5/11 [01:24<01:42, 17.08s/it] 55%|█████▍ | 6/11 [01:41<01:25, 17.05s/it] 64%|██████▎ | 7/11 [01:58<01:08, 17.09s/it] 73%|███████▎ | 8/11 [02:15<00:51, 17.02s/it] 82%|████████▏ | 9/11 [02:33<00:34, 17.18s/it] 91%|█████████ | 10/11 [02:50<00:17, 17.29s/it] 100%|██████████| 11/11 [03:07<00:00, 17.27s/it] 100%|██████████| 11/11 [03:07<00:00, 17.09s/it] diff --git a/run_all_psnr.sh b/run_all_psnr.sh new file mode 100644 index 0000000..863edc0 --- /dev/null +++ b/run_all_psnr.sh @@ -0,0 +1,61 @@ +#!/bin/bash +set -e + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +cd "$SCRIPT_DIR" + +SCENARIOS=( + unitree_g1_pack_camera + unitree_z1_dual_arm_cleanup_pencils + unitree_z1_dual_arm_stackbox + unitree_z1_dual_arm_stackbox_v2 + unitree_z1_stackbox +) + +CASES=(case1 case2 case3 case4) + +total=0 +success=0 +fail=0 + +for scenario in "${SCENARIOS[@]}"; do + for case in "${CASES[@]}"; do + case_dir="${scenario}/${case}" + gt_video="${case_dir}/${scenario}_${case}.mp4" + pred_video=$(ls "${case_dir}"/output/inference/*_full_fs*.mp4 2>/dev/null | head -1) + output_file="${case_dir}/psnr_result.json" + + total=$((total + 1)) + echo "==========================================" + echo "[${total}/20] ${case_dir}" + + if [ ! -f "$gt_video" ]; then + echo " SKIP: GT video not found: $gt_video" + fail=$((fail + 1)) + continue + fi + if [ -z "$pred_video" ]; then + echo " SKIP: pred video not found in ${case_dir}/output/inference/" + fail=$((fail + 1)) + continue + fi + + echo " GT: $gt_video" + echo " Pred: $pred_video" + echo " Out: $output_file" + + if python3 psnr_score_for_challenge.py \ + --gt_video "$gt_video" \ + --pred_video "$pred_video" \ + --output_file "$output_file"; then + success=$((success + 1)) + echo " DONE" + else + fail=$((fail + 1)) + echo " FAILED" + fi + done +done + +echo "==========================================" +echo "Finished: ${success} success, ${fail} fail, ${total} total" diff --git a/scripts/evaluation/world_model_interaction.py b/scripts/evaluation/world_model_interaction.py index 4b02d3e..f324792 100644 --- a/scripts/evaluation/world_model_interaction.py +++ b/scripts/evaluation/world_model_interaction.py @@ -585,6 +585,11 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None: if isinstance(m, CrossAttention) and m.fuse_kv()) print(f" ✓ KV fused: {kv_count} attention layers") + # Load TRT backbone if engine exists + trt_engine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'trt_engines', 'video_backbone.engine') + if os.path.exists(trt_engine_path): + model.model.diffusion_model.load_trt_backbone(trt_engine_path) + # Run over data assert (args.height % 16 == 0) and ( args.width % 16 diff --git a/scripts/export_trt.py b/scripts/export_trt.py new file mode 100644 index 0000000..7854c25 --- /dev/null +++ b/scripts/export_trt.py @@ -0,0 +1,87 @@ +"""Export video UNet backbone to ONNX, then convert to TensorRT engine. + +Usage: + python scripts/export_trt.py \ + --ckpt ckpts/unifolm_wma_dual.ckpt.prepared.pt \ + --config configs/inference/world_model_interaction.yaml \ + --out_dir trt_engines +""" + +import os +import sys +import argparse + +import torch +import tensorrt as trt +from omegaconf import OmegaConf + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src')) +from unifolm_wma.utils.utils import instantiate_from_config +from unifolm_wma.trt_utils import export_backbone_onnx + + +def load_model(config_path, ckpt_path): + if ckpt_path.endswith('.prepared.pt'): + model = torch.load(ckpt_path, map_location='cpu') + else: + config = OmegaConf.load(config_path) + model = instantiate_from_config(config.model) + state_dict = torch.load(ckpt_path, map_location='cpu') + if 'state_dict' in state_dict: + state_dict = state_dict['state_dict'] + model.load_state_dict(state_dict, strict=False) + model.eval().cuda() + return model + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--ckpt', required=True) + parser.add_argument('--config', default='configs/inference/world_model_interaction.yaml') + parser.add_argument('--out_dir', default='trt_engines') + parser.add_argument('--context_len', type=int, default=95) + parser.add_argument('--fp16', action='store_true', default=True) + args = parser.parse_args() + + os.makedirs(args.out_dir, exist_ok=True) + onnx_path = os.path.join(args.out_dir, 'video_backbone.onnx') + engine_path = os.path.join(args.out_dir, 'video_backbone.engine') + + if os.path.exists(onnx_path): + print(f">>> ONNX already exists at {onnx_path}, skipping export.") + n_outputs = 10 + else: + print(">>> Loading model ...") + model = load_model(args.config, args.ckpt) + print(">>> Exporting ONNX ...") + with torch.no_grad(): + n_outputs = export_backbone_onnx(model, onnx_path, context_len=args.context_len) + del model + torch.cuda.empty_cache() + + print(">>> Converting ONNX -> TensorRT engine ...") + logger = trt.Logger(trt.Logger.WARNING) + builder = trt.Builder(logger) + network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)) + parser = trt.OnnxParser(network, logger) + + if not parser.parse_from_file(os.path.abspath(onnx_path)): + for i in range(parser.num_errors): + print(f" ONNX parse error: {parser.get_error(i)}") + raise RuntimeError("ONNX parsing failed") + + config = builder.create_builder_config() + config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 16 << 30) + if args.fp16: + config.set_flag(trt.BuilderFlag.FP16) + + engine_bytes = builder.build_serialized_network(network, config) + with open(engine_path, 'wb') as f: + f.write(engine_bytes) + + print(f"\n>>> Done! Engine saved to {engine_path}") + print(f" Outputs: 1 y + {n_outputs - 1} hs_a tensors") + + +if __name__ == '__main__': + main() diff --git a/src/unifolm_wma/modules/networks/wma_model.py b/src/unifolm_wma/modules/networks/wma_model.py index 8ebb8cc..af841cf 100644 --- a/src/unifolm_wma/modules/networks/wma_model.py +++ b/src/unifolm_wma/modules/networks/wma_model.py @@ -688,6 +688,7 @@ class WMAModel(nn.Module): # Context precomputation cache self._ctx_cache_enabled = False self._ctx_cache = {} + self._trt_backbone = None # TRT engine for video UNet backbone # Reusable CUDA stream for parallel state_unet / action_unet self._state_stream = torch.cuda.Stream() @@ -700,6 +701,12 @@ class WMAModel(nn.Module): self.__dict__.update(state) self._state_stream = torch.cuda.Stream() + def load_trt_backbone(self, engine_path, n_hs_a=9): + """Load a TensorRT engine for the video UNet backbone.""" + from unifolm_wma.trt_utils import TRTBackbone + self._trt_backbone = TRTBackbone(engine_path, n_hs_a=n_hs_a) + print(f">>> TRT backbone loaded from {engine_path}") + def forward(self, x: Tensor, x_action: Tensor, @@ -812,44 +819,50 @@ class WMAModel(nn.Module): fs_embed = fs_embed.repeat_interleave(repeats=t, dim=0) emb = emb + fs_embed - h = x.type(self.dtype) - adapter_idx = 0 - hs = [] - hs_a = [] - for id, module in enumerate(self.input_blocks): - h = module(h, emb, context=context, batch_size=b) - if id == 0 and self.addition_attention: - h = self.init_attn(h, emb, context=context, batch_size=b) - # plug-in adapter features - if ((id + 1) % 3 == 0) and features_adapter is not None: - h = h + features_adapter[adapter_idx] - adapter_idx += 1 - if id != 0: - if isinstance(module[0], Downsample): + if self._trt_backbone is not None: + # TRT path: run backbone via TensorRT engine + h_in = x.type(self.dtype).contiguous() + y, hs_a = self._trt_backbone(h_in, emb.contiguous(), context.contiguous()) + else: + # PyTorch path: original backbone + h = x.type(self.dtype) + adapter_idx = 0 + hs = [] + hs_a = [] + for id, module in enumerate(self.input_blocks): + h = module(h, emb, context=context, batch_size=b) + if id == 0 and self.addition_attention: + h = self.init_attn(h, emb, context=context, batch_size=b) + # plug-in adapter features + if ((id + 1) % 3 == 0) and features_adapter is not None: + h = h + features_adapter[adapter_idx] + adapter_idx += 1 + if id != 0: + if isinstance(module[0], Downsample): + hs_a.append( + rearrange(hs[-1], '(b t) c h w -> b t c h w', t=t)) + hs.append(h) + hs_a.append(rearrange(h, '(b t) c h w -> b t c h w', t=t)) + + if features_adapter is not None: + assert len( + features_adapter) == adapter_idx, 'Wrong features_adapter' + h = self.middle_block(h, emb, context=context, batch_size=b) + hs_a.append(rearrange(h, '(b t) c h w -> b t c h w', t=t)) + + hs_out = [] + for module in self.output_blocks: + h = torch.cat([h, hs.pop()], dim=1) + h = module(h, emb, context=context, batch_size=b) + if isinstance(module[-1], Upsample): hs_a.append( - rearrange(hs[-1], '(b t) c h w -> b t c h w', t=t)) - hs.append(h) - hs_a.append(rearrange(h, '(b t) c h w -> b t c h w', t=t)) + rearrange(hs_out[-1], '(b t) c h w -> b t c h w', t=t)) + hs_out.append(h) + h = h.type(x.dtype) + hs_a.append(rearrange(hs_out[-1], '(b t) c h w -> b t c h w', t=t)) - if features_adapter is not None: - assert len( - features_adapter) == adapter_idx, 'Wrong features_adapter' - h = self.middle_block(h, emb, context=context, batch_size=b) - hs_a.append(rearrange(h, '(b t) c h w -> b t c h w', t=t)) - - hs_out = [] - for module in self.output_blocks: - h = torch.cat([h, hs.pop()], dim=1) - h = module(h, emb, context=context, batch_size=b) - if isinstance(module[-1], Upsample): - hs_a.append( - rearrange(hs_out[-1], '(b t) c h w -> b t c h w', t=t)) - hs_out.append(h) - h = h.type(x.dtype) - hs_a.append(rearrange(hs_out[-1], '(b t) c h w -> b t c h w', t=t)) - - y = self.out(h) - y = rearrange(y, '(b t) c h w -> b c t h w', b=b) + y = self.out(h) + y = rearrange(y, '(b t) c h w -> b c t h w', b=b) if not self.base_model_gen_only: ba, _, _ = x_action.shape diff --git a/src/unifolm_wma/trt_utils.py b/src/unifolm_wma/trt_utils.py new file mode 100644 index 0000000..9355991 --- /dev/null +++ b/src/unifolm_wma/trt_utils.py @@ -0,0 +1,151 @@ +"""TensorRT acceleration utilities for the video UNet backbone.""" + +import torch +import torch.nn as nn +from einops import rearrange +from unifolm_wma.modules.networks.wma_model import Downsample, Upsample + + +class VideoBackboneForExport(nn.Module): + """Wrapper that isolates the video UNet backbone for ONNX export. + + Takes already-preprocessed inputs (after context/time embedding prep) + and returns y + hs_a as a flat tuple. + """ + + def __init__(self, wma_model): + super().__init__() + self.input_blocks = wma_model.input_blocks + self.middle_block = wma_model.middle_block + self.output_blocks = wma_model.output_blocks + self.out = wma_model.out + self.addition_attention = wma_model.addition_attention + if self.addition_attention: + self.init_attn = wma_model.init_attn + self.dtype = wma_model.dtype + + def forward(self, h, emb, context): + t = 16 + b = 1 + + hs = [] + hs_a = [] + h = h.type(self.dtype) + for id, module in enumerate(self.input_blocks): + h = module(h, emb, context=context, batch_size=b) + if id == 0 and self.addition_attention: + h = self.init_attn(h, emb, context=context, batch_size=b) + if id != 0: + if isinstance(module[0], Downsample): + hs_a.append(rearrange(hs[-1], '(b t) c h w -> b t c h w', t=t)) + hs.append(h) + hs_a.append(rearrange(h, '(b t) c h w -> b t c h w', t=t)) + + h = self.middle_block(h, emb, context=context, batch_size=b) + hs_a.append(rearrange(h, '(b t) c h w -> b t c h w', t=t)) + + hs_out = [] + for module in self.output_blocks: + h = torch.cat([h, hs.pop()], dim=1) + h = module(h, emb, context=context, batch_size=b) + if isinstance(module[-1], Upsample): + hs_a.append(rearrange(hs_out[-1], '(b t) c h w -> b t c h w', t=t)) + hs_out.append(h) + hs_a.append(rearrange(hs_out[-1], '(b t) c h w -> b t c h w', t=t)) + + y = self.out(h.type(h.dtype)) + y = rearrange(y, '(b t) c h w -> b c t h w', b=b) + return (y, *hs_a) + + +def export_backbone_onnx(model, save_path, context_len=95): + wma = model.model.diffusion_model + wrapper = VideoBackboneForExport(wma) + wrapper.eval().cuda() + + for m in wrapper.modules(): + if hasattr(m, 'checkpoint'): + m.checkpoint = False + if hasattr(m, 'use_checkpoint'): + m.use_checkpoint = False + + import xformers.ops + _orig_mea = xformers.ops.memory_efficient_attention + def _sdpa_replacement(q, k, v, attn_bias=None, op=None, **kw): + return torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=attn_bias) + xformers.ops.memory_efficient_attention = _sdpa_replacement + + BT = 16 + emb_dim = wma.model_channels * 4 + ctx_dim = 1024 + in_ch = wma.in_channels + + dummy_h = torch.randn(BT, in_ch, 40, 64, device='cuda', dtype=torch.float32) + dummy_emb = torch.randn(BT, emb_dim, device='cuda', dtype=torch.float32) + dummy_ctx = torch.randn(BT, context_len, ctx_dim, device='cuda', dtype=torch.float32) + + with torch.no_grad(): + outputs = wrapper(dummy_h, dummy_emb, dummy_ctx) + n_outputs = len(outputs) + print(f">>> Backbone has {n_outputs} outputs (1 y + {n_outputs-1} hs_a)") + for i, o in enumerate(outputs): + print(f" output[{i}]: {o.shape} {o.dtype}") + + output_names = ['y'] + [f'hs_a_{i}' for i in range(n_outputs - 1)] + + torch.onnx.export( + wrapper, + (dummy_h, dummy_emb, dummy_ctx), + save_path, + input_names=['h', 'emb', 'context'], + output_names=output_names, + opset_version=17, + do_constant_folding=True, + ) + print(f">>> ONNX exported to {save_path}") + xformers.ops.memory_efficient_attention = _orig_mea + return n_outputs + + +class TRTBackbone: + """TensorRT runtime wrapper for the video UNet backbone.""" + + def __init__(self, engine_path, n_hs_a=9): + import tensorrt as trt + + self.logger = trt.Logger(trt.Logger.WARNING) + with open(engine_path, 'rb') as f: + runtime = trt.Runtime(self.logger) + self.engine = runtime.deserialize_cuda_engine(f.read()) + self.context = self.engine.create_execution_context() + self.n_hs_a = n_hs_a + + import numpy as np + self.output_buffers = {} + for i in range(self.engine.num_io_tensors): + name = self.engine.get_tensor_name(i) + if self.engine.get_tensor_mode(name) == trt.TensorIOMode.OUTPUT: + shape = self.engine.get_tensor_shape(name) + np_dtype = trt.nptype(self.engine.get_tensor_dtype(name)) + buf = torch.empty(list(shape), dtype=torch.from_numpy(np.empty(0, dtype=np_dtype)).dtype, device='cuda') + self.output_buffers[name] = buf + print(f" TRT output '{name}': {list(shape)} {buf.dtype}") + + def __call__(self, h, emb, context): + import tensorrt as trt + for name, tensor in [('h', h), ('emb', emb), ('context', context)]: + expected_dtype = trt.nptype(self.engine.get_tensor_dtype(name)) + torch_expected = torch.from_numpy(__import__('numpy').empty(0, dtype=expected_dtype)).dtype + if tensor.dtype != torch_expected: + tensor = tensor.to(torch_expected) + self.context.set_tensor_address(name, tensor.contiguous().data_ptr()) + + for name, buf in self.output_buffers.items(): + self.context.set_tensor_address(name, buf.data_ptr()) + + self.context.execute_async_v3(torch.cuda.current_stream().cuda_stream) + torch.cuda.synchronize() + + y = self.output_buffers['y'] + hs_a = [self.output_buffers[f'hs_a_{i}'] for i in range(self.n_hs_a)] + return y, hs_a diff --git a/unitree_g1_pack_camera/case1/output.log b/unitree_g1_pack_camera/case1/output.log new file mode 100644 index 0000000..c900887 --- /dev/null +++ b/unitree_g1_pack_camera/case1/output.log @@ -0,0 +1,179 @@ +2026-02-18 19:01:56.891895: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-18 19:01:56.940243: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-18 19:01:56.940285: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-18 19:01:56.941395: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-18 19:01:56.948327: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2026-02-18 19:01:57.870809: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +Global seed set to 123 +>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... +>>> Prepared model loaded. +INFO:root:***** Configing Data ***** +>>> unitree_z1_stackbox: 1 data samples loaded. +>>> unitree_z1_stackbox: data stats loaded. +>>> unitree_z1_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox: data stats loaded. +>>> unitree_z1_dual_arm_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded. +>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated. +>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated. +>>> unitree_g1_pack_camera: 1 data samples loaded. +>>> unitree_g1_pack_camera: data stats loaded. +>>> unitree_g1_pack_camera: normalizer initiated. +>>> Dataset is successfully loaded ... + ✓ KV fused: 66 attention layers + TRT output 'y': [1, 4, 16, 40, 64] torch.float32 + TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32 + TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32 +>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine +>>> Generate 16 frames under each generation ... +DEBUG:h5py._conv:Creating converter from 3 to 5 +DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 +DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 +DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 + 0%| | 0/11 [00:00>> Step 0: generating actions ... +>>> Step 0: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 1: generating actions ... +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin + 18%|█▊ | 2/11 [00:33<02:31, 16.87s/it] 27%|██▋ | 3/11 [00:50<02:14, 16.76s/it] 36%|███▋ | 4/11 [01:07<01:57, 16.81s/it] 45%|████▌ | 5/11 [01:24<01:41, 16.85s/it] 55%|█████▍ | 6/11 [01:41<01:24, 16.82s/it] 64%|██████▎ | 7/11 [01:57<01:07, 16.82s/it] 73%|███████▎ | 8/11 [02:14<00:50, 16.83s/it] 82%|████████▏ | 9/11 [02:31<00:33, 16.80s/it] 91%|█████████ | 10/11 [02:48<00:16, 16.81s/it] 100%|██████████| 11/11 [03:05<00:00, 16.81s/it] 100%|██████████| 11/11 [03:05<00:00, 16.83s/it] +>>> Step 1: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 2: generating actions ... +>>> Step 2: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 3: generating actions ... +>>> Step 3: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 4: generating actions ... +>>> Step 4: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 5: generating actions ... +>>> Step 5: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 6: generating actions ... +>>> Step 6: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 7: generating actions ... +>>> Step 7: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 8: generating actions ... +>>> Step 8: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 9: generating actions ... +>>> Step 9: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 10: generating actions ... +>>> Step 10: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> + +real 3m49.072s +user 4m16.055s +sys 0m44.636s diff --git a/unitree_g1_pack_camera/case1/psnr_result.json b/unitree_g1_pack_camera/case1/psnr_result.json new file mode 100644 index 0000000..668fbe5 --- /dev/null +++ b/unitree_g1_pack_camera/case1/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_g1_pack_camera/case1/unitree_g1_pack_camera_case1.mp4", + "pred_video": "unitree_g1_pack_camera/case1/output/inference/0_full_fs6.mp4", + "psnr": 35.615362167470806 +} \ No newline at end of file diff --git a/unitree_g1_pack_camera/case2/output.log b/unitree_g1_pack_camera/case2/output.log new file mode 100644 index 0000000..d7d0360 --- /dev/null +++ b/unitree_g1_pack_camera/case2/output.log @@ -0,0 +1,179 @@ +2026-02-18 19:05:45.956647: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-18 19:05:46.004149: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-18 19:05:46.004193: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-18 19:05:46.005265: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-18 19:05:46.012074: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2026-02-18 19:05:46.932966: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +Global seed set to 123 +>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... +>>> Prepared model loaded. +INFO:root:***** Configing Data ***** +>>> unitree_z1_stackbox: 1 data samples loaded. +>>> unitree_z1_stackbox: data stats loaded. +>>> unitree_z1_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox: data stats loaded. +>>> unitree_z1_dual_arm_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded. +>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated. +>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated. +>>> unitree_g1_pack_camera: 1 data samples loaded. +>>> unitree_g1_pack_camera: data stats loaded. +>>> unitree_g1_pack_camera: normalizer initiated. +>>> Dataset is successfully loaded ... + ✓ KV fused: 66 attention layers + TRT output 'y': [1, 4, 16, 40, 64] torch.float32 + TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32 + TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32 +>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine +>>> Generate 16 frames under each generation ... +DEBUG:h5py._conv:Creating converter from 3 to 5 +DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 +DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 +DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 + 0%| | 0/11 [00:00>> Step 0: generating actions ... +>>> Step 0: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 1: generating actions ... +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin + 18%|█▊ | 2/11 [00:33<02:30, 16.75s/it] 27%|██▋ | 3/11 [00:50<02:15, 16.91s/it] 36%|███▋ | 4/11 [01:07<01:59, 17.02s/it] 45%|████▌ | 5/11 [01:24<01:41, 16.98s/it] 55%|█████▍ | 6/11 [01:41<01:24, 16.94s/it] 64%|██████▎ | 7/11 [01:58<01:07, 16.90s/it] 73%|███████▎ | 8/11 [02:15<00:50, 16.83s/it] 82%|████████▏ | 9/11 [02:31<00:33, 16.80s/it] 91%|█████████ | 10/11 [02:49<00:16, 16.94s/it] 100%|██████████| 11/11 [03:06<00:00, 16.97s/it] 100%|██████████| 11/11 [03:06<00:00, 16.91s/it] +>>> Step 1: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 2: generating actions ... +>>> Step 2: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 3: generating actions ... +>>> Step 3: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 4: generating actions ... +>>> Step 4: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 5: generating actions ... +>>> Step 5: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 6: generating actions ... +>>> Step 6: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 7: generating actions ... +>>> Step 7: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 8: generating actions ... +>>> Step 8: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 9: generating actions ... +>>> Step 9: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 10: generating actions ... +>>> Step 10: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> + +real 3m49.162s +user 4m12.814s +sys 0m45.565s diff --git a/unitree_g1_pack_camera/case2/psnr_result.json b/unitree_g1_pack_camera/case2/psnr_result.json new file mode 100644 index 0000000..a22828d --- /dev/null +++ b/unitree_g1_pack_camera/case2/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_g1_pack_camera/case2/unitree_g1_pack_camera_case2.mp4", + "pred_video": "unitree_g1_pack_camera/case2/output/inference/50_full_fs6.mp4", + "psnr": 34.61979248212279 +} \ No newline at end of file diff --git a/unitree_g1_pack_camera/case3/output.log b/unitree_g1_pack_camera/case3/output.log new file mode 100644 index 0000000..e83b26c --- /dev/null +++ b/unitree_g1_pack_camera/case3/output.log @@ -0,0 +1,146 @@ +2026-02-18 19:09:35.113634: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-18 19:09:35.161428: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-18 19:09:35.161474: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-18 19:09:35.162551: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-18 19:09:35.169325: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2026-02-18 19:09:36.089250: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +Global seed set to 123 +>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... +>>> Prepared model loaded. +INFO:root:***** Configing Data ***** +>>> unitree_z1_stackbox: 1 data samples loaded. +>>> unitree_z1_stackbox: data stats loaded. +>>> unitree_z1_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox: data stats loaded. +>>> unitree_z1_dual_arm_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded. +>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated. +>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated. +>>> unitree_g1_pack_camera: 1 data samples loaded. +>>> unitree_g1_pack_camera: data stats loaded. +>>> unitree_g1_pack_camera: normalizer initiated. +>>> Dataset is successfully loaded ... + ✓ KV fused: 66 attention layers + TRT output 'y': [1, 4, 16, 40, 64] torch.float32 + TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32 + TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32 +>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine +>>> Generate 16 frames under each generation ... +DEBUG:h5py._conv:Creating converter from 3 to 5 +DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 +DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 +DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 + 0%| | 0/11 [00:00>> Step 0: generating actions ... +>>> Step 0: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 1: generating actions ... +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin + 18%|█▊ | 2/11 [00:33<02:29, 16.64s/it] 27%|██▋ | 3/11 [00:49<02:13, 16.67s/it] 36%|███▋ | 4/11 [01:07<01:58, 16.94s/it] 45%|████▌ | 5/11 [01:24<01:42, 17.08s/it] 55%|█████▍ | 6/11 [01:41<01:25, 17.05s/it] 64%|██████▎ | 7/11 [01:58<01:08, 17.09s/it] 73%|███████▎ | 8/11 [02:15<00:51, 17.02s/it] 82%|████████▏ | 9/11 [02:33<00:34, 17.18s/it] 91%|█████████ | 10/11 [02:50<00:17, 17.29s/it] 100%|██████████| 11/11 [03:07<00:00, 17.27s/it] 100%|██████████| 11/11 [03:07<00:00, 17.09s/it] diff --git a/unitree_g1_pack_camera/case3/psnr_result.json b/unitree_g1_pack_camera/case3/psnr_result.json new file mode 100644 index 0000000..a9a2d4d --- /dev/null +++ b/unitree_g1_pack_camera/case3/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_g1_pack_camera/case3/unitree_g1_pack_camera_case3.mp4", + "pred_video": "unitree_g1_pack_camera/case3/output/inference/100_full_fs6.mp4", + "psnr": 37.034952654534486 +} \ No newline at end of file diff --git a/unitree_g1_pack_camera/case4/psnr_result.json b/unitree_g1_pack_camera/case4/psnr_result.json new file mode 100644 index 0000000..eba90df --- /dev/null +++ b/unitree_g1_pack_camera/case4/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_g1_pack_camera/case4/unitree_g1_pack_camera_case4.mp4", + "pred_video": "unitree_g1_pack_camera/case4/output/inference/200_full_fs6.mp4", + "psnr": 31.43390896360405 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_cleanup_pencils/case2/psnr_result.json b/unitree_z1_dual_arm_cleanup_pencils/case2/psnr_result.json new file mode 100644 index 0000000..9c73585 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case2/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_cleanup_pencils/case2/unitree_z1_dual_arm_cleanup_pencils_case2.mp4", + "pred_video": "unitree_z1_dual_arm_cleanup_pencils/case2/output/inference/50_full_fs4.mp4", + "psnr": 48.344571927558974 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_cleanup_pencils/case3/psnr_result.json b/unitree_z1_dual_arm_cleanup_pencils/case3/psnr_result.json new file mode 100644 index 0000000..eabf3e9 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case3/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_cleanup_pencils/case3/unitree_z1_dual_arm_cleanup_pencils_case3.mp4", + "pred_video": "unitree_z1_dual_arm_cleanup_pencils/case3/output/inference/100_full_fs4.mp4", + "psnr": 41.152374490134825 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_cleanup_pencils/case4/psnr_result.json b/unitree_z1_dual_arm_cleanup_pencils/case4/psnr_result.json new file mode 100644 index 0000000..fe01c52 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case4/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_cleanup_pencils/case4/unitree_z1_dual_arm_cleanup_pencils_case4.mp4", + "pred_video": "unitree_z1_dual_arm_cleanup_pencils/case4/output/inference/200_full_fs4.mp4", + "psnr": 46.025723557253855 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_stackbox/case1/psnr_result.json b/unitree_z1_dual_arm_stackbox/case1/psnr_result.json new file mode 100644 index 0000000..d4ec454 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case1/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_stackbox/case1/unitree_z1_dual_arm_stackbox_case1.mp4", + "pred_video": "unitree_z1_dual_arm_stackbox/case1/output/inference/5_full_fs4.mp4", + "psnr": 44.3480149502738 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_stackbox/case2/psnr_result.json b/unitree_z1_dual_arm_stackbox/case2/psnr_result.json new file mode 100644 index 0000000..4e41fef --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case2/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_stackbox/case2/unitree_z1_dual_arm_stackbox_case2.mp4", + "pred_video": "unitree_z1_dual_arm_stackbox/case2/output/inference/15_full_fs4.mp4", + "psnr": 39.867728254007716 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_stackbox/case3/psnr_result.json b/unitree_z1_dual_arm_stackbox/case3/psnr_result.json new file mode 100644 index 0000000..5ac702a --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case3/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_stackbox/case3/unitree_z1_dual_arm_stackbox_case3.mp4", + "pred_video": "unitree_z1_dual_arm_stackbox/case3/output/inference/25_full_fs4.mp4", + "psnr": 39.19101039445159 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_stackbox/case4/psnr_result.json b/unitree_z1_dual_arm_stackbox/case4/psnr_result.json new file mode 100644 index 0000000..821efad --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case4/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_stackbox/case4/unitree_z1_dual_arm_stackbox_case4.mp4", + "pred_video": "unitree_z1_dual_arm_stackbox/case4/output/inference/35_full_fs4.mp4", + "psnr": 40.29563315341769 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_stackbox_v2/case1/output.log b/unitree_z1_dual_arm_stackbox_v2/case1/output.log index 6395a97..d063c7e 100644 --- a/unitree_z1_dual_arm_stackbox_v2/case1/output.log +++ b/unitree_z1_dual_arm_stackbox_v2/case1/output.log @@ -1,10 +1,10 @@ -2026-02-11 11:59:27.241485: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. -2026-02-11 11:59:27.291755: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered -2026-02-11 11:59:27.291807: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered -2026-02-11 11:59:27.293169: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered -2026-02-11 11:59:27.300838: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +2026-02-18 18:49:49.117856: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-18 18:49:49.165270: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-18 18:49:49.165322: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-18 18:49:49.166382: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-18 18:49:49.173299: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. -2026-02-11 11:59:28.228009: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +2026-02-18 18:49:50.090214: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Global seed set to 123 >>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... >>> Prepared model loaded. @@ -26,12 +26,24 @@ INFO:root:***** Configing Data ***** >>> unitree_g1_pack_camera: normalizer initiated. >>> Dataset is successfully loaded ... ✓ KV fused: 66 attention layers + TRT output 'y': [1, 4, 16, 40, 64] torch.float32 + TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32 + TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32 +>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine >>> Generate 16 frames under each generation ... DEBUG:h5py._conv:Creating converter from 3 to 5 DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 - 0%| | 0/11 [00:00>> Step 0: generating actions ... + 0%| | 0/11 [00:00>> Step 0: generating actions ... >>> Step 0: interacting with world model ... >>>>>>>>>>>>>>>>>>>>>>>> >>> Step 1: generating actions ... @@ -84,7 +96,7 @@ DEBUG:PIL.Image:Importing WmfImagePlugin DEBUG:PIL.Image:Importing XbmImagePlugin DEBUG:PIL.Image:Importing XpmImagePlugin DEBUG:PIL.Image:Importing XVThumbImagePlugin - 18%|█▊ | 2/11 [01:08<05:07, 34.17s/it] 27%|██▋ | 3/11 [01:42<04:33, 34.16s/it] 36%|███▋ | 4/11 [02:16<03:59, 34.18s/it] 45%|████▌ | 5/11 [02:50<03:24, 34.14s/it] 55%|█████▍ | 6/11 [03:24<02:50, 34.10s/it] 64%|██████▎ | 7/11 [03:58<02:16, 34.07s/it] 73%|███████▎ | 8/11 [04:32<01:42, 34.03s/it] 82%|████████▏ | 9/11 [05:06<01:08, 34.02s/it] 91%|█████████ | 10/11 [05:40<00:34, 34.04s/it] 100%|██████████| 11/11 [06:14<00:00, 34.03s/it] 100%|██████████| 11/11 [06:14<00:00, 34.07s/it] + 18%|█▊ | 2/11 [00:31<02:21, 15.71s/it] 27%|██▋ | 3/11 [00:47<02:06, 15.86s/it] 36%|███▋ | 4/11 [01:03<01:51, 15.90s/it] 45%|████▌ | 5/11 [01:19<01:36, 16.06s/it] 55%|█████▍ | 6/11 [01:35<01:19, 15.98s/it] 64%|██████▎ | 7/11 [01:51<01:04, 16.09s/it] 73%|███████▎ | 8/11 [02:08<00:48, 16.08s/it] 82%|████████▏ | 9/11 [02:24<00:32, 16.16s/it] 91%|█████████ | 10/11 [02:40<00:16, 16.13s/it] 100%|██████████| 11/11 [02:56<00:00, 16.09s/it] 100%|██████████| 11/11 [02:56<00:00, 16.04s/it] >>> Step 1: interacting with world model ... >>>>>>>>>>>>>>>>>>>>>>>> >>> Step 2: generating actions ... @@ -115,6 +127,6 @@ DEBUG:PIL.Image:Importing XVThumbImagePlugin >>> Step 10: interacting with world model ... >>>>>>>>>>>>>>>>>>>>>>>> -real 6m51.758s -user 6m23.024s -sys 1m19.488s +real 3m40.927s +user 3m50.981s +sys 0m40.749s diff --git a/unitree_z1_dual_arm_stackbox_v2/case1/psnr_result.json b/unitree_z1_dual_arm_stackbox_v2/case1/psnr_result.json index dec481b..c0610e6 100644 --- a/unitree_z1_dual_arm_stackbox_v2/case1/psnr_result.json +++ b/unitree_z1_dual_arm_stackbox_v2/case1/psnr_result.json @@ -1,5 +1,5 @@ { - "gt_video": "/home/qhy/unifolm-world-model-action/unitree_z1_dual_arm_stackbox_v2/case1/unitree_z1_dual_arm_stackbox_v2_case1.mp4", - "pred_video": "/home/qhy/unifolm-world-model-action/unitree_z1_dual_arm_stackbox_v2/case1/output/inference/5_full_fs4.mp4", - "psnr": 28.167025381705358 + "gt_video": "unitree_z1_dual_arm_stackbox_v2/case1/unitree_z1_dual_arm_stackbox_v2_case1.mp4", + "pred_video": "unitree_z1_dual_arm_stackbox_v2/case1/output/inference/5_full_fs4.mp4", + "psnr": 27.62636266067224 } \ No newline at end of file diff --git a/unitree_z1_dual_arm_stackbox_v2/case2/psnr_result.json b/unitree_z1_dual_arm_stackbox_v2/case2/psnr_result.json new file mode 100644 index 0000000..77eb0d8 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case2/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_stackbox_v2/case2/unitree_z1_dual_arm_stackbox_v2_case2.mp4", + "pred_video": "unitree_z1_dual_arm_stackbox_v2/case2/output/inference/15_full_fs4.mp4", + "psnr": 33.90444714332389 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_stackbox_v2/case3/psnr_result.json b/unitree_z1_dual_arm_stackbox_v2/case3/psnr_result.json new file mode 100644 index 0000000..c8715fc --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case3/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_stackbox_v2/case3/unitree_z1_dual_arm_stackbox_v2_case3.mp4", + "pred_video": "unitree_z1_dual_arm_stackbox_v2/case3/output/inference/25_full_fs4.mp4", + "psnr": 34.50192428908007 +} \ No newline at end of file diff --git a/unitree_z1_dual_arm_stackbox_v2/case4/output.log b/unitree_z1_dual_arm_stackbox_v2/case4/output.log new file mode 100644 index 0000000..d53ecc1 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case4/output.log @@ -0,0 +1,179 @@ +2026-02-18 18:54:56.403136: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-18 18:54:56.451144: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-18 18:54:56.451189: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-18 18:54:56.452312: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-18 18:54:56.459281: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. +2026-02-18 18:54:57.381032: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +Global seed set to 123 +>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... +>>> Prepared model loaded. +INFO:root:***** Configing Data ***** +>>> unitree_z1_stackbox: 1 data samples loaded. +>>> unitree_z1_stackbox: data stats loaded. +>>> unitree_z1_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox: data stats loaded. +>>> unitree_z1_dual_arm_stackbox: normalizer initiated. +>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded. +>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded. +>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated. +>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded. +>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated. +>>> unitree_g1_pack_camera: 1 data samples loaded. +>>> unitree_g1_pack_camera: data stats loaded. +>>> unitree_g1_pack_camera: normalizer initiated. +>>> Dataset is successfully loaded ... + ✓ KV fused: 66 attention layers + TRT output 'y': [1, 4, 16, 40, 64] torch.float32 + TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32 + TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32 + TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32 + TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32 + TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32 +>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine +>>> Generate 16 frames under each generation ... +DEBUG:h5py._conv:Creating converter from 3 to 5 +DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 +DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 +DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 + 0%| | 0/11 [00:00>> Step 0: generating actions ... +>>> Step 0: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 1: generating actions ... +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BlpImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing BmpImagePlugin +DEBUG:PIL.Image:Importing BufrStubImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing CurImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DcxImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing DdsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing EpsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FitsStubImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FliImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Importing FpxImagePlugin +DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing FtexImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GbrImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing GifImagePlugin +DEBUG:PIL.Image:Importing GribStubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing Hdf5StubImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcnsImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing IcoImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing ImtImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing IptcImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing JpegImagePlugin +DEBUG:PIL.Image:Importing Jpeg2KImagePlugin +DEBUG:PIL.Image:Importing McIdasImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Importing MicImagePlugin +DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile' +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpegImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MpoImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing MspImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PalmImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcdImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PcxImagePlugin +DEBUG:PIL.Image:Importing PdfImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PixarImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing PngImagePlugin +DEBUG:PIL.Image:Importing PpmImagePlugin +DEBUG:PIL.Image:Importing PsdImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing QoiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SgiImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SpiderImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing SunImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TgaImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing TiffImagePlugin +DEBUG:PIL.Image:Importing WebPImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing WmfImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XbmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XpmImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin +DEBUG:PIL.Image:Importing XVThumbImagePlugin + 18%|█▊ | 2/11 [00:33<02:28, 16.52s/it] 27%|██▋ | 3/11 [00:49<02:12, 16.53s/it] 36%|███▋ | 4/11 [01:06<01:56, 16.64s/it] 45%|████▌ | 5/11 [01:23<01:40, 16.69s/it] 55%|█████▍ | 6/11 [01:39<01:23, 16.71s/it] 64%|██████▎ | 7/11 [01:56<01:06, 16.68s/it] 73%|███████▎ | 8/11 [02:13<00:50, 16.68s/it] 82%|████████▏ | 9/11 [02:29<00:33, 16.71s/it] 91%|█████████ | 10/11 [02:46<00:16, 16.72s/it] 100%|██████████| 11/11 [03:03<00:00, 16.69s/it] 100%|██████████| 11/11 [03:03<00:00, 16.67s/it] +>>> Step 1: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 2: generating actions ... +>>> Step 2: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 3: generating actions ... +>>> Step 3: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 4: generating actions ... +>>> Step 4: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 5: generating actions ... +>>> Step 5: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 6: generating actions ... +>>> Step 6: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 7: generating actions ... +>>> Step 7: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 8: generating actions ... +>>> Step 8: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 9: generating actions ... +>>> Step 9: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 10: generating actions ... +>>> Step 10: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> + +real 3m50.543s +user 4m20.844s +sys 0m47.704s diff --git a/unitree_z1_dual_arm_stackbox_v2/case4/psnr_result.json b/unitree_z1_dual_arm_stackbox_v2/case4/psnr_result.json new file mode 100644 index 0000000..55fa825 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case4/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_dual_arm_stackbox_v2/case4/unitree_z1_dual_arm_stackbox_v2_case4.mp4", + "pred_video": "unitree_z1_dual_arm_stackbox_v2/case4/output/inference/35_full_fs4.mp4", + "psnr": 25.49270910031428 +} \ No newline at end of file diff --git a/unitree_z1_stackbox/case1/psnr_result.json b/unitree_z1_stackbox/case1/psnr_result.json new file mode 100644 index 0000000..f29b78c --- /dev/null +++ b/unitree_z1_stackbox/case1/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_stackbox/case1/unitree_z1_stackbox_case1.mp4", + "pred_video": "unitree_z1_stackbox/case1/output/inference/5_full_fs4.mp4", + "psnr": 42.83913947323794 +} \ No newline at end of file diff --git a/unitree_z1_stackbox/case2/psnr_result.json b/unitree_z1_stackbox/case2/psnr_result.json new file mode 100644 index 0000000..84b7fd5 --- /dev/null +++ b/unitree_z1_stackbox/case2/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_stackbox/case2/unitree_z1_stackbox_case2.mp4", + "pred_video": "unitree_z1_stackbox/case2/output/inference/15_full_fs4.mp4", + "psnr": 48.64571989587276 +} \ No newline at end of file diff --git a/unitree_z1_stackbox/case3/psnr_result.json b/unitree_z1_stackbox/case3/psnr_result.json new file mode 100644 index 0000000..30e0eb9 --- /dev/null +++ b/unitree_z1_stackbox/case3/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_stackbox/case3/unitree_z1_stackbox_case3.mp4", + "pred_video": "unitree_z1_stackbox/case3/output/inference/25_full_fs4.mp4", + "psnr": 45.127553229898034 +} \ No newline at end of file diff --git a/unitree_z1_stackbox/case4/psnr_result.json b/unitree_z1_stackbox/case4/psnr_result.json new file mode 100644 index 0000000..98658db --- /dev/null +++ b/unitree_z1_stackbox/case4/psnr_result.json @@ -0,0 +1,5 @@ +{ + "gt_video": "unitree_z1_stackbox/case4/unitree_z1_stackbox_case4.mp4", + "pred_video": "unitree_z1_stackbox/case4/output/inference/35_full_fs4.mp4", + "psnr": 50.642542240144444 +} \ No newline at end of file