Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 5863fbb656 | |||
| d9d9537d33 | |||
| 202062a647 |
5
.gitignore
vendored
@@ -55,7 +55,7 @@ coverage.xml
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
|
||||
@@ -121,10 +121,11 @@ localTest/
|
||||
fig/
|
||||
figure/
|
||||
*.mp4
|
||||
*.json
|
||||
Data/ControlVAE.yml
|
||||
Data/Misc
|
||||
Data/Pretrained
|
||||
Data/utils.py
|
||||
Experiment/checkpoint
|
||||
Experiment/log
|
||||
ckpts/unifolm_wma_dual.ckpt
|
||||
*.0
|
||||
@@ -222,7 +222,7 @@ data:
|
||||
test:
|
||||
target: unifolm_wma.data.wma_data.WMAData
|
||||
params:
|
||||
data_dir: '/path/to/unifolm-world-model-action/examples/world_model_interaction_prompts'
|
||||
data_dir: '/home/qhy/unifolm-world-model-action/examples/world_model_interaction_prompts'
|
||||
video_length: ${model.params.wma_config.params.temporal_length}
|
||||
frame_stride: 2
|
||||
load_raw_resolution: True
|
||||
|
||||
89
psnr_score_for_challenge.py
Normal file
@@ -0,0 +1,89 @@
|
||||
import os
|
||||
import glob
|
||||
import numpy as np
|
||||
import json
|
||||
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
|
||||
from tqdm import tqdm
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
import PIL.Image
|
||||
|
||||
|
||||
def calculate_psnr(img1, img2):
    """Return the peak signal-to-noise ratio, in dB, between two images.

    Both arrays are cast to float64 before they are subtracted, so unsigned
    integer inputs cannot wrap around. The peak pixel value is fixed at 255.

    Args:
        img1: First image as a NumPy array.
        img2: Second image as a NumPy array.

    Returns:
        ``float('inf')`` when the mean squared error is exactly zero,
        otherwise ``20 * log10(255 / sqrt(mse))``.
    """
    peak_value = 255.0
    difference = img1.astype(np.float64) - img2.astype(np.float64)
    mean_squared_error = np.mean(np.square(difference))

    # Identical images: the ratio is unbounded, so report infinity.
    if mean_squared_error == 0:
        return float('inf')

    return 20 * np.log10(peak_value / np.sqrt(mean_squared_error))
|
||||
|
||||
|
||||
def process_video_psnr(gt_path, pred_path):
    """Compute the mean per-frame PSNR between two video files.

    Frames are sampled from both clips at the lower of the two frame rates
    over the shorter of the two durations. Each sampled frame pair is resized
    to 256x256 with bilinear resampling before being compared with
    ``calculate_psnr``.

    Args:
        gt_path: Path to the reference (ground-truth) video.
        pred_path: Path to the predicted video.

    Returns:
        The mean PSNR in dB over all sampled frames, ``0.0`` when no frame was
        sampled, or ``None`` when any error occurred (the error is printed).
    """
    target_size = (256, 256)
    clip_gt = None
    clip_pred = None

    try:
        clip_gt = VideoFileClip(gt_path)
        clip_pred = VideoFileClip(pred_path)

        # Sample on the coarser time grid that both clips can provide.
        fps = min(clip_gt.fps, clip_pred.fps)
        duration = min(clip_gt.duration, clip_pred.duration)
        time_points = np.arange(0, duration, 1.0 / fps)

        video_psnrs = []
        for t in time_points:
            frame_gt = clip_gt.get_frame(t)
            frame_pred = clip_pred.get_frame(t)

            img_gt = PIL.Image.fromarray(frame_gt).resize(
                target_size, PIL.Image.Resampling.BILINEAR)
            img_pred = PIL.Image.fromarray(frame_pred).resize(
                target_size, PIL.Image.Resampling.BILINEAR)

            video_psnrs.append(
                calculate_psnr(np.array(img_gt), np.array(img_pred)))

        return np.mean(video_psnrs) if video_psnrs else 0.0

    except Exception as e:
        print(f"Error processing {os.path.basename(gt_path)}: {e}")
        return None

    finally:
        # Release the clips on every exit path; previously they were closed
        # only on success, leaking the readers whenever a frame failed.
        for clip in (clip_gt, clip_pred):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as close_error:
                print(f"Warning: failed to close clip: {close_error}")
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: report the PSNR of one GT/prediction pair.

    Parses ``--gt_video``, ``--pred_video`` and the optional ``--output_file``
    from the command line, prints the video PSNR, and writes a JSON result
    file when ``--output_file`` is given.

    Returns:
        ``0`` when the PSNR was computed, ``1`` when an input file is missing
        or the computation failed.
    """
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('--gt_video', type=str, required=True, help='path to reference videos')
    parser.add_argument('--pred_video', type=str, required=True, help='path to pred videos')
    parser.add_argument('--output_file', type=str, default=None, help='path to output file')
    args = parser.parse_args()

    if not os.path.exists(args.gt_video):
        print(f"Error: GT video not found at {args.gt_video}")
        return 1
    if not os.path.exists(args.pred_video):
        print(f"Error: Pred video not found at {args.pred_video}")
        return 1

    print(f"Comparing:\nRef: {args.gt_video}\nPred: {args.pred_video}")

    v_psnr = process_video_psnr(args.gt_video, args.pred_video)
    if v_psnr is None:
        print("Failed to calculate PSNR.")
        return 1

    print("-" * 30)
    print(f"Video PSNR: {v_psnr:.4f} dB")
    print("-" * 30)

    if args.output_file:
        result = {
            "gt_video": args.gt_video,
            "pred_video": args.pred_video,
            "psnr": v_psnr,
        }
        with open(args.output_file, 'w', encoding='utf-8') as f:
            json.dump(result, f, indent=4)
        print(f"Result saved to {args.output_file}")

    return 0


if __name__ == '__main__':
    # Propagate the status so shell callers can branch on success or failure.
    raise SystemExit(main())
|
||||
114
run_all_case.sh
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/bin/bash

# Run every case of every scenario, one after another.
# With the arrays below: 5 scenarios x 4 cases = 20 cases in total.

# Case directories are given relative to this script, so run from its folder.
cd "$(dirname "$0")" || exit 1

# Environment: force offline mode for Hugging Face Hub / Transformers.
export HF_HUB_OFFLINE=1
export TRANSFORMERS_OFFLINE=1

# ANSI color codes used by the progress output.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# All scenarios to run.
SCENARIOS=(
    "unitree_g1_pack_camera"
    "unitree_z1_dual_arm_cleanup_pencils"
    "unitree_z1_dual_arm_stackbox"
    "unitree_z1_dual_arm_stackbox_v2"
    "unitree_z1_stackbox"
)

# Case numbers within each scenario.
CASES=(1 2 3 4)

# Record the start time and pick a timestamped log file.
START_TIME=$(date +%s)
LOG_FILE="run_all_cases_$(date +%Y%m%d_%H%M%S).log"

echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}开始执行所有场景的case${NC}"
echo -e "${BLUE}总共: ${#SCENARIOS[@]} 个场景 x ${#CASES[@]} 个case = $((${#SCENARIOS[@]} * ${#CASES[@]})) 个任务${NC}"
echo -e "${BLUE}日志文件: ${LOG_FILE}${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""

# Counters.
TOTAL_CASES=$((${#SCENARIOS[@]} * ${#CASES[@]}))
CURRENT_CASE=0
SUCCESS_COUNT=0
FAIL_COUNT=0

# Cases that failed, for the final summary.
declare -a FAILED_CASES

# Iterate over every scenario.
for scenario in "${SCENARIOS[@]}"; do
    echo -e "${YELLOW}>>> 场景: ${scenario}${NC}"

    # Iterate over every case of the scenario.
    for case_num in "${CASES[@]}"; do
        CURRENT_CASE=$((CURRENT_CASE + 1))
        case_dir="${scenario}/case${case_num}"
        script_path="${case_dir}/run_world_model_interaction.sh"

        echo -e "${BLUE}[${CURRENT_CASE}/${TOTAL_CASES}] 执行: ${case_dir}${NC}"

        # Make sure the case script exists.
        if [ ! -f "${script_path}" ]; then
            echo -e "${RED}错误: 脚本不存在 ${script_path}${NC}"
            FAIL_COUNT=$((FAIL_COUNT + 1))
            FAILED_CASES+=("${case_dir} (脚本不存在)")
            continue
        fi

        # Run the case script.
        echo "开始时间: $(date '+%Y-%m-%d %H:%M:%S')"

        # The case scripts end in "... | tee output.log"; without pipefail
        # their exit status is tee's, so a failed run would look successful.
        if bash -o pipefail "${script_path}" >> "${LOG_FILE}" 2>&1; then
            echo -e "${GREEN}✓ 成功: ${case_dir}${NC}"
            SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
        else
            echo -e "${RED}✗ 失败: ${case_dir}${NC}"
            FAIL_COUNT=$((FAIL_COUNT + 1))
            FAILED_CASES+=("${case_dir}")
        fi

        echo "结束时间: $(date '+%Y-%m-%d %H:%M:%S')"
        echo ""
    done

    echo ""
done

# Compute the total elapsed time.
# NOTE: do not store the remainder in SECONDS; that is a Bash special
# variable and assigning to it resets the shell's built-in timer.
END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))
HOURS=$((DURATION / 3600))
MINUTES=$(((DURATION % 3600) / 60))
ELAPSED_SECONDS=$((DURATION % 60))

# Print the summary.
echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}执行完成!${NC}"
echo -e "${BLUE}========================================${NC}"
echo -e "总任务数: ${TOTAL_CASES}"
echo -e "${GREEN}成功: ${SUCCESS_COUNT}${NC}"
echo -e "${RED}失败: ${FAIL_COUNT}${NC}"
echo -e "总耗时: ${HOURS}小时 ${MINUTES}分钟 ${ELAPSED_SECONDS}秒"
echo -e "详细日志: ${LOG_FILE}"
echo ""

# List the failed cases, if any.
if [ "${FAIL_COUNT}" -gt 0 ]; then
    echo -e "${RED}失败的case列表:${NC}"
    for failed_case in "${FAILED_CASES[@]}"; do
        echo -e "${RED}  - ${failed_case}${NC}"
    done
    echo ""
fi

echo -e "${BLUE}========================================${NC}"
|
||||
2468
run_all_cases_20260211_194859.log
Normal file
61
run_all_psnr.sh
Normal file
@@ -0,0 +1,61 @@
|
||||
#!/bin/bash
# Compute the PSNR of every scenario/case pair and write one
# psnr_result.json per case directory.
set -e

# Case directories are given relative to this script, so run from its folder.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

SCENARIOS=(
    unitree_g1_pack_camera
    unitree_z1_dual_arm_cleanup_pencils
    unitree_z1_dual_arm_stackbox
    unitree_z1_dual_arm_stackbox_v2
    unitree_z1_stackbox
)

CASES=(case1 case2 case3 case4)

# Derive the progress denominator from the arrays instead of hard-coding it.
expected_total=$(( ${#SCENARIOS[@]} * ${#CASES[@]} ))

total=0
success=0
fail=0

for scenario in "${SCENARIOS[@]}"; do
    # "case" is a shell reserved word, so the loop variable is case_name.
    for case_name in "${CASES[@]}"; do
        case_dir="${scenario}/${case_name}"
        gt_video="${case_dir}/${scenario}_${case_name}.mp4"
        output_file="${case_dir}/psnr_result.json"

        # Take the first matching prediction video. Globbing avoids parsing
        # ls output; the -f test discards the literal pattern when nothing
        # matches.
        pred_video=""
        for candidate in "${case_dir}"/output/inference/*_full_fs*.mp4; do
            if [ -f "$candidate" ]; then
                pred_video="$candidate"
                break
            fi
        done

        total=$((total + 1))
        echo "=========================================="
        echo "[${total}/${expected_total}] ${case_dir}"

        if [ ! -f "$gt_video" ]; then
            echo "  SKIP: GT video not found: $gt_video"
            fail=$((fail + 1))
            continue
        fi
        if [ -z "$pred_video" ]; then
            echo "  SKIP: pred video not found in ${case_dir}/output/inference/"
            fail=$((fail + 1))
            continue
        fi

        echo "  GT:   $gt_video"
        echo "  Pred: $pred_video"
        echo "  Out:  $output_file"

        # Used as an "if" condition, so a failure here does not trip set -e.
        if python3 psnr_score_for_challenge.py \
            --gt_video "$gt_video" \
            --pred_video "$pred_video" \
            --output_file "$output_file"; then
            success=$((success + 1))
            echo "  DONE"
        else
            fail=$((fail + 1))
            echo "  FAILED"
        fi
    done
done

echo "=========================================="
echo "Finished: ${success} success, ${fail} fail, ${total} total"
|
||||
127
unitree_g1_pack_camera/case1/output.log
Normal file
@@ -0,0 +1,127 @@
|
||||
2026-02-11 19:49:03.885238: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 19:49:03.934263: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 19:49:03.934309: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 19:49:03.935622: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 19:49:03.943041: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 19:49:04.852993: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
9%|▉ | 1/11 [01:12<12:09, 72.95s/it]
|
||||
18%|█▊ | 2/11 [02:26<10:58, 73.19s/it]
|
||||
27%|██▋ | 3/11 [03:39<09:45, 73.21s/it]
|
||||
36%|███▋ | 4/11 [04:52<08:32, 73.21s/it]
|
||||
45%|████▌ | 5/11 [06:05<07:19, 73.22s/it]
|
||||
55%|█████▍ | 6/11 [07:19<06:06, 73.20s/it]
|
||||
64%|██████▎ | 7/11 [08:32<04:52, 73.16s/it]
|
||||
73%|███████▎ | 8/11 [09:45<03:39, 73.14s/it]
|
||||
82%|████████▏ | 9/11 [10:58<02:26, 73.13s/it]
|
||||
91%|█████████ | 10/11 [12:11<01:13, 73.15s/it]
|
||||
100%|██████████| 11/11 [13:24<00:00, 73.18s/it]
|
||||
100%|██████████| 11/11 [13:24<00:00, 73.17s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
5
unitree_g1_pack_camera/case1/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_g1_pack_camera/case1/unitree_g1_pack_camera_case1.mp4",
|
||||
"pred_video": "unitree_g1_pack_camera/case1/output/inference/0_full_fs6.mp4",
|
||||
"psnr": 35.615362167470806
|
||||
}
|
||||
24
unitree_g1_pack_camera/case1/run_world_model_interaction.sh
Normal file
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
# Run world-model interaction inference for unitree_g1_pack_camera, case 1.
# Combined stdout/stderr (including the timing report) is shown on the
# terminal and saved to ${res_dir}/output.log.
# All paths are relative to the current working directory.

# Without pipefail the pipeline's status is tee's, hiding a failed run.
set -o pipefail

res_dir="unitree_g1_pack_camera/case1"
dataset="unitree_g1_pack_camera"

{
    time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
        --seed 123 \
        --ckpt_path ckpts/unifolm_wma_dual.ckpt \
        --config configs/inference/world_model_interaction.yaml \
        --savedir "${res_dir}/output" \
        --bs 1 --height 320 --width 512 \
        --unconditional_guidance_scale 1.0 \
        --ddim_steps 50 \
        --ddim_eta 1.0 \
        --prompt_dir "${res_dir}/world_model_interaction_prompts" \
        --dataset "${dataset}" \
        --video_length 16 \
        --frame_stride 6 \
        --n_action_steps 16 \
        --exe_steps 16 \
        --n_iter 11 \
        --timestep_spacing 'uniform_trailing' \
        --guidance_rescale 0.7 \
        --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 209 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
0,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30
|
||||
|
127
unitree_g1_pack_camera/case2/output.log
Normal file
@@ -0,0 +1,127 @@
|
||||
2026-02-11 20:04:06.049535: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 20:04:06.099186: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 20:04:06.099232: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 20:04:06.100544: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 20:04:06.108023: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 20:04:07.025500: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
9%|▉ | 1/11 [01:14<12:22, 74.22s/it]
|
||||
18%|█▊ | 2/11 [02:28<11:09, 74.33s/it]
|
||||
27%|██▋ | 3/11 [03:42<09:54, 74.32s/it]
|
||||
36%|███▋ | 4/11 [04:57<08:40, 74.32s/it]
|
||||
45%|████▌ | 5/11 [06:11<07:25, 74.28s/it]
|
||||
55%|█████▍ | 6/11 [07:25<06:10, 74.19s/it]
|
||||
64%|██████▎ | 7/11 [08:39<04:56, 74.11s/it]
|
||||
73%|███████▎ | 8/11 [09:53<03:42, 74.07s/it]
|
||||
82%|████████▏ | 9/11 [11:07<02:28, 74.06s/it]
|
||||
91%|█████████ | 10/11 [12:21<01:14, 74.01s/it]
|
||||
100%|██████████| 11/11 [13:35<00:00, 73.98s/it]
|
||||
100%|██████████| 11/11 [13:35<00:00, 74.12s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
5
unitree_g1_pack_camera/case2/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_g1_pack_camera/case2/unitree_g1_pack_camera_case2.mp4",
|
||||
"pred_video": "unitree_g1_pack_camera/case2/output/inference/50_full_fs6.mp4",
|
||||
"psnr": 34.61979248212279
|
||||
}
|
||||
24
unitree_g1_pack_camera/case2/run_world_model_interaction.sh
Normal file
@@ -0,0 +1,24 @@
|
||||
#!/bin/bash
# Run world-model interaction inference for unitree_g1_pack_camera, case 2.
# Combined stdout/stderr (including the timing report) is shown on the
# terminal and saved to ${res_dir}/output.log.
# All paths are relative to the current working directory.

# Without pipefail the pipeline's status is tee's, hiding a failed run.
set -o pipefail

res_dir="unitree_g1_pack_camera/case2"
dataset="unitree_g1_pack_camera"

{
    time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
        --seed 123 \
        --ckpt_path ckpts/unifolm_wma_dual.ckpt \
        --config configs/inference/world_model_interaction.yaml \
        --savedir "${res_dir}/output" \
        --bs 1 --height 320 --width 512 \
        --unconditional_guidance_scale 1.0 \
        --ddim_steps 50 \
        --ddim_eta 1.0 \
        --prompt_dir "${res_dir}/world_model_interaction_prompts" \
        --dataset "${dataset}" \
        --video_length 16 \
        --frame_stride 6 \
        --n_action_steps 16 \
        --exe_steps 16 \
        --n_iter 11 \
        --timestep_spacing 'uniform_trailing' \
        --guidance_rescale 0.7 \
        --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 214 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
50,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30
|
||||
|
127
unitree_g1_pack_camera/case3/output.log
Normal file
@@ -0,0 +1,127 @@
|
||||
2026-02-11 20:19:19.271045: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 20:19:19.320688: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 20:19:19.320734: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 20:19:19.322059: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 20:19:19.329606: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 20:19:20.248938: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
9%|▉ | 1/11 [01:14<12:22, 74.28s/it]
|
||||
18%|█▊ | 2/11 [02:28<11:09, 74.38s/it]
|
||||
27%|██▋ | 3/11 [03:43<09:55, 74.45s/it]
|
||||
36%|███▋ | 4/11 [04:57<08:41, 74.43s/it]
|
||||
45%|████▌ | 5/11 [06:11<07:25, 74.25s/it]
|
||||
55%|█████▍ | 6/11 [07:26<06:11, 74.31s/it]
|
||||
64%|██████▎ | 7/11 [08:40<04:57, 74.26s/it]
|
||||
73%|███████▎ | 8/11 [09:54<03:43, 74.34s/it]
|
||||
82%|████████▏ | 9/11 [11:08<02:28, 74.29s/it]
|
||||
91%|█████████ | 10/11 [12:23<01:14, 74.26s/it]
|
||||
100%|██████████| 11/11 [13:37<00:00, 74.39s/it]
|
||||
100%|██████████| 11/11 [13:37<00:00, 74.34s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
5
unitree_g1_pack_camera/case3/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_g1_pack_camera/case3/unitree_g1_pack_camera_case3.mp4",
|
||||
"pred_video": "unitree_g1_pack_camera/case3/output/inference/100_full_fs6.mp4",
|
||||
"psnr": 37.034952654534486
|
||||
}
|
||||
24
unitree_g1_pack_camera/case3/run_world_model_interaction.sh
Normal file
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_g1_pack_camera/case3"
|
||||
dataset="unitree_g1_pack_camera"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_g1_pack_camera/case3/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 6 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 190 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
100,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30
|
||||
|
127
unitree_g1_pack_camera/case4/output.log
Normal file
@@ -0,0 +1,127 @@
|
||||
2026-02-11 20:34:34.563818: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 20:34:34.613426: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 20:34:34.613485: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 20:34:34.614802: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 20:34:34.622286: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 20:34:35.540506: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
9%|▉ | 1/11 [01:14<12:25, 74.52s/it]
|
||||
18%|█▊ | 2/11 [02:29<11:15, 75.00s/it]
|
||||
27%|██▋ | 3/11 [03:44<09:59, 74.99s/it]
|
||||
36%|███▋ | 4/11 [04:59<08:43, 74.74s/it]
|
||||
45%|████▌ | 5/11 [06:13<07:26, 74.48s/it]
|
||||
55%|█████▍ | 6/11 [07:27<06:12, 74.56s/it]
|
||||
64%|██████▎ | 7/11 [08:42<04:57, 74.46s/it]
|
||||
73%|███████▎ | 8/11 [09:56<03:43, 74.48s/it]
|
||||
82%|████████▏ | 9/11 [11:10<02:28, 74.32s/it]
|
||||
91%|█████████ | 10/11 [12:23<01:13, 73.94s/it]
|
||||
100%|██████████| 11/11 [13:36<00:00, 73.64s/it]
|
||||
100%|██████████| 11/11 [13:36<00:00, 74.25s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
5
unitree_g1_pack_camera/case4/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_g1_pack_camera/case4/unitree_g1_pack_camera_case4.mp4",
|
||||
"pred_video": "unitree_g1_pack_camera/case4/output/inference/200_full_fs6.mp4",
|
||||
"psnr": 31.43390896360405
|
||||
}
|
||||
24
unitree_g1_pack_camera/case4/run_world_model_interaction.sh
Normal file
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_g1_pack_camera/case4"
|
||||
dataset="unitree_g1_pack_camera"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_g1_pack_camera/case4/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 6 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 221 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
200,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30
|
||||
|
118
unitree_z1_dual_arm_cleanup_pencils/case1/output.log
Normal file
@@ -0,0 +1,118 @@
|
||||
2026-02-11 20:49:47.965949: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 20:49:48.015942: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 20:49:48.015997: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 20:49:48.017330: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 20:49:48.024854: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 20:49:48.943205: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/8 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
12%|█▎ | 1/8 [01:15<08:48, 75.51s/it]
|
||||
25%|██▌ | 2/8 [02:30<07:32, 75.39s/it]
|
||||
38%|███▊ | 3/8 [03:46<06:16, 75.35s/it]
|
||||
50%|█████ | 4/8 [05:00<05:00, 75.01s/it]
|
||||
62%|██████▎ | 5/8 [06:14<03:44, 74.68s/it]
|
||||
75%|███████▌ | 6/8 [07:28<02:28, 74.40s/it]
|
||||
88%|████████▊ | 7/8 [08:42<01:14, 74.19s/it]
|
||||
100%|██████████| 8/8 [09:55<00:00, 73.95s/it]
|
||||
100%|██████████| 8/8 [09:55<00:00, 74.47s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case1/unitree_z1_dual_arm_cleanup_pencils_case1.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case1/output/inference/0_full_fs4.mp4",
|
||||
"psnr": 47.911564449209735
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_cleanup_pencils/case1"
|
||||
dataset="unitree_z1_dual_arm_cleanup_pencils"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 8 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 212 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
0,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30
|
||||
|
118
unitree_z1_dual_arm_cleanup_pencils/case2/output.log
Normal file
@@ -0,0 +1,118 @@
|
||||
2026-02-11 21:01:19.535243: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 21:01:19.585230: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 21:01:19.585275: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 21:01:19.586600: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 21:01:19.594107: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 21:01:20.510688: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/8 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
12%|█▎ | 1/8 [01:16<08:54, 76.34s/it]
|
||||
25%|██▌ | 2/8 [02:32<07:37, 76.28s/it]
|
||||
38%|███▊ | 3/8 [03:48<06:21, 76.24s/it]
|
||||
50%|█████ | 4/8 [05:04<05:04, 76.15s/it]
|
||||
62%|██████▎ | 5/8 [06:21<03:48, 76.24s/it]
|
||||
75%|███████▌ | 6/8 [07:36<02:32, 76.08s/it]
|
||||
88%|████████▊ | 7/8 [08:52<01:15, 75.93s/it]
|
||||
100%|██████████| 8/8 [10:09<00:00, 76.12s/it]
|
||||
100%|██████████| 8/8 [10:09<00:00, 76.14s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case2/unitree_z1_dual_arm_cleanup_pencils_case2.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case2/output/inference/50_full_fs4.mp4",
|
||||
"psnr": 48.344571927558974
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_cleanup_pencils/case2"
|
||||
dataset="unitree_z1_dual_arm_cleanup_pencils"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 8 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 202 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
50,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30
|
||||
|
118
unitree_z1_dual_arm_cleanup_pencils/case3/output.log
Normal file
@@ -0,0 +1,118 @@
|
||||
2026-02-11 21:13:04.812376: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 21:13:04.862167: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 21:13:04.862223: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 21:13:04.863549: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 21:13:04.871078: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 21:13:05.785070: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/8 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
12%|█▎ | 1/8 [01:15<08:45, 75.11s/it]
|
||||
25%|██▌ | 2/8 [02:30<07:31, 75.30s/it]
|
||||
38%|███▊ | 3/8 [03:45<06:16, 75.32s/it]
|
||||
50%|█████ | 4/8 [05:01<05:01, 75.29s/it]
|
||||
62%|██████▎ | 5/8 [06:16<03:46, 75.38s/it]
|
||||
75%|███████▌ | 6/8 [07:32<02:30, 75.48s/it]
|
||||
88%|████████▊ | 7/8 [08:47<01:15, 75.39s/it]
|
||||
100%|██████████| 8/8 [10:02<00:00, 75.30s/it]
|
||||
100%|██████████| 8/8 [10:02<00:00, 75.33s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case3/unitree_z1_dual_arm_cleanup_pencils_case3.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case3/output/inference/100_full_fs4.mp4",
|
||||
"psnr": 41.152374490134825
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_cleanup_pencils/case3"
|
||||
dataset="unitree_z1_dual_arm_cleanup_pencils"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 8 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 183 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
100,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30
|
||||
|
118
unitree_z1_dual_arm_cleanup_pencils/case4/output.log
Normal file
@@ -0,0 +1,118 @@
|
||||
2026-02-11 21:24:42.443699: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 21:24:42.494143: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 21:24:42.494201: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 21:24:42.495506: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 21:24:42.503003: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 21:24:43.415898: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/8 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
12%|█▎ | 1/8 [01:15<08:46, 75.28s/it]
|
||||
25%|██▌ | 2/8 [02:30<07:32, 75.34s/it]
|
||||
38%|███▊ | 3/8 [03:45<06:15, 75.08s/it]
|
||||
50%|█████ | 4/8 [04:59<04:58, 74.69s/it]
|
||||
62%|██████▎ | 5/8 [06:13<03:43, 74.43s/it]
|
||||
75%|███████▌ | 6/8 [07:27<02:28, 74.27s/it]
|
||||
88%|████████▊ | 7/8 [08:41<01:14, 74.21s/it]
|
||||
100%|██████████| 8/8 [09:55<00:00, 74.13s/it]
|
||||
100%|██████████| 8/8 [09:55<00:00, 74.43s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case4/unitree_z1_dual_arm_cleanup_pencils_case4.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case4/output/inference/200_full_fs4.mp4",
|
||||
"psnr": 46.025723557253855
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_cleanup_pencils/case4"
|
||||
dataset="unitree_z1_dual_arm_cleanup_pencils"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 8 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 174 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
200,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30
|
||||
|
115
unitree_z1_dual_arm_stackbox/case1/output.log
Normal file
@@ -0,0 +1,115 @@
|
||||
2026-02-11 21:36:14.761055: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 21:36:14.811056: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 21:36:14.811115: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 21:36:14.812480: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 21:36:14.820115: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 21:36:15.736583: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/7 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
14%|█▍ | 1/7 [01:15<07:34, 75.70s/it]
|
||||
29%|██▊ | 2/7 [02:31<06:18, 75.65s/it]
|
||||
43%|████▎ | 3/7 [03:46<05:02, 75.52s/it]
|
||||
57%|█████▋ | 4/7 [05:02<03:46, 75.47s/it]
|
||||
71%|███████▏ | 5/7 [06:17<02:30, 75.40s/it]
|
||||
86%|████████▌ | 6/7 [07:32<01:15, 75.37s/it]
|
||||
100%|██████████| 7/7 [08:48<00:00, 75.38s/it]
|
||||
100%|██████████| 7/7 [08:48<00:00, 75.44s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
5
unitree_z1_dual_arm_stackbox/case1/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox/case1/unitree_z1_dual_arm_stackbox_case1.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox/case1/output/inference/5_full_fs4.mp4",
|
||||
"psnr": 44.3480149502738
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_stackbox/case1"
|
||||
dataset="unitree_z1_dual_arm_stackbox"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 7 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 272 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
5,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30
|
||||
|
115
unitree_z1_dual_arm_stackbox/case2/output.log
Normal file
@@ -0,0 +1,115 @@
|
||||
2026-02-11 21:46:41.375935: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 21:46:41.426557: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 21:46:41.426614: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 21:46:41.427937: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 21:46:41.435507: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 21:46:42.361310: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/7 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
14%|█▍ | 1/7 [01:16<07:38, 76.39s/it]
|
||||
29%|██▊ | 2/7 [02:33<06:23, 76.69s/it]
|
||||
43%|████▎ | 3/7 [03:50<05:07, 76.87s/it]
|
||||
57%|█████▋ | 4/7 [05:07<03:50, 76.91s/it]
|
||||
71%|███████▏ | 5/7 [06:23<02:33, 76.80s/it]
|
||||
86%|████████▌ | 6/7 [07:40<01:16, 76.77s/it]
|
||||
100%|██████████| 7/7 [08:57<00:00, 76.85s/it]
|
||||
100%|██████████| 7/7 [08:57<00:00, 76.81s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
5
unitree_z1_dual_arm_stackbox/case2/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox/case2/unitree_z1_dual_arm_stackbox_case2.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox/case2/output/inference/15_full_fs4.mp4",
|
||||
"psnr": 39.867728254007716
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_stackbox/case2"
|
||||
dataset="unitree_z1_dual_arm_stackbox"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 7 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 268 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
15,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30
|
||||
|
115
unitree_z1_dual_arm_stackbox/case3/output.log
Normal file
@@ -0,0 +1,115 @@
|
||||
2026-02-11 21:57:17.623993: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 21:57:17.673835: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 21:57:17.673891: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 21:57:17.675211: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 21:57:17.682716: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 21:57:18.593525: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/7 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
14%|█▍ | 1/7 [01:15<07:33, 75.59s/it]
|
||||
29%|██▊ | 2/7 [02:31<06:17, 75.59s/it]
|
||||
43%|████▎ | 3/7 [03:46<05:01, 75.44s/it]
|
||||
57%|█████▋ | 4/7 [05:01<03:46, 75.39s/it]
|
||||
71%|███████▏ | 5/7 [06:17<02:30, 75.35s/it]
|
||||
86%|████████▌ | 6/7 [07:32<01:15, 75.32s/it]
|
||||
100%|██████████| 7/7 [08:47<00:00, 75.24s/it]
|
||||
100%|██████████| 7/7 [08:47<00:00, 75.34s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
5
unitree_z1_dual_arm_stackbox/case3/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox/case3/unitree_z1_dual_arm_stackbox_case3.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox/case3/output/inference/25_full_fs4.mp4",
|
||||
"psnr": 39.19101039445159
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_stackbox/case3"
|
||||
dataset="unitree_z1_dual_arm_stackbox"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 7 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 267 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
25,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30
|
||||
|
115
unitree_z1_dual_arm_stackbox/case4/output.log
Normal file
@@ -0,0 +1,115 @@
|
||||
2026-02-11 22:07:43.398736: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 22:07:43.448264: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 22:07:43.448321: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 22:07:43.449636: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 22:07:43.457127: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 22:07:44.370935: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/7 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
14%|█▍ | 1/7 [01:17<07:42, 77.04s/it]
|
||||
29%|██▊ | 2/7 [02:33<06:24, 76.95s/it]
|
||||
43%|████▎ | 3/7 [03:50<05:07, 76.87s/it]
|
||||
57%|█████▋ | 4/7 [05:06<03:49, 76.59s/it]
|
||||
71%|███████▏ | 5/7 [06:24<02:33, 76.82s/it]
|
||||
86%|████████▌ | 6/7 [07:39<01:16, 76.43s/it]
|
||||
100%|██████████| 7/7 [08:55<00:00, 76.06s/it]
|
||||
100%|██████████| 7/7 [08:55<00:00, 76.44s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
5
unitree_z1_dual_arm_stackbox/case4/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox/case4/unitree_z1_dual_arm_stackbox_case4.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox/case4/output/inference/35_full_fs4.mp4",
|
||||
"psnr": 40.29563315341769
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_stackbox/case4"
|
||||
dataset="unitree_z1_dual_arm_stackbox"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 7 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 280 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
35,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30
|
||||
|
127
unitree_z1_dual_arm_stackbox_v2/case1/output.log
Normal file
@@ -0,0 +1,127 @@
|
||||
2026-02-11 22:18:17.396072: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 22:18:17.446095: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 22:18:17.446154: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 22:18:17.447480: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 22:18:17.455025: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 22:18:18.367007: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
9%|▉ | 1/11 [01:15<12:35, 75.53s/it]
|
||||
18%|█▊ | 2/11 [02:30<11:18, 75.39s/it]
|
||||
27%|██▋ | 3/11 [03:46<10:03, 75.38s/it]
|
||||
36%|███▋ | 4/11 [05:01<08:48, 75.47s/it]
|
||||
45%|████▌ | 5/11 [06:16<07:31, 75.32s/it]
|
||||
55%|█████▍ | 6/11 [07:31<06:15, 75.08s/it]
|
||||
64%|██████▎ | 7/11 [08:46<05:00, 75.07s/it]
|
||||
73%|███████▎ | 8/11 [10:00<03:44, 74.76s/it]
|
||||
82%|████████▏ | 9/11 [11:15<02:29, 74.87s/it]
|
||||
91%|█████████ | 10/11 [12:30<01:14, 74.79s/it]
|
||||
100%|██████████| 11/11 [13:45<00:00, 74.80s/it]
|
||||
100%|██████████| 11/11 [13:45<00:00, 75.02s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
5
unitree_z1_dual_arm_stackbox_v2/case1/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case1/unitree_z1_dual_arm_stackbox_v2_case1.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case1/output/inference/5_full_fs4.mp4",
|
||||
"psnr": 25.812741419225095
|
||||
}
|
||||
@@ -0,0 +1,24 @@
|
||||
res_dir="unitree_z1_dual_arm_stackbox_v2/case1"
|
||||
dataset="unitree_z1_dual_arm_stackbox_v2"
|
||||
|
||||
{
|
||||
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
|
||||
--seed 123 \
|
||||
--ckpt_path ckpts/unifolm_wma_dual.ckpt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--savedir "${res_dir}/output" \
|
||||
--bs 1 --height 320 --width 512 \
|
||||
--unconditional_guidance_scale 1.0 \
|
||||
--ddim_steps 50 \
|
||||
--ddim_eta 1.0 \
|
||||
--prompt_dir "unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts" \
|
||||
--dataset ${dataset} \
|
||||
--video_length 16 \
|
||||
--frame_stride 4 \
|
||||
--n_action_steps 16 \
|
||||
--exe_steps 16 \
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
After Width: | Height: | Size: 186 KiB |
@@ -0,0 +1,2 @@
|
||||
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
|
||||
5,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30
|
||||
|
127
unitree_z1_dual_arm_stackbox_v2/case2/output.log
Normal file
@@ -0,0 +1,127 @@
|
||||
2026-02-11 22:33:42.261398: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 22:33:42.310786: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 22:33:42.310845: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 22:33:42.312191: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 22:33:42.319738: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 22:33:43.232517: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
9%|▉ | 1/11 [01:13<12:18, 73.90s/it]
|
||||
18%|█▊ | 2/11 [02:27<11:05, 73.99s/it]
|
||||
27%|██▋ | 3/11 [03:41<09:50, 73.86s/it]
|
||||
36%|███▋ | 4/11 [04:55<08:35, 73.70s/it]
|
||||
45%|████▌ | 5/11 [06:08<07:20, 73.48s/it]
|
||||
55%|█████▍ | 6/11 [07:21<06:06, 73.39s/it]
|
||||
64%|██████▎ | 7/11 [08:34<04:53, 73.28s/it]
|
||||
73%|███████▎ | 8/11 [09:47<03:39, 73.11s/it]
|
||||
82%|████████▏ | 9/11 [11:00<02:26, 73.21s/it]
|
||||
91%|█████████ | 10/11 [12:14<01:13, 73.49s/it]
|
||||
100%|██████████| 11/11 [13:28<00:00, 73.55s/it]
|
||||
100%|██████████| 11/11 [13:28<00:00, 73.50s/it]
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
5
unitree_z1_dual_arm_stackbox_v2/case2/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case2/unitree_z1_dual_arm_stackbox_v2_case2.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case2/output/inference/15_full_fs4.mp4",
|
||||
"psnr": 33.90444714332389
|
||||
}
|
||||