Compare commits
12 Commits
qhy3
...
5e0e21d91b
| Author | SHA1 | Date | |
|---|---|---|---|
| 5e0e21d91b | |||
| d5bec53f61 | |||
| 508b91f5a2 | |||
| 3101252c25 | |||
| f386a5810b | |||
| 352a79035f | |||
| 9a08e27a19 | |||
| b558856e1e | |||
| dcbcb2c377 | |||
| ff43432ef9 | |||
| afa12ba031 | |||
| bf4d66c874 |
@@ -4,7 +4,18 @@
|
|||||||
"Bash(conda env list:*)",
|
"Bash(conda env list:*)",
|
||||||
"Bash(mamba env:*)",
|
"Bash(mamba env:*)",
|
||||||
"Bash(micromamba env list:*)",
|
"Bash(micromamba env list:*)",
|
||||||
"Bash(echo:*)"
|
"Bash(echo:*)",
|
||||||
|
"Bash(git show:*)",
|
||||||
|
"Bash(nvidia-smi:*)",
|
||||||
|
"Bash(conda activate unifolm-wma)",
|
||||||
|
"Bash(conda info:*)",
|
||||||
|
"Bash(direnv allow:*)",
|
||||||
|
"Bash(ls:*)",
|
||||||
|
"Bash(for scenario in unitree_g1_pack_camera unitree_z1_dual_arm_cleanup_pencils unitree_z1_dual_arm_stackbox unitree_z1_dual_arm_stackbox_v2 unitree_z1_stackbox)",
|
||||||
|
"Bash(do for case in case1 case2 case3 case4)",
|
||||||
|
"Bash(done)",
|
||||||
|
"Bash(chmod:*)",
|
||||||
|
"Bash(ln:*)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
2
.envrc
Normal file
2
.envrc
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# Evaluate, in this shell, the code printed by `conda shell.bash hook`;
# the hook command's stderr is discarded.
eval "$(conda shell.bash hook 2>/dev/null)"
|
||||||
|
# Activate the conda environment named unifolm-wma.
conda activate unifolm-wma
|
||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -131,3 +131,4 @@ Experiment/log
|
|||||||
*.ckpt
|
*.ckpt
|
||||||
|
|
||||||
*.0
|
*.0
|
||||||
|
ckpts/unifolm_wma_dual.ckpt.prepared.pt
|
||||||
|
|||||||
114
run_all_case.sh
Normal file
114
run_all_case.sh
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
#!/bin/bash
#
# Run every case of every scenario, one after another.
# There are 5 scenarios with 4 cases each, i.e. 20 tasks in total.
# Each task is the script <scenario>/case<N>/run_world_model_interaction.sh
# (resolved relative to the current working directory). The stdout and stderr
# of every task are appended to a single timestamped log file.

# Environment variables (offline mode)
export HF_HUB_OFFLINE=1
export TRANSFORMERS_OFFLINE=1

# Colour definitions (the escape sequences are expanded by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# All scenarios
SCENARIOS=(
    "unitree_g1_pack_camera"
    "unitree_z1_dual_arm_cleanup_pencils"
    "unitree_z1_dual_arm_stackbox"
    "unitree_z1_dual_arm_stackbox_v2"
    "unitree_z1_stackbox"
)

# Case numbers
CASES=(1 2 3 4)

# Counters. The total is computed once and reused by the banner and summary.
TOTAL_CASES=$(( ${#SCENARIOS[@]} * ${#CASES[@]} ))
CURRENT_CASE=0
SUCCESS_COUNT=0
FAIL_COUNT=0

# Cases that failed (with an optional reason appended)
declare -a FAILED_CASES=()

# Record the start time
START_TIME=$(date +%s)
LOG_FILE="run_all_cases_$(date +%Y%m%d_%H%M%S).log"

echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}开始执行所有场景的case${NC}"
echo -e "${BLUE}总共: ${#SCENARIOS[@]} 个场景 x ${#CASES[@]} 个case = ${TOTAL_CASES} 个任务${NC}"
echo -e "${BLUE}日志文件: ${LOG_FILE}${NC}"
echo -e "${BLUE}========================================${NC}"
echo ""

# Iterate over all scenarios
for scenario in "${SCENARIOS[@]}"; do
    echo -e "${YELLOW}>>> 场景: ${scenario}${NC}"

    # Iterate over all cases of this scenario
    for case_num in "${CASES[@]}"; do
        CURRENT_CASE=$((CURRENT_CASE + 1))
        case_dir="${scenario}/case${case_num}"
        script_path="${case_dir}/run_world_model_interaction.sh"

        echo -e "${BLUE}[${CURRENT_CASE}/${TOTAL_CASES}] 执行: ${case_dir}${NC}"

        # A missing script counts as a failure; move on to the next case.
        if [[ ! -f "${script_path}" ]]; then
            echo -e "${RED}错误: 脚本不存在 ${script_path}${NC}"
            FAIL_COUNT=$((FAIL_COUNT + 1))
            FAILED_CASES+=("${case_dir} (脚本不存在)")
            continue
        fi

        # Run the case; its output goes to the log file only.
        echo "开始时间: $(date '+%Y-%m-%d %H:%M:%S')"

        if bash "${script_path}" >> "${LOG_FILE}" 2>&1; then
            echo -e "${GREEN}✓ 成功: ${case_dir}${NC}"
            SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
        else
            echo -e "${RED}✗ 失败: ${case_dir}${NC}"
            FAIL_COUNT=$((FAIL_COUNT + 1))
            FAILED_CASES+=("${case_dir}")
        fi

        echo "结束时间: $(date '+%Y-%m-%d %H:%M:%S')"
        echo ""
    done

    echo ""
done

# Compute the total elapsed time.
# NOTE: the seconds part must not be stored in SECONDS. That name is a bash
# special variable: assigning to it resets the shell timer and the value keeps
# increasing afterwards.
END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))
HOURS=$((DURATION / 3600))
MINUTES=$(((DURATION % 3600) / 60))
ELAPSED_SECONDS=$((DURATION % 60))

# Print the summary
echo -e "${BLUE}========================================${NC}"
echo -e "${BLUE}执行完成!${NC}"
echo -e "${BLUE}========================================${NC}"
echo -e "总任务数: ${TOTAL_CASES}"
echo -e "${GREEN}成功: ${SUCCESS_COUNT}${NC}"
echo -e "${RED}失败: ${FAIL_COUNT}${NC}"
echo -e "总耗时: ${HOURS}小时 ${MINUTES}分钟 ${ELAPSED_SECONDS}秒"
echo -e "详细日志: ${LOG_FILE}"
echo ""

# List the failed cases, if any
if (( FAIL_COUNT > 0 )); then
    echo -e "${RED}失败的case列表:${NC}"
    for failed_case in "${FAILED_CASES[@]}"; do
        echo -e "${RED} - ${failed_case}${NC}"
    done
    echo ""
fi

echo -e "${BLUE}========================================${NC}"
|
||||||
2328
run_all_cases_20260211_135725.log
Normal file
2328
run_all_cases_20260211_135725.log
Normal file
File diff suppressed because it is too large
Load Diff
37
run_all_cases_20260211_173422.log
Normal file
37
run_all_cases_20260211_173422.log
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
2026-02-11 17:34:29.188470: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 17:34:29.238296: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 17:34:29.238342: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 17:34:29.239649: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 17:34:29.247152: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 17:34:30.172640: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
2388
run_all_cases_20260211_173635.log
Normal file
2388
run_all_cases_20260211_173635.log
Normal file
File diff suppressed because it is too large
Load Diff
0
run_all_cases_20260211_181733.log
Normal file
0
run_all_cases_20260211_181733.log
Normal file
61
run_all_psnr.sh
Executable file
61
run_all_psnr.sh
Executable file
@@ -0,0 +1,61 @@
|
|||||||
|
#!/bin/bash
#
# Run the PSNR scoring script for every scenario/case pair.
# For each pair the ground-truth video <scenario>/<case>/<scenario>_<case>.mp4
# and the first file matching <scenario>/<case>/output/inference/*_full_fs*.mp4
# are passed to psnr_score_for_challenge.py, together with
# <scenario>/<case>/psnr_result.json as its --output_file argument.
# Pairs with a missing input are reported as SKIP and counted as failures.
set -e

# Work from the directory containing this script so relative paths resolve.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
cd "$SCRIPT_DIR"

SCENARIOS=(
    unitree_g1_pack_camera
    unitree_z1_dual_arm_cleanup_pencils
    unitree_z1_dual_arm_stackbox
    unitree_z1_dual_arm_stackbox_v2
    unitree_z1_stackbox
)

CASES=(case1 case2 case3 case4)

# Denominator of the progress display, derived from the lists above so it
# stays correct when scenarios or cases are added or removed.
expected_total=$(( ${#SCENARIOS[@]} * ${#CASES[@]} ))

total=0
success=0
fail=0

for scenario in "${SCENARIOS[@]}"; do
    # `case_name` rather than `case`: the latter is a shell reserved word.
    for case_name in "${CASES[@]}"; do
        case_dir="${scenario}/${case_name}"
        gt_video="${case_dir}/${scenario}_${case_name}.mp4"
        output_file="${case_dir}/psnr_result.json"

        # Pick the first matching predicted video using a glob instead of
        # parsing `ls` output. When nothing matches, the pattern stays literal
        # and fails the -f test, leaving pred_video empty.
        pred_video=""
        for candidate in "${case_dir}"/output/inference/*_full_fs*.mp4; do
            if [[ -f "$candidate" ]]; then
                pred_video="$candidate"
                break
            fi
        done

        total=$((total + 1))
        echo "=========================================="
        echo "[${total}/${expected_total}] ${case_dir}"

        if [[ ! -f "$gt_video" ]]; then
            echo " SKIP: GT video not found: $gt_video"
            fail=$((fail + 1))
            continue
        fi
        if [[ -z "$pred_video" ]]; then
            echo " SKIP: pred video not found in ${case_dir}/output/inference/"
            fail=$((fail + 1))
            continue
        fi

        echo " GT: $gt_video"
        echo " Pred: $pred_video"
        echo " Out: $output_file"

        # Running the scorer inside `if` keeps `set -e` from aborting the
        # whole batch when a single case fails.
        if python3 psnr_score_for_challenge.py \
            --gt_video "$gt_video" \
            --pred_video "$pred_video" \
            --output_file "$output_file"; then
            success=$((success + 1))
            echo " DONE"
        else
            fail=$((fail + 1))
            echo " FAILED"
        fi
    done
done

echo "=========================================="
echo "Finished: ${success} success, ${fail} fail, ${total} total"
|
||||||
@@ -9,6 +9,8 @@ import logging
|
|||||||
import einops
|
import einops
|
||||||
import warnings
|
import warnings
|
||||||
import imageio
|
import imageio
|
||||||
|
import atexit
|
||||||
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
|
|
||||||
from pytorch_lightning import seed_everything
|
from pytorch_lightning import seed_everything
|
||||||
from omegaconf import OmegaConf
|
from omegaconf import OmegaConf
|
||||||
@@ -16,8 +18,9 @@ from tqdm import tqdm
|
|||||||
from einops import rearrange, repeat
|
from einops import rearrange, repeat
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from torch import nn
|
from torch import nn
|
||||||
from eval_utils import populate_queues, log_to_tensorboard
|
from eval_utils import populate_queues
|
||||||
from collections import deque
|
from collections import deque
|
||||||
|
from typing import Optional, List, Any
|
||||||
|
|
||||||
torch.backends.cuda.matmul.allow_tf32 = True
|
torch.backends.cuda.matmul.allow_tf32 = True
|
||||||
torch.backends.cudnn.allow_tf32 = True
|
torch.backends.cudnn.allow_tf32 = True
|
||||||
@@ -153,6 +156,81 @@ def save_results(video: Tensor, filename: str, fps: int = 8) -> None:
|
|||||||
options={'crf': '10'})
|
options={'crf': '10'})
|
||||||
|
|
||||||
|
|
||||||
|
# ========== Async I/O ==========
# Thread pool shared by all background save/log jobs; created on first use.
_io_executor: Optional[ThreadPoolExecutor] = None
# Futures of submitted jobs that have not been waited on yet.
_io_futures: List[Any] = []


def _get_io_executor() -> ThreadPoolExecutor:
    """Return the shared I/O thread pool, creating it (2 workers) on first use."""
    global _io_executor
    if _io_executor is None:
        _io_executor = ThreadPoolExecutor(max_workers=2)
    return _io_executor


def _flush_io():
    """Wait for all pending async I/O to finish.

    An exception raised by a job is printed and otherwise ignored, so one
    failed job does not prevent the remaining ones from being awaited.
    The list of pending futures is emptied afterwards.
    """
    for fut in _io_futures:
        try:
            fut.result()
        except Exception as e:
            print(f">>> [async I/O] error: {e}")
    _io_futures.clear()


# Make sure pending jobs are awaited even if the caller never flushes.
atexit.register(_flush_io)


def _frames_to_grid(video: Tensor) -> Tensor:
    """Tile a 5-D video tensor into one image grid per frame.

    Shared by the video-file and TensorBoard writers.

    Args:
        video: 5-D tensor; presumably (batch, channel, time, height, width)
            with values in [-1, 1] -- TODO confirm against callers.

    Returns:
        The per-frame grids stacked along a new leading dimension and mapped
        through (x + 1) / 2.
    """
    n = video.shape[0]
    # Bring dim 2 to the front so iteration yields one sheet per frame.
    frames = video.permute(2, 0, 1, 3, 4)
    frame_grids = [
        torchvision.utils.make_grid(framesheet, nrow=int(n), padding=0)
        for framesheet in frames
    ]
    grid = torch.stack(frame_grids, dim=0)
    return (grid + 1.0) / 2.0


def _save_results_sync(video_cpu: Tensor, filename: str, fps: int) -> None:
    """Synchronous save on CPU tensor (runs in background thread).

    Args:
        video_cpu: 5-D video tensor; it is cast to float and clamped to [-1, 1].
        filename: Path of the video file to write.
        fps: Frame rate passed to the video writer.
    """
    video = torch.clamp(video_cpu.float(), -1., 1.)
    grid = _frames_to_grid(video)
    # Scale to 8-bit and move the channel dimension last for write_video.
    grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1)
    torchvision.io.write_video(filename,
                               grid,
                               fps=fps,
                               video_codec='h264',
                               options={'crf': '10'})


def save_results_async(video: Tensor, filename: str, fps: int = 8) -> None:
    """Submit video saving to background thread pool.

    The tensor is detached and copied to the CPU before submission; the
    resulting future is recorded so that `_flush_io` can wait for it.

    Args:
        video: Video tensor to save.
        filename: Path of the video file to write.
        fps: Frame rate of the written video.
    """
    video_cpu = video.detach().cpu()
    fut = _get_io_executor().submit(_save_results_sync, video_cpu, filename, fps)
    _io_futures.append(fut)


def _log_to_tb_sync(writer, video_cpu: Tensor, tag: str, fps: int) -> None:
    """Synchronous TensorBoard log on CPU tensor (runs in background thread).

    Tensors that are not 5-D are ignored.

    Args:
        writer: Object providing `add_video(tag, tensor, fps=...)`.
        video_cpu: Video tensor on the CPU.
        tag: Tag under which the video is logged.
        fps: Frame rate passed to `add_video`.
    """
    if video_cpu.dim() != 5:
        return
    # add_video receives the grid with an extra leading dimension of size 1.
    grid = _frames_to_grid(video_cpu).unsqueeze(dim=0)
    writer.add_video(tag, grid, fps=fps)


def log_to_tensorboard_async(writer, data: Tensor, tag: str, fps: int = 10) -> None:
    """Submit TensorBoard logging to background thread pool.

    Only 5-D torch tensors are logged; any other input is silently skipped.

    Args:
        writer: Object providing `add_video(tag, tensor, fps=...)`.
        data: Data to log.
        tag: Tag under which the video is logged.
        fps: Frame rate passed to `add_video`.
    """
    if isinstance(data, torch.Tensor) and data.dim() == 5:
        data_cpu = data.detach().cpu()
        fut = _get_io_executor().submit(_log_to_tb_sync, writer, data_cpu, tag, fps)
        _io_futures.append(fut)
|
||||||
|
|
||||||
|
|
||||||
def get_init_frame_path(data_dir: str, sample: dict) -> str:
|
def get_init_frame_path(data_dir: str, sample: dict) -> str:
|
||||||
"""Construct the init_frame path from directory and sample metadata.
|
"""Construct the init_frame path from directory and sample metadata.
|
||||||
|
|
||||||
@@ -372,6 +450,7 @@ def image_guided_synthesis_sim_mode(
|
|||||||
|
|
||||||
img = observation['observation.images.top'].permute(0, 2, 1, 3, 4)
|
img = observation['observation.images.top'].permute(0, 2, 1, 3, 4)
|
||||||
cond_img = rearrange(img, 'b o c h w -> (b o) c h w')[-1:]
|
cond_img = rearrange(img, 'b o c h w -> (b o) c h w')[-1:]
|
||||||
|
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||||
cond_img_emb = model.embedder(cond_img)
|
cond_img_emb = model.embedder(cond_img)
|
||||||
cond_img_emb = model.image_proj_model(cond_img_emb)
|
cond_img_emb = model.image_proj_model(cond_img_emb)
|
||||||
|
|
||||||
@@ -387,6 +466,7 @@ def image_guided_synthesis_sim_mode(
|
|||||||
prompts = [""] * batch_size
|
prompts = [""] * batch_size
|
||||||
cond_ins_emb = model.get_learned_conditioning(prompts)
|
cond_ins_emb = model.get_learned_conditioning(prompts)
|
||||||
|
|
||||||
|
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||||
cond_state_emb = model.state_projector(observation['observation.state'])
|
cond_state_emb = model.state_projector(observation['observation.state'])
|
||||||
cond_state_emb = cond_state_emb + model.agent_state_pos_emb
|
cond_state_emb = cond_state_emb + model.agent_state_pos_emb
|
||||||
|
|
||||||
@@ -414,6 +494,7 @@ def image_guided_synthesis_sim_mode(
|
|||||||
cond_mask = None
|
cond_mask = None
|
||||||
cond_z0 = None
|
cond_z0 = None
|
||||||
batch_variants = None
|
batch_variants = None
|
||||||
|
samples = None
|
||||||
if ddim_sampler is not None:
|
if ddim_sampler is not None:
|
||||||
samples, actions, states, intermedia = ddim_sampler.sample(
|
samples, actions, states, intermedia = ddim_sampler.sample(
|
||||||
S=ddim_steps,
|
S=ddim_steps,
|
||||||
@@ -437,7 +518,7 @@ def image_guided_synthesis_sim_mode(
|
|||||||
batch_images = model.decode_first_stage(samples)
|
batch_images = model.decode_first_stage(samples)
|
||||||
batch_variants = batch_images
|
batch_variants = batch_images
|
||||||
|
|
||||||
return batch_variants, actions, states
|
return batch_variants, actions, states, samples
|
||||||
|
|
||||||
|
|
||||||
def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||||
@@ -462,26 +543,67 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
|||||||
csv_path = os.path.join(args.prompt_dir, f"{args.dataset}.csv")
|
csv_path = os.path.join(args.prompt_dir, f"{args.dataset}.csv")
|
||||||
df = pd.read_csv(csv_path)
|
df = pd.read_csv(csv_path)
|
||||||
|
|
||||||
# Load config
|
# Load config (always needed for data setup)
|
||||||
config = OmegaConf.load(args.config)
|
config = OmegaConf.load(args.config)
|
||||||
|
|
||||||
|
prepared_path = args.ckpt_path + ".prepared.pt"
|
||||||
|
if os.path.exists(prepared_path):
|
||||||
|
# ---- Fast path: load the fully-prepared model ----
|
||||||
|
print(f">>> Loading prepared model from {prepared_path} ...")
|
||||||
|
model = torch.load(prepared_path,
|
||||||
|
map_location=f"cuda:{gpu_no}",
|
||||||
|
weights_only=False,
|
||||||
|
mmap=True)
|
||||||
|
model.eval()
|
||||||
|
print(f">>> Prepared model loaded.")
|
||||||
|
else:
|
||||||
|
# ---- Normal path: construct + load checkpoint ----
|
||||||
config['model']['params']['wma_config']['params'][
|
config['model']['params']['wma_config']['params'][
|
||||||
'use_checkpoint'] = False
|
'use_checkpoint'] = False
|
||||||
model = instantiate_from_config(config.model)
|
model = instantiate_from_config(config.model)
|
||||||
model.perframe_ae = args.perframe_ae
|
model.perframe_ae = args.perframe_ae
|
||||||
|
|
||||||
assert os.path.exists(args.ckpt_path), "Error: checkpoint Not Found!"
|
assert os.path.exists(args.ckpt_path), "Error: checkpoint Not Found!"
|
||||||
model = load_model_checkpoint(model, args.ckpt_path)
|
model = load_model_checkpoint(model, args.ckpt_path)
|
||||||
model.eval()
|
model.eval()
|
||||||
|
model = model.cuda(gpu_no)
|
||||||
print(f'>>> Load pre-trained model ...')
|
print(f'>>> Load pre-trained model ...')
|
||||||
|
|
||||||
# Build unnomalizer
|
# Save prepared model for fast loading next time
|
||||||
|
print(f">>> Saving prepared model to {prepared_path} ...")
|
||||||
|
torch.save(model, prepared_path)
|
||||||
|
print(f">>> Prepared model saved ({os.path.getsize(prepared_path) / 1024**3:.1f} GB).")
|
||||||
|
|
||||||
|
# ---- FP16: convert diffusion backbone + conditioning modules ----
|
||||||
|
model.model.to(torch.float16)
|
||||||
|
model.model.diffusion_model.dtype = torch.float16
|
||||||
|
print(">>> Diffusion backbone (model.model) converted to FP16.")
|
||||||
|
|
||||||
|
# Projectors / MLP → FP16
|
||||||
|
model.image_proj_model.half()
|
||||||
|
model.state_projector.half()
|
||||||
|
model.action_projector.half()
|
||||||
|
print(">>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.")
|
||||||
|
|
||||||
|
# Text/image encoders → FP16
|
||||||
|
model.cond_stage_model.half()
|
||||||
|
model.embedder.half()
|
||||||
|
print(">>> Encoders (cond_stage_model, embedder) converted to FP16.")
|
||||||
|
|
||||||
|
# Build normalizer (always needed, independent of model loading path)
|
||||||
logging.info("***** Configing Data *****")
|
logging.info("***** Configing Data *****")
|
||||||
data = instantiate_from_config(config.data)
|
data = instantiate_from_config(config.data)
|
||||||
data.setup()
|
data.setup()
|
||||||
print(">>> Dataset is successfully loaded ...")
|
print(">>> Dataset is successfully loaded ...")
|
||||||
|
|
||||||
model = model.cuda(gpu_no)
|
|
||||||
device = get_device_from_parameters(model)
|
device = get_device_from_parameters(model)
|
||||||
|
|
||||||
|
# Fuse KV projections in attention layers (to_k + to_v → to_kv)
|
||||||
|
from unifolm_wma.modules.attention import CrossAttention
|
||||||
|
kv_count = sum(1 for m in model.modules()
|
||||||
|
if isinstance(m, CrossAttention) and m.fuse_kv())
|
||||||
|
print(f" ✓ KV fused: {kv_count} attention layers")
|
||||||
|
|
||||||
# Run over data
|
# Run over data
|
||||||
assert (args.height % 16 == 0) and (
|
assert (args.height % 16 == 0) and (
|
||||||
args.width % 16
|
args.width % 16
|
||||||
@@ -527,7 +649,7 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
|||||||
sample_save_dir = f'{video_save_dir}/wm/{fs}'
|
sample_save_dir = f'{video_save_dir}/wm/{fs}'
|
||||||
os.makedirs(sample_save_dir, exist_ok=True)
|
os.makedirs(sample_save_dir, exist_ok=True)
|
||||||
# For collecting interaction videos
|
# For collecting interaction videos
|
||||||
wm_video = []
|
wm_latent = []
|
||||||
# Initialize observation queues
|
# Initialize observation queues
|
||||||
cond_obs_queues = {
|
cond_obs_queues = {
|
||||||
"observation.images.top":
|
"observation.images.top":
|
||||||
@@ -583,7 +705,7 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
|||||||
|
|
||||||
# Use world-model in policy to generate action
|
# Use world-model in policy to generate action
|
||||||
print(f'>>> Step {itr}: generating actions ...')
|
print(f'>>> Step {itr}: generating actions ...')
|
||||||
pred_videos_0, pred_actions, _ = image_guided_synthesis_sim_mode(
|
pred_videos_0, pred_actions, _, _ = image_guided_synthesis_sim_mode(
|
||||||
model,
|
model,
|
||||||
sample['instruction'],
|
sample['instruction'],
|
||||||
observation,
|
observation,
|
||||||
@@ -625,7 +747,7 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
|||||||
|
|
||||||
# Interaction with the world-model
|
# Interaction with the world-model
|
||||||
print(f'>>> Step {itr}: interacting with world model ...')
|
print(f'>>> Step {itr}: interacting with world model ...')
|
||||||
pred_videos_1, _, pred_states = image_guided_synthesis_sim_mode(
|
pred_videos_1, _, pred_states, wm_samples = image_guided_synthesis_sim_mode(
|
||||||
model,
|
model,
|
||||||
"",
|
"",
|
||||||
observation,
|
observation,
|
||||||
@@ -638,12 +760,16 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
|||||||
fs=model_input_fs,
|
fs=model_input_fs,
|
||||||
text_input=False,
|
text_input=False,
|
||||||
timestep_spacing=args.timestep_spacing,
|
timestep_spacing=args.timestep_spacing,
|
||||||
guidance_rescale=args.guidance_rescale)
|
guidance_rescale=args.guidance_rescale,
|
||||||
|
decode_video=False)
|
||||||
|
|
||||||
|
# Decode only the last frame for CLIP embedding in next iteration
|
||||||
|
last_frame_pixel = model.decode_first_stage(wm_samples[:, :, -1:, :, :])
|
||||||
|
|
||||||
for idx in range(args.exe_steps):
|
for idx in range(args.exe_steps):
|
||||||
observation = {
|
observation = {
|
||||||
'observation.images.top':
|
'observation.images.top':
|
||||||
pred_videos_1[0][:, idx:idx + 1].permute(1, 0, 2, 3),
|
last_frame_pixel[0, :, 0:1].permute(1, 0, 2, 3),
|
||||||
'observation.state':
|
'observation.state':
|
||||||
torch.zeros_like(pred_states[0][idx:idx + 1]) if
|
torch.zeros_like(pred_states[0][idx:idx + 1]) if
|
||||||
args.zero_pred_state else pred_states[0][idx:idx + 1],
|
args.zero_pred_state else pred_states[0][idx:idx + 1],
|
||||||
@@ -654,44 +780,31 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
|||||||
cond_obs_queues = populate_queues(cond_obs_queues,
|
cond_obs_queues = populate_queues(cond_obs_queues,
|
||||||
observation)
|
observation)
|
||||||
|
|
||||||
# Save the imagen videos for decision-making
|
# Save the imagen videos for decision-making (async)
|
||||||
if pred_videos_0 is not None:
|
if pred_videos_0 is not None:
|
||||||
sample_tag = f"{args.dataset}-vid{sample['videoid']}-dm-fs-{fs}/itr-{itr}"
|
sample_tag = f"{args.dataset}-vid{sample['videoid']}-dm-fs-{fs}/itr-{itr}"
|
||||||
log_to_tensorboard(writer,
|
log_to_tensorboard_async(writer,
|
||||||
pred_videos_0,
|
pred_videos_0,
|
||||||
sample_tag,
|
sample_tag,
|
||||||
fps=args.save_fps)
|
fps=args.save_fps)
|
||||||
# Save videos environment changes via world-model interaction
|
|
||||||
sample_tag = f"{args.dataset}-vid{sample['videoid']}-wd-fs-{fs}/itr-{itr}"
|
|
||||||
log_to_tensorboard(writer,
|
|
||||||
pred_videos_1,
|
|
||||||
sample_tag,
|
|
||||||
fps=args.save_fps)
|
|
||||||
|
|
||||||
# Save the imagen videos for decision-making
|
|
||||||
if pred_videos_0 is not None:
|
|
||||||
sample_video_file = f'{video_save_dir}/dm/{fs}/itr-{itr}.mp4'
|
|
||||||
save_results(pred_videos_0.cpu(),
|
|
||||||
sample_video_file,
|
|
||||||
fps=args.save_fps)
|
|
||||||
# Save videos environment changes via world-model interaction
|
|
||||||
sample_video_file = f'{video_save_dir}/wm/{fs}/itr-{itr}.mp4'
|
|
||||||
save_results(pred_videos_1.cpu(),
|
|
||||||
sample_video_file,
|
|
||||||
fps=args.save_fps)
|
|
||||||
|
|
||||||
print('>' * 24)
|
print('>' * 24)
|
||||||
# Collect the result of world-model interactions
|
# Store raw latent for deferred decode
|
||||||
wm_video.append(pred_videos_1[:, :, :args.exe_steps].cpu())
|
wm_latent.append(wm_samples[:, :, :args.exe_steps].cpu())
|
||||||
|
|
||||||
full_video = torch.cat(wm_video, dim=2)
|
# Deferred decode: batch decode all stored latents
|
||||||
|
full_latent = torch.cat(wm_latent, dim=2).to(device)
|
||||||
|
full_video = model.decode_first_stage(full_latent).cpu()
|
||||||
sample_tag = f"{args.dataset}-vid{sample['videoid']}-wd-fs-{fs}/full"
|
sample_tag = f"{args.dataset}-vid{sample['videoid']}-wd-fs-{fs}/full"
|
||||||
log_to_tensorboard(writer,
|
log_to_tensorboard_async(writer,
|
||||||
full_video,
|
full_video,
|
||||||
sample_tag,
|
sample_tag,
|
||||||
fps=args.save_fps)
|
fps=args.save_fps)
|
||||||
sample_full_video_file = f"{video_save_dir}/../{sample['videoid']}_full_fs{fs}.mp4"
|
sample_full_video_file = f"{video_save_dir}/../{sample['videoid']}_full_fs{fs}.mp4"
|
||||||
save_results(full_video, sample_full_video_file, fps=args.save_fps)
|
save_results_async(full_video, sample_full_video_file, fps=args.save_fps)
|
||||||
|
|
||||||
|
# Wait for all async I/O to complete
|
||||||
|
_flush_io()
|
||||||
|
|
||||||
|
|
||||||
def get_parser():
|
def get_parser():
|
||||||
@@ -809,7 +922,7 @@ def get_parser():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--fast_policy_no_decode",
|
"--fast_policy_no_decode",
|
||||||
action='store_true',
|
action='store_true',
|
||||||
default=False,
|
default=True,
|
||||||
help="Speed mode: policy pass only predicts actions, skip policy video decode/log/save.")
|
help="Speed mode: policy pass only predicts actions, skip policy video decode/log/save.")
|
||||||
parser.add_argument("--save_fps",
|
parser.add_argument("--save_fps",
|
||||||
type=int,
|
type=int,
|
||||||
|
|||||||
@@ -988,7 +988,7 @@ class LatentDiffusion(DDPM):
|
|||||||
|
|
||||||
def instantiate_cond_stage(self, config: OmegaConf) -> None:
|
def instantiate_cond_stage(self, config: OmegaConf) -> None:
|
||||||
"""
|
"""
|
||||||
Build the conditioning stage model.
|
Build the conditioning stage model. Frozen models are converted to FP16.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
config: OmegaConf config describing the conditioning model to instantiate.
|
config: OmegaConf config describing the conditioning model to instantiate.
|
||||||
@@ -1000,6 +1000,7 @@ class LatentDiffusion(DDPM):
|
|||||||
self.cond_stage_model.train = disabled_train
|
self.cond_stage_model.train = disabled_train
|
||||||
for param in self.cond_stage_model.parameters():
|
for param in self.cond_stage_model.parameters():
|
||||||
param.requires_grad = False
|
param.requires_grad = False
|
||||||
|
self.cond_stage_model.half()
|
||||||
else:
|
else:
|
||||||
model = instantiate_from_config(config)
|
model = instantiate_from_config(config)
|
||||||
self.cond_stage_model = model
|
self.cond_stage_model = model
|
||||||
@@ -1014,6 +1015,7 @@ class LatentDiffusion(DDPM):
|
|||||||
Returns:
|
Returns:
|
||||||
Conditioning embedding as a tensor (shape depends on cond model).
|
Conditioning embedding as a tensor (shape depends on cond model).
|
||||||
"""
|
"""
|
||||||
|
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||||
if self.cond_stage_forward is None:
|
if self.cond_stage_forward is None:
|
||||||
if hasattr(self.cond_stage_model, 'encode') and callable(
|
if hasattr(self.cond_stage_model, 'encode') and callable(
|
||||||
self.cond_stage_model.encode):
|
self.cond_stage_model.encode):
|
||||||
@@ -1957,6 +1959,7 @@ class LatentVisualDiffusion(LatentDiffusion):
|
|||||||
self.image_proj_model.train = disabled_train
|
self.image_proj_model.train = disabled_train
|
||||||
for param in self.image_proj_model.parameters():
|
for param in self.image_proj_model.parameters():
|
||||||
param.requires_grad = False
|
param.requires_grad = False
|
||||||
|
self.image_proj_model.half()
|
||||||
|
|
||||||
def _init_embedder(self, config: OmegaConf, freeze: bool = True) -> None:
|
def _init_embedder(self, config: OmegaConf, freeze: bool = True) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -1972,6 +1975,7 @@ class LatentVisualDiffusion(LatentDiffusion):
|
|||||||
self.embedder.train = disabled_train
|
self.embedder.train = disabled_train
|
||||||
for param in self.embedder.parameters():
|
for param in self.embedder.parameters():
|
||||||
param.requires_grad = False
|
param.requires_grad = False
|
||||||
|
self.embedder.half()
|
||||||
|
|
||||||
def init_normalizers(self, normalize_config: OmegaConf,
|
def init_normalizers(self, normalize_config: OmegaConf,
|
||||||
dataset_stats: Mapping[str, Any]) -> None:
|
dataset_stats: Mapping[str, Any]) -> None:
|
||||||
@@ -2175,6 +2179,7 @@ class LatentVisualDiffusion(LatentDiffusion):
|
|||||||
(random_num < 3 * self.uncond_prob).float(), "n -> n 1 1 1")
|
(random_num < 3 * self.uncond_prob).float(), "n -> n 1 1 1")
|
||||||
|
|
||||||
cond_img = input_mask * img
|
cond_img = input_mask * img
|
||||||
|
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||||
cond_img_emb = self.embedder(cond_img)
|
cond_img_emb = self.embedder(cond_img)
|
||||||
cond_img_emb = self.image_proj_model(cond_img_emb)
|
cond_img_emb = self.image_proj_model(cond_img_emb)
|
||||||
|
|
||||||
@@ -2191,6 +2196,7 @@ class LatentVisualDiffusion(LatentDiffusion):
|
|||||||
repeat=z.shape[2])
|
repeat=z.shape[2])
|
||||||
cond["c_concat"] = [img_cat_cond]
|
cond["c_concat"] = [img_cat_cond]
|
||||||
|
|
||||||
|
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||||
cond_action = self.action_projector(action)
|
cond_action = self.action_projector(action)
|
||||||
cond_action_emb = self.agent_action_pos_emb + cond_action
|
cond_action_emb = self.agent_action_pos_emb + cond_action
|
||||||
# Get conditioning states
|
# Get conditioning states
|
||||||
@@ -2457,7 +2463,17 @@ class DiffusionWrapper(pl.LightningModule):
|
|||||||
Returns:
|
Returns:
|
||||||
Output from the inner diffusion model (tensor or tuple, depending on the model).
|
Output from the inner diffusion model (tensor or tuple, depending on the model).
|
||||||
"""
|
"""
|
||||||
|
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||||
|
return self._forward_impl(x, x_action, x_state, t,
|
||||||
|
c_concat, c_crossattn, c_crossattn_action,
|
||||||
|
c_adm, s, mask, **kwargs)
|
||||||
|
|
||||||
|
def _forward_impl(
|
||||||
|
self,
|
||||||
|
x, x_action, x_state, t,
|
||||||
|
c_concat=None, c_crossattn=None, c_crossattn_action=None,
|
||||||
|
c_adm=None, s=None, mask=None, **kwargs,
|
||||||
|
):
|
||||||
if self.conditioning_key is None:
|
if self.conditioning_key is None:
|
||||||
out = self.diffusion_model(x, t)
|
out = self.diffusion_model(x, t)
|
||||||
elif self.conditioning_key == 'concat':
|
elif self.conditioning_key == 'concat':
|
||||||
|
|||||||
@@ -100,6 +100,7 @@ class CrossAttention(nn.Module):
|
|||||||
self.agent_action_context_len = agent_action_context_len
|
self.agent_action_context_len = agent_action_context_len
|
||||||
self._kv_cache = {}
|
self._kv_cache = {}
|
||||||
self._kv_cache_enabled = False
|
self._kv_cache_enabled = False
|
||||||
|
self._kv_fused = False
|
||||||
|
|
||||||
self.cross_attention_scale_learnable = cross_attention_scale_learnable
|
self.cross_attention_scale_learnable = cross_attention_scale_learnable
|
||||||
if self.image_cross_attention:
|
if self.image_cross_attention:
|
||||||
@@ -117,6 +118,27 @@ class CrossAttention(nn.Module):
|
|||||||
self.register_parameter('alpha_caa',
|
self.register_parameter('alpha_caa',
|
||||||
nn.Parameter(torch.tensor(0.)))
|
nn.Parameter(torch.tensor(0.)))
|
||||||
|
|
||||||
|
def fuse_kv(self):
|
||||||
|
"""Fuse to_k/to_v into to_kv (2 Linear → 1). Works for all layers."""
|
||||||
|
k_w = self.to_k.weight # (inner_dim, context_dim)
|
||||||
|
v_w = self.to_v.weight
|
||||||
|
self.to_kv = nn.Linear(k_w.shape[1], k_w.shape[0] * 2, bias=False)
|
||||||
|
self.to_kv.weight = nn.Parameter(torch.cat([k_w, v_w], dim=0))
|
||||||
|
del self.to_k, self.to_v
|
||||||
|
if self.image_cross_attention:
|
||||||
|
for suffix in ('_ip', '_as', '_aa'):
|
||||||
|
k_attr = f'to_k{suffix}'
|
||||||
|
v_attr = f'to_v{suffix}'
|
||||||
|
kw = getattr(self, k_attr).weight
|
||||||
|
vw = getattr(self, v_attr).weight
|
||||||
|
fused = nn.Linear(kw.shape[1], kw.shape[0] * 2, bias=False)
|
||||||
|
fused.weight = nn.Parameter(torch.cat([kw, vw], dim=0))
|
||||||
|
setattr(self, f'to_kv{suffix}', fused)
|
||||||
|
delattr(self, k_attr)
|
||||||
|
delattr(self, v_attr)
|
||||||
|
self._kv_fused = True
|
||||||
|
return True
|
||||||
|
|
||||||
def forward(self, x, context=None, mask=None):
|
def forward(self, x, context=None, mask=None):
|
||||||
spatial_self_attn = (context is None)
|
spatial_self_attn = (context is None)
|
||||||
k_ip, v_ip, out_ip = None, None, None
|
k_ip, v_ip, out_ip = None, None, None
|
||||||
@@ -143,6 +165,12 @@ class CrossAttention(nn.Module):
|
|||||||
self.agent_action_context_len +
|
self.agent_action_context_len +
|
||||||
self.text_context_len:, :]
|
self.text_context_len:, :]
|
||||||
|
|
||||||
|
if self._kv_fused:
|
||||||
|
k, v = self.to_kv(context_ins).chunk(2, dim=-1)
|
||||||
|
k_ip, v_ip = self.to_kv_ip(context_image).chunk(2, dim=-1)
|
||||||
|
k_as, v_as = self.to_kv_as(context_agent_state).chunk(2, dim=-1)
|
||||||
|
k_aa, v_aa = self.to_kv_aa(context_agent_action).chunk(2, dim=-1)
|
||||||
|
else:
|
||||||
k = self.to_k(context_ins)
|
k = self.to_k(context_ins)
|
||||||
v = self.to_v(context_ins)
|
v = self.to_v(context_ins)
|
||||||
k_ip = self.to_k_ip(context_image)
|
k_ip = self.to_k_ip(context_image)
|
||||||
@@ -154,6 +182,9 @@ class CrossAttention(nn.Module):
|
|||||||
else:
|
else:
|
||||||
if not spatial_self_attn:
|
if not spatial_self_attn:
|
||||||
context = context[:, :self.text_context_len, :]
|
context = context[:, :self.text_context_len, :]
|
||||||
|
if self._kv_fused:
|
||||||
|
k, v = self.to_kv(context).chunk(2, dim=-1)
|
||||||
|
else:
|
||||||
k = self.to_k(context)
|
k = self.to_k(context)
|
||||||
v = self.to_v(context)
|
v = self.to_v(context)
|
||||||
|
|
||||||
@@ -267,6 +298,10 @@ class CrossAttention(nn.Module):
|
|||||||
elif self.image_cross_attention and not spatial_self_attn:
|
elif self.image_cross_attention and not spatial_self_attn:
|
||||||
if context.shape[1] == self.text_context_len + self.video_length:
|
if context.shape[1] == self.text_context_len + self.video_length:
|
||||||
context_ins, context_image = context[:, :self.text_context_len, :], context[:,self.text_context_len:, :]
|
context_ins, context_image = context[:, :self.text_context_len, :], context[:,self.text_context_len:, :]
|
||||||
|
if self._kv_fused:
|
||||||
|
k, v = self.to_kv(context).chunk(2, dim=-1)
|
||||||
|
k_ip, v_ip = self.to_kv_ip(context_image).chunk(2, dim=-1)
|
||||||
|
else:
|
||||||
k = self.to_k(context)
|
k = self.to_k(context)
|
||||||
v = self.to_v(context)
|
v = self.to_v(context)
|
||||||
k_ip = self.to_k_ip(context_image)
|
k_ip = self.to_k_ip(context_image)
|
||||||
@@ -279,6 +314,11 @@ class CrossAttention(nn.Module):
|
|||||||
context_agent_state = context[:, :self.agent_state_context_len, :]
|
context_agent_state = context[:, :self.agent_state_context_len, :]
|
||||||
context_ins = context[:, self.agent_state_context_len:self.agent_state_context_len+self.text_context_len, :]
|
context_ins = context[:, self.agent_state_context_len:self.agent_state_context_len+self.text_context_len, :]
|
||||||
context_image = context[:, self.agent_state_context_len+self.text_context_len:, :]
|
context_image = context[:, self.agent_state_context_len+self.text_context_len:, :]
|
||||||
|
if self._kv_fused:
|
||||||
|
k, v = self.to_kv(context_ins).chunk(2, dim=-1)
|
||||||
|
k_ip, v_ip = self.to_kv_ip(context_image).chunk(2, dim=-1)
|
||||||
|
k_as, v_as = self.to_kv_as(context_agent_state).chunk(2, dim=-1)
|
||||||
|
else:
|
||||||
k = self.to_k(context_ins)
|
k = self.to_k(context_ins)
|
||||||
v = self.to_v(context_ins)
|
v = self.to_v(context_ins)
|
||||||
k_ip = self.to_k_ip(context_image)
|
k_ip = self.to_k_ip(context_image)
|
||||||
@@ -296,6 +336,12 @@ class CrossAttention(nn.Module):
|
|||||||
context_ins = context[:, self.agent_state_context_len+self.agent_action_context_len:self.agent_state_context_len+self.agent_action_context_len+self.text_context_len, :]
|
context_ins = context[:, self.agent_state_context_len+self.agent_action_context_len:self.agent_state_context_len+self.agent_action_context_len+self.text_context_len, :]
|
||||||
context_image = context[:, self.agent_state_context_len+self.agent_action_context_len+self.text_context_len:, :]
|
context_image = context[:, self.agent_state_context_len+self.agent_action_context_len+self.text_context_len:, :]
|
||||||
|
|
||||||
|
if self._kv_fused:
|
||||||
|
k, v = self.to_kv(context_ins).chunk(2, dim=-1)
|
||||||
|
k_ip, v_ip = self.to_kv_ip(context_image).chunk(2, dim=-1)
|
||||||
|
k_as, v_as = self.to_kv_as(context_agent_state).chunk(2, dim=-1)
|
||||||
|
k_aa, v_aa = self.to_kv_aa(context_agent_action).chunk(2, dim=-1)
|
||||||
|
else:
|
||||||
k = self.to_k(context_ins)
|
k = self.to_k(context_ins)
|
||||||
v = self.to_v(context_ins)
|
v = self.to_v(context_ins)
|
||||||
k_ip = self.to_k_ip(context_image)
|
k_ip = self.to_k_ip(context_image)
|
||||||
@@ -328,6 +374,9 @@ class CrossAttention(nn.Module):
|
|||||||
if not spatial_self_attn:
|
if not spatial_self_attn:
|
||||||
assert 1 > 2, ">>> ERROR: you should never go into here ..."
|
assert 1 > 2, ">>> ERROR: you should never go into here ..."
|
||||||
context = context[:, :self.text_context_len, :]
|
context = context[:, :self.text_context_len, :]
|
||||||
|
if self._kv_fused:
|
||||||
|
k, v = self.to_kv(context).chunk(2, dim=-1)
|
||||||
|
else:
|
||||||
k = self.to_k(context)
|
k = self.to_k(context)
|
||||||
v = self.to_v(context)
|
v = self.to_v(context)
|
||||||
k, v = map(_reshape_kv, (k, v))
|
k, v = map(_reshape_kv, (k, v))
|
||||||
|
|||||||
@@ -688,6 +688,17 @@ class WMAModel(nn.Module):
|
|||||||
# Context precomputation cache
|
# Context precomputation cache
|
||||||
self._ctx_cache_enabled = False
|
self._ctx_cache_enabled = False
|
||||||
self._ctx_cache = {}
|
self._ctx_cache = {}
|
||||||
|
# Reusable CUDA stream for parallel state_unet / action_unet
|
||||||
|
self._state_stream = torch.cuda.Stream()
|
||||||
|
|
||||||
|
def __getstate__(self):
|
||||||
|
state = self.__dict__.copy()
|
||||||
|
state.pop('_state_stream', None)
|
||||||
|
return state
|
||||||
|
|
||||||
|
def __setstate__(self, state):
|
||||||
|
self.__dict__.update(state)
|
||||||
|
self._state_stream = torch.cuda.Stream()
|
||||||
|
|
||||||
def forward(self,
|
def forward(self,
|
||||||
x: Tensor,
|
x: Tensor,
|
||||||
@@ -842,15 +853,16 @@ class WMAModel(nn.Module):
|
|||||||
|
|
||||||
if not self.base_model_gen_only:
|
if not self.base_model_gen_only:
|
||||||
ba, _, _ = x_action.shape
|
ba, _, _ = x_action.shape
|
||||||
|
ts_state = timesteps[:ba] if b > 1 else timesteps
|
||||||
|
# Run action_unet and state_unet in parallel via CUDA streams
|
||||||
|
s_stream = self._state_stream
|
||||||
|
s_stream.wait_stream(torch.cuda.current_stream())
|
||||||
|
with torch.cuda.stream(s_stream):
|
||||||
|
s_y = self.state_unet(x_state, ts_state, hs_a,
|
||||||
|
context_action[:2], **kwargs)
|
||||||
a_y = self.action_unet(x_action, timesteps[:ba], hs_a,
|
a_y = self.action_unet(x_action, timesteps[:ba], hs_a,
|
||||||
context_action[:2], **kwargs)
|
context_action[:2], **kwargs)
|
||||||
# Predict state
|
torch.cuda.current_stream().wait_stream(s_stream)
|
||||||
if b > 1:
|
|
||||||
s_y = self.state_unet(x_state, timesteps[:ba], hs_a,
|
|
||||||
context_action[:2], **kwargs)
|
|
||||||
else:
|
|
||||||
s_y = self.state_unet(x_state, timesteps, hs_a,
|
|
||||||
context_action[:2], **kwargs)
|
|
||||||
else:
|
else:
|
||||||
a_y = torch.zeros_like(x_action)
|
a_y = torch.zeros_like(x_action)
|
||||||
s_y = torch.zeros_like(x_state)
|
s_y = torch.zeros_like(x_state)
|
||||||
|
|||||||
123
unitree_g1_pack_camera/case1/output.log
Normal file
123
unitree_g1_pack_camera/case1/output.log
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
2026-02-11 19:14:09.599811: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 19:14:09.649058: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 19:14:09.649103: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 19:14:09.650392: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 19:14:09.657857: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 19:14:10.584900: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/11 [00:00<?, ?it/s]
|
||||||
|
9%|▉ | 1/11 [00:22<03:49, 22.92s/it]
|
||||||
|
18%|█▊ | 2/11 [00:45<03:22, 22.52s/it]
|
||||||
|
27%|██▋ | 3/11 [01:07<03:00, 22.52s/it]
|
||||||
|
36%|███▋ | 4/11 [01:30<02:38, 22.60s/it]
|
||||||
|
45%|████▌ | 5/11 [01:53<02:16, 22.70s/it]
|
||||||
|
55%|█████▍ | 6/11 [02:16<01:53, 22.74s/it]
|
||||||
|
64%|██████▎ | 7/11 [02:38<01:31, 22.76s/it]
|
||||||
|
73%|███████▎ | 8/11 [03:01<01:08, 22.77s/it]
|
||||||
|
82%|████████▏ | 9/11 [03:24<00:45, 22.76s/it]
|
||||||
|
91%|█████████ | 10/11 [03:47<00:22, 22.76s/it]
|
||||||
|
100%|██████████| 11/11 [04:09<00:00, 22.77s/it]
|
||||||
|
100%|██████████| 11/11 [04:09<00:00, 22.73s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
5
unitree_g1_pack_camera/case1/psnr_result.json
Normal file
5
unitree_g1_pack_camera/case1/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_g1_pack_camera/case1/unitree_g1_pack_camera_case1.mp4",
|
||||||
|
"pred_video": "unitree_g1_pack_camera/case1/output/inference/0_full_fs6.mp4",
|
||||||
|
"psnr": 32.340256576190384
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_g1_pack_camera"
|
|||||||
--n_iter 11 \
|
--n_iter 11 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
123
unitree_g1_pack_camera/case2/output.log
Normal file
123
unitree_g1_pack_camera/case2/output.log
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
2026-02-11 17:41:30.163933: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 17:41:30.213409: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 17:41:30.213453: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 17:41:30.214760: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 17:41:30.222233: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 17:41:31.146811: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/11 [00:00<?, ?it/s]
|
||||||
|
9%|▉ | 1/11 [00:23<03:53, 23.40s/it]
|
||||||
|
18%|█▊ | 2/11 [00:46<03:26, 23.00s/it]
|
||||||
|
27%|██▋ | 3/11 [01:08<03:03, 22.93s/it]
|
||||||
|
36%|███▋ | 4/11 [01:31<02:40, 22.88s/it]
|
||||||
|
45%|████▌ | 5/11 [01:54<02:17, 22.86s/it]
|
||||||
|
55%|█████▍ | 6/11 [02:17<01:54, 22.84s/it]
|
||||||
|
64%|██████▎ | 7/11 [02:40<01:31, 22.82s/it]
|
||||||
|
73%|███████▎ | 8/11 [03:02<01:08, 22.80s/it]
|
||||||
|
82%|████████▏ | 9/11 [03:25<00:45, 22.78s/it]
|
||||||
|
91%|█████████ | 10/11 [03:48<00:22, 22.77s/it]
|
||||||
|
100%|██████████| 11/11 [04:11<00:00, 22.76s/it]
|
||||||
|
100%|██████████| 11/11 [04:11<00:00, 22.83s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
5
unitree_g1_pack_camera/case2/psnr_result.json
Normal file
5
unitree_g1_pack_camera/case2/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_g1_pack_camera/case2/unitree_g1_pack_camera_case2.mp4",
|
||||||
|
"pred_video": "unitree_g1_pack_camera/case2/output/inference/50_full_fs6.mp4",
|
||||||
|
"psnr": 37.49178506869336
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_g1_pack_camera"
|
|||||||
--n_iter 11 \
|
--n_iter 11 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
123
unitree_g1_pack_camera/case3/output.log
Normal file
123
unitree_g1_pack_camera/case3/output.log
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
2026-02-11 17:46:20.925463: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 17:46:20.976293: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 17:46:20.976338: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 17:46:20.977650: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 17:46:20.985133: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 17:46:21.909964: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/11 [00:00<?, ?it/s]
|
||||||
|
9%|▉ | 1/11 [00:23<03:54, 23.50s/it]
|
||||||
|
18%|█▊ | 2/11 [00:46<03:27, 23.07s/it]
|
||||||
|
27%|██▋ | 3/11 [01:09<03:03, 22.99s/it]
|
||||||
|
36%|███▋ | 4/11 [01:32<02:40, 22.94s/it]
|
||||||
|
45%|████▌ | 5/11 [01:54<02:17, 22.90s/it]
|
||||||
|
55%|█████▍ | 6/11 [02:17<01:54, 22.87s/it]
|
||||||
|
64%|██████▎ | 7/11 [02:40<01:31, 22.85s/it]
|
||||||
|
73%|███████▎ | 8/11 [03:03<01:08, 22.83s/it]
|
||||||
|
82%|████████▏ | 9/11 [03:26<00:45, 22.81s/it]
|
||||||
|
91%|█████████ | 10/11 [03:48<00:22, 22.78s/it]
|
||||||
|
100%|██████████| 11/11 [04:11<00:00, 22.76s/it]
|
||||||
|
100%|██████████| 11/11 [04:11<00:00, 22.86s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
5
unitree_g1_pack_camera/case3/psnr_result.json
Normal file
5
unitree_g1_pack_camera/case3/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_g1_pack_camera/case3/unitree_g1_pack_camera_case3.mp4",
|
||||||
|
"pred_video": "unitree_g1_pack_camera/case3/output/inference/100_full_fs6.mp4",
|
||||||
|
"psnr": 29.88155122131729
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_g1_pack_camera"
|
|||||||
--n_iter 11 \
|
--n_iter 11 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
123
unitree_g1_pack_camera/case4/output.log
Normal file
123
unitree_g1_pack_camera/case4/output.log
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
2026-02-11 17:51:11.566934: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 17:51:11.616260: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 17:51:11.616305: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 17:51:11.617626: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 17:51:11.625103: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 17:51:12.538539: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/11 [00:00<?, ?it/s]
|
||||||
|
9%|▉ | 1/11 [00:23<03:53, 23.39s/it]
|
||||||
|
18%|█▊ | 2/11 [00:46<03:26, 22.96s/it]
|
||||||
|
27%|██▋ | 3/11 [01:08<03:03, 22.89s/it]
|
||||||
|
36%|███▋ | 4/11 [01:31<02:40, 22.86s/it]
|
||||||
|
45%|████▌ | 5/11 [01:54<02:16, 22.82s/it]
|
||||||
|
55%|█████▍ | 6/11 [02:17<01:54, 22.80s/it]
|
||||||
|
64%|██████▎ | 7/11 [02:39<01:31, 22.77s/it]
|
||||||
|
73%|███████▎ | 8/11 [03:02<01:08, 22.75s/it]
|
||||||
|
82%|████████▏ | 9/11 [03:25<00:45, 22.73s/it]
|
||||||
|
91%|█████████ | 10/11 [03:47<00:22, 22.72s/it]
|
||||||
|
100%|██████████| 11/11 [04:10<00:00, 22.73s/it]
|
||||||
|
100%|██████████| 11/11 [04:10<00:00, 22.79s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
5
unitree_g1_pack_camera/case4/psnr_result.json
Normal file
5
unitree_g1_pack_camera/case4/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_g1_pack_camera/case4/unitree_g1_pack_camera_case4.mp4",
|
||||||
|
"pred_video": "unitree_g1_pack_camera/case4/output/inference/200_full_fs6.mp4",
|
||||||
|
"psnr": 35.62512454155058
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_g1_pack_camera"
|
|||||||
--n_iter 11 \
|
--n_iter 11 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
@@ -1,24 +1,16 @@
|
|||||||
2026-02-10 15:38:28.973314: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
2026-02-11 17:56:01.170137: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
2026-02-10 15:38:29.023024: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
2026-02-11 17:56:01.219541: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
2026-02-10 15:38:29.023070: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
2026-02-11 17:56:01.219584: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
2026-02-10 15:38:29.024393: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
2026-02-11 17:56:01.220897: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
2026-02-10 15:38:29.031901: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
2026-02-11 17:56:01.228350: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
2026-02-10 15:38:29.955454: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
2026-02-11 17:56:02.145344: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
Global seed set to 123
|
Global seed set to 123
|
||||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
>>> Prepared model loaded.
|
||||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
INFO:root:Loaded ViT-H-14 model config.
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): hf-mirror.com:443
|
|
||||||
DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
|
|
||||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
|
||||||
INFO:root:Loaded ViT-H-14 model config.
|
|
||||||
DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
|
|
||||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
|
||||||
>>> model checkpoint loaded.
|
|
||||||
>>> Load pre-trained model ...
|
|
||||||
INFO:root:***** Configing Data *****
|
INFO:root:***** Configing Data *****
|
||||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
>>> unitree_z1_stackbox: data stats loaded.
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
@@ -36,13 +28,37 @@ INFO:root:***** Configing Data *****
|
|||||||
>>> unitree_g1_pack_camera: data stats loaded.
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
>>> Dataset is successfully loaded ...
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
>>> Generate 16 frames under each generation ...
|
>>> Generate 16 frames under each generation ...
|
||||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/8 [00:00<?, ?it/s]
|
||||||
12%|█▎ | 1/8 [00:23<02:43, 23.34s/it]
|
12%|█▎ | 1/8 [00:23<02:43, 23.34s/it]
|
||||||
|
25%|██▌ | 2/8 [00:46<02:17, 22.96s/it]
|
||||||
|
38%|███▊ | 3/8 [01:08<01:54, 22.88s/it]
|
||||||
|
50%|█████ | 4/8 [01:31<01:31, 22.82s/it]
|
||||||
|
62%|██████▎ | 5/8 [01:54<01:08, 22.78s/it]
|
||||||
|
75%|███████▌ | 6/8 [02:16<00:45, 22.76s/it]
|
||||||
|
88%|████████▊ | 7/8 [02:39<00:22, 22.73s/it]
|
||||||
|
100%|██████████| 8/8 [03:02<00:00, 22.72s/it]
|
||||||
|
100%|██████████| 8/8 [03:02<00:00, 22.79s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
>>> Step 5: generating actions ...
|
>>> Step 5: generating actions ...
|
||||||
>>> Step 5: interacting with world model ...
|
>>> Step 5: interacting with world model ...
|
||||||
@@ -92,30 +108,7 @@ DEBUG:PIL.Image:Importing WmfImagePlugin
|
|||||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
|
||||||
|
|
||||||
12%|█▎ | 1/8 [01:14<08:41, 74.51s/it]
|
|
||||||
25%|██▌ | 2/8 [02:29<07:28, 74.79s/it]
|
|
||||||
38%|███▊ | 3/8 [03:44<06:14, 74.81s/it]
|
|
||||||
50%|█████ | 4/8 [04:59<04:59, 74.78s/it]
|
|
||||||
62%|██████▎ | 5/8 [06:13<03:44, 74.73s/it]
|
|
||||||
75%|███████▌ | 6/8 [07:28<02:29, 74.66s/it]
|
|
||||||
88%|████████▊ | 7/8 [08:42<01:14, 74.56s/it]
|
|
||||||
100%|██████████| 8/8 [09:56<00:00, 74.51s/it]
|
|
||||||
100%|██████████| 8/8 [09:56<00:00, 74.62s/it]
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 1: generating actions ...
|
|
||||||
>>> Step 1: interacting with world model ...
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 2: generating actions ...
|
|
||||||
>>> Step 2: interacting with world model ...
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 3: generating actions ...
|
|
||||||
>>> Step 3: interacting with world model ...
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 4: generating actions ...
|
|
||||||
>>> Step 4: interacting with world model ...
|
|
||||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
>>> Step 5: generating actions ...
|
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||||
>>> Step 5: interacting with world model ...
|
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case1/unitree_z1_dual_arm_cleanup_pencils_case1.mp4",
|
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case1/unitree_z1_dual_arm_cleanup_pencils_case1.mp4",
|
||||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case1/output/inference/0_full_fs4.mp4",
|
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case1/output/inference/0_full_fs4.mp4",
|
||||||
"psnr": 47.911564449209735
|
"psnr": 38.269577028444445
|
||||||
}
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_cleanup_pencils"
|
|||||||
--n_iter 8 \
|
--n_iter 8 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
114
unitree_z1_dual_arm_cleanup_pencils/case2/output.log
Normal file
114
unitree_z1_dual_arm_cleanup_pencils/case2/output.log
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
2026-02-11 17:59:40.132715: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 17:59:40.183410: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 17:59:40.183456: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 17:59:40.184784: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 17:59:40.192307: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 17:59:41.105025: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/8 [00:00<?, ?it/s]
|
||||||
|
12%|█▎ | 1/8 [00:23<02:43, 23.41s/it]
|
||||||
|
25%|██▌ | 2/8 [00:46<02:18, 23.00s/it]
|
||||||
|
38%|███▊ | 3/8 [01:08<01:54, 22.94s/it]
|
||||||
|
50%|█████ | 4/8 [01:31<01:31, 22.86s/it]
|
||||||
|
62%|██████▎ | 5/8 [01:54<01:08, 22.82s/it]
|
||||||
|
75%|███████▌ | 6/8 [02:17<00:45, 22.78s/it]
|
||||||
|
88%|████████▊ | 7/8 [02:39<00:22, 22.77s/it]
|
||||||
|
100%|██████████| 8/8 [03:02<00:00, 22.75s/it]
|
||||||
|
100%|██████████| 8/8 [03:02<00:00, 22.83s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case2/unitree_z1_dual_arm_cleanup_pencils_case2.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case2/output/inference/50_full_fs4.mp4",
|
||||||
|
"psnr": 44.38754096950435
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_cleanup_pencils"
|
|||||||
--n_iter 8 \
|
--n_iter 8 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
114
unitree_z1_dual_arm_cleanup_pencils/case3/output.log
Normal file
114
unitree_z1_dual_arm_cleanup_pencils/case3/output.log
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
2026-02-11 18:03:19.373691: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:03:19.423144: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:03:19.423201: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:03:19.424504: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:03:19.431968: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:03:20.342432: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/8 [00:00<?, ?it/s]
|
||||||
|
12%|█▎ | 1/8 [00:23<02:44, 23.45s/it]
|
||||||
|
25%|██▌ | 2/8 [00:46<02:17, 22.99s/it]
|
||||||
|
38%|███▊ | 3/8 [01:09<01:54, 22.94s/it]
|
||||||
|
50%|█████ | 4/8 [01:31<01:31, 22.89s/it]
|
||||||
|
62%|██████▎ | 5/8 [01:54<01:08, 22.84s/it]
|
||||||
|
75%|███████▌ | 6/8 [02:17<00:45, 22.82s/it]
|
||||||
|
88%|████████▊ | 7/8 [02:40<00:22, 22.81s/it]
|
||||||
|
100%|██████████| 8/8 [03:02<00:00, 22.79s/it]
|
||||||
|
100%|██████████| 8/8 [03:02<00:00, 22.86s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case3/unitree_z1_dual_arm_cleanup_pencils_case3.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case3/output/inference/100_full_fs4.mp4",
|
||||||
|
"psnr": 32.29959078097713
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_cleanup_pencils"
|
|||||||
--n_iter 8 \
|
--n_iter 8 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
114
unitree_z1_dual_arm_cleanup_pencils/case4/output.log
Normal file
114
unitree_z1_dual_arm_cleanup_pencils/case4/output.log
Normal file
@@ -0,0 +1,114 @@
|
|||||||
|
2026-02-11 18:06:58.863806: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:06:58.913518: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:06:58.913565: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:06:58.914918: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:06:58.922497: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:06:59.840461: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/8 [00:00<?, ?it/s]
|
||||||
|
12%|█▎ | 1/8 [00:23<02:44, 23.47s/it]
|
||||||
|
25%|██▌ | 2/8 [00:46<02:18, 23.01s/it]
|
||||||
|
38%|███▊ | 3/8 [01:09<01:54, 22.94s/it]
|
||||||
|
50%|█████ | 4/8 [01:31<01:31, 22.89s/it]
|
||||||
|
62%|██████▎ | 5/8 [01:54<01:08, 22.85s/it]
|
||||||
|
75%|███████▌ | 6/8 [02:17<00:45, 22.81s/it]
|
||||||
|
88%|████████▊ | 7/8 [02:40<00:22, 22.79s/it]
|
||||||
|
100%|██████████| 8/8 [03:02<00:00, 22.77s/it]
|
||||||
|
100%|██████████| 8/8 [03:02<00:00, 22.85s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case4/unitree_z1_dual_arm_cleanup_pencils_case4.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case4/output/inference/200_full_fs4.mp4",
|
||||||
|
"psnr": 45.051241961122535
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_cleanup_pencils"
|
|||||||
--n_iter 8 \
|
--n_iter 8 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
111
unitree_z1_dual_arm_stackbox/case1/output.log
Normal file
111
unitree_z1_dual_arm_stackbox/case1/output.log
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
2026-02-11 18:10:38.361867: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:10:38.412126: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:10:38.412182: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:10:38.413493: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:10:38.420963: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:10:39.335981: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/7 [00:00<?, ?it/s]
|
||||||
|
14%|█▍ | 1/7 [00:23<02:20, 23.41s/it]
|
||||||
|
29%|██▊ | 2/7 [00:46<01:54, 22.99s/it]
|
||||||
|
43%|████▎ | 3/7 [01:08<01:31, 22.92s/it]
|
||||||
|
57%|█████▋ | 4/7 [01:31<01:08, 22.88s/it]
|
||||||
|
71%|███████▏ | 5/7 [01:54<00:45, 22.82s/it]
|
||||||
|
86%|████████▌ | 6/7 [02:17<00:22, 22.79s/it]
|
||||||
|
100%|██████████| 7/7 [02:39<00:00, 22.75s/it]
|
||||||
|
100%|██████████| 7/7 [02:39<00:00, 22.84s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||||
5
unitree_z1_dual_arm_stackbox/case1/psnr_result.json
Normal file
5
unitree_z1_dual_arm_stackbox/case1/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_stackbox/case1/unitree_z1_dual_arm_stackbox_case1.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_stackbox/case1/output/inference/5_full_fs4.mp4",
|
||||||
|
"psnr": 42.717688631296596
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_stackbox"
|
|||||||
--n_iter 7 \
|
--n_iter 7 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
111
unitree_z1_dual_arm_stackbox/case2/output.log
Normal file
111
unitree_z1_dual_arm_stackbox/case2/output.log
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
2026-02-11 18:13:57.132827: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:13:57.182101: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:13:57.182156: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:13:57.183471: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:13:57.190931: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:13:58.104923: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/7 [00:00<?, ?it/s]
|
||||||
|
14%|█▍ | 1/7 [00:23<02:20, 23.34s/it]
|
||||||
|
29%|██▊ | 2/7 [00:46<01:54, 22.98s/it]
|
||||||
|
43%|████▎ | 3/7 [01:08<01:31, 22.91s/it]
|
||||||
|
57%|█████▋ | 4/7 [01:31<01:08, 22.87s/it]
|
||||||
|
71%|███████▏ | 5/7 [01:54<00:45, 22.84s/it]
|
||||||
|
86%|████████▌ | 6/7 [02:17<00:22, 22.80s/it]
|
||||||
|
100%|██████████| 7/7 [02:39<00:00, 22.77s/it]
|
||||||
|
100%|██████████| 7/7 [02:39<00:00, 22.84s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||||
5
unitree_z1_dual_arm_stackbox/case2/psnr_result.json
Normal file
5
unitree_z1_dual_arm_stackbox/case2/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_stackbox/case2/unitree_z1_dual_arm_stackbox_case2.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_stackbox/case2/output/inference/15_full_fs4.mp4",
|
||||||
|
"psnr": 44.90750363879194
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_stackbox"
|
|||||||
--n_iter 7 \
|
--n_iter 7 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
111
unitree_z1_dual_arm_stackbox/case3/output.log
Normal file
111
unitree_z1_dual_arm_stackbox/case3/output.log
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
2026-02-11 18:17:16.023670: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:17:16.073206: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:17:16.073251: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:17:16.074552: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:17:16.082033: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:17:16.997362: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/7 [00:00<?, ?it/s]
|
||||||
|
14%|█▍ | 1/7 [00:23<02:20, 23.41s/it]
|
||||||
|
29%|██▊ | 2/7 [00:46<01:55, 23.03s/it]
|
||||||
|
43%|████▎ | 3/7 [01:09<01:31, 22.95s/it]
|
||||||
|
57%|█████▋ | 4/7 [01:31<01:08, 22.91s/it]
|
||||||
|
71%|███████▏ | 5/7 [01:54<00:45, 22.87s/it]
|
||||||
|
86%|████████▌ | 6/7 [02:17<00:22, 22.84s/it]
|
||||||
|
100%|██████████| 7/7 [02:40<00:00, 22.82s/it]
|
||||||
|
100%|██████████| 7/7 [02:40<00:00, 22.89s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||||
5
unitree_z1_dual_arm_stackbox/case3/psnr_result.json
Normal file
5
unitree_z1_dual_arm_stackbox/case3/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_stackbox/case3/unitree_z1_dual_arm_stackbox_case3.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_stackbox/case3/output/inference/25_full_fs4.mp4",
|
||||||
|
"psnr": 39.63695040491171
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_stackbox"
|
|||||||
--n_iter 7 \
|
--n_iter 7 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
111
unitree_z1_dual_arm_stackbox/case4/output.log
Normal file
111
unitree_z1_dual_arm_stackbox/case4/output.log
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
2026-02-11 18:20:35.210324: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:20:35.259487: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:20:35.259530: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:20:35.260816: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:20:35.268252: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:20:36.181189: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/7 [00:00<?, ?it/s]
|
||||||
|
14%|█▍ | 1/7 [00:23<02:20, 23.43s/it]
|
||||||
|
29%|██▊ | 2/7 [00:46<01:55, 23.03s/it]
|
||||||
|
43%|████▎ | 3/7 [01:09<01:31, 22.96s/it]
|
||||||
|
57%|█████▋ | 4/7 [01:31<01:08, 22.92s/it]
|
||||||
|
71%|███████▏ | 5/7 [01:54<00:45, 22.89s/it]
|
||||||
|
86%|████████▌ | 6/7 [02:17<00:22, 22.86s/it]
|
||||||
|
100%|██████████| 7/7 [02:40<00:00, 22.84s/it]
|
||||||
|
100%|██████████| 7/7 [02:40<00:00, 22.91s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||||
5
unitree_z1_dual_arm_stackbox/case4/psnr_result.json
Normal file
5
unitree_z1_dual_arm_stackbox/case4/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_stackbox/case4/unitree_z1_dual_arm_stackbox_case4.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_stackbox/case4/output/inference/35_full_fs4.mp4",
|
||||||
|
"psnr": 42.34177660061245
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_stackbox"
|
|||||||
--n_iter 7 \
|
--n_iter 7 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
@@ -1,24 +1,16 @@
|
|||||||
2026-02-10 17:39:22.590654: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
2026-02-11 18:23:54.635983: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
2026-02-10 17:39:22.640645: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
2026-02-11 18:23:54.685542: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
2026-02-10 17:39:22.640689: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
2026-02-11 18:23:54.685587: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
2026-02-10 17:39:22.642010: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
2026-02-11 18:23:54.686907: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
2026-02-10 17:39:22.649530: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
2026-02-11 18:23:54.694405: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
2026-02-10 17:39:23.575804: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
2026-02-11 18:23:55.620959: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
Global seed set to 123
|
Global seed set to 123
|
||||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
>>> Prepared model loaded.
|
||||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
INFO:root:Loaded ViT-H-14 model config.
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): hf-mirror.com:443
|
|
||||||
DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
|
|
||||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
|
||||||
INFO:root:Loaded ViT-H-14 model config.
|
|
||||||
DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
|
|
||||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
|
||||||
>>> model checkpoint loaded.
|
|
||||||
>>> Load pre-trained model ...
|
|
||||||
INFO:root:***** Configing Data *****
|
INFO:root:***** Configing Data *****
|
||||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
>>> unitree_z1_stackbox: data stats loaded.
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
@@ -36,13 +28,46 @@ INFO:root:***** Configing Data *****
|
|||||||
>>> unitree_g1_pack_camera: data stats loaded.
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
>>> Dataset is successfully loaded ...
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
>>> Generate 16 frames under each generation ...
|
>>> Generate 16 frames under each generation ...
|
||||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/11 [00:00<?, ?it/s]
|
||||||
9%|▉ | 1/11 [00:23<03:53, 23.38s/it]
|
9%|▉ | 1/11 [00:23<03:53, 23.38s/it]
|
||||||
|
18%|█▊ | 2/11 [00:46<03:26, 22.96s/it]
|
||||||
|
27%|██▋ | 3/11 [01:08<03:03, 22.91s/it]
|
||||||
|
36%|███▋ | 4/11 [01:31<02:40, 22.86s/it]
|
||||||
|
45%|████▌ | 5/11 [01:54<02:16, 22.83s/it]
|
||||||
|
55%|█████▍ | 6/11 [02:17<01:54, 22.80s/it]
|
||||||
|
64%|██████▎ | 7/11 [02:39<01:31, 22.79s/it]
|
||||||
|
73%|███████▎ | 8/11 [03:02<01:08, 22.79s/it]
|
||||||
|
82%|████████▏ | 9/11 [03:25<00:45, 22.78s/it]
|
||||||
|
91%|█████████ | 10/11 [03:48<00:22, 22.76s/it]
|
||||||
|
100%|██████████| 11/11 [04:10<00:00, 22.75s/it]
|
||||||
|
100%|██████████| 11/11 [04:10<00:00, 22.82s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
>>> Step 7: generating actions ...
|
>>> Step 7: generating actions ...
|
||||||
>>> Step 7: interacting with world model ...
|
>>> Step 7: interacting with world model ...
|
||||||
@@ -92,39 +117,7 @@ DEBUG:PIL.Image:Importing WmfImagePlugin
|
|||||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
|
||||||
|
|
||||||
9%|▉ | 1/11 [00:35<05:55, 35.52s/it]
|
|
||||||
18%|█▊ | 2/11 [01:11<05:21, 35.73s/it]
|
|
||||||
27%|██▋ | 3/11 [01:47<04:48, 36.04s/it]
|
|
||||||
36%|███▋ | 4/11 [02:24<04:13, 36.19s/it]
|
|
||||||
45%|████▌ | 5/11 [03:00<03:37, 36.25s/it]
|
|
||||||
55%|█████▍ | 6/11 [03:36<03:00, 36.16s/it]
|
|
||||||
64%|██████▎ | 7/11 [04:12<02:24, 36.09s/it]
|
|
||||||
73%|███████▎ | 8/11 [04:48<01:48, 36.08s/it]
|
|
||||||
82%|████████▏ | 9/11 [05:24<01:12, 36.06s/it]
|
|
||||||
91%|█████████ | 10/11 [06:00<00:36, 36.07s/it]
|
|
||||||
100%|██████████| 11/11 [06:36<00:00, 36.07s/it]
|
|
||||||
100%|██████████| 11/11 [06:36<00:00, 36.07s/it]
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 1: generating actions ...
|
|
||||||
>>> Step 1: interacting with world model ...
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 2: generating actions ...
|
|
||||||
>>> Step 2: interacting with world model ...
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 3: generating actions ...
|
|
||||||
>>> Step 3: interacting with world model ...
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 4: generating actions ...
|
|
||||||
>>> Step 4: interacting with world model ...
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 5: generating actions ...
|
|
||||||
>>> Step 5: interacting with world model ...
|
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
|
||||||
>>> Step 6: generating actions ...
|
|
||||||
>>> Step 6: interacting with world model ...
|
|
||||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
>>> Step 7: generating actions ...
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
>>> Step 7: interacting with world model ...
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
{
|
{
|
||||||
"gt_video": "/home/qhy/unifolm-world-model-action/unitree_z1_dual_arm_stackbox_v2/case1/unitree_z1_dual_arm_stackbox_v2_case1.mp4",
|
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case1/unitree_z1_dual_arm_stackbox_v2_case1.mp4",
|
||||||
"pred_video": "/home/qhy/unifolm-world-model-action/unitree_z1_dual_arm_stackbox_v2/case1/output/inference/5_full_fs4.mp4",
|
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case1/output/inference/5_full_fs4.mp4",
|
||||||
"psnr": 25.12008483689618
|
"psnr": 26.683000215343522
|
||||||
}
|
}
|
||||||
123
unitree_z1_dual_arm_stackbox_v2/case2/output.log
Normal file
123
unitree_z1_dual_arm_stackbox_v2/case2/output.log
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
2026-02-11 18:28:48.801743: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:28:48.852069: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:28:48.852128: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:28:48.853466: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:28:48.861133: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:28:49.784354: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/11 [00:00<?, ?it/s]
|
||||||
|
9%|▉ | 1/11 [00:23<03:56, 23.65s/it]
|
||||||
|
18%|█▊ | 2/11 [00:46<03:28, 23.13s/it]
|
||||||
|
27%|██▋ | 3/11 [01:09<03:04, 23.02s/it]
|
||||||
|
36%|███▋ | 4/11 [01:32<02:40, 22.96s/it]
|
||||||
|
45%|████▌ | 5/11 [01:55<02:17, 22.92s/it]
|
||||||
|
55%|█████▍ | 6/11 [02:17<01:54, 22.88s/it]
|
||||||
|
64%|██████▎ | 7/11 [02:40<01:31, 22.84s/it]
|
||||||
|
73%|███████▎ | 8/11 [03:03<01:08, 22.81s/it]
|
||||||
|
82%|████████▏ | 9/11 [03:26<00:45, 22.81s/it]
|
||||||
|
91%|█████████ | 10/11 [03:48<00:22, 22.80s/it]
|
||||||
|
100%|██████████| 11/11 [04:11<00:00, 22.80s/it]
|
||||||
|
100%|██████████| 11/11 [04:11<00:00, 22.88s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
5
unitree_z1_dual_arm_stackbox_v2/case2/psnr_result.json
Normal file
5
unitree_z1_dual_arm_stackbox_v2/case2/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case2/unitree_z1_dual_arm_stackbox_v2_case2.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case2/output/inference/15_full_fs4.mp4",
|
||||||
|
"psnr": 27.46347145461597
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_stackbox_v2"
|
|||||||
--n_iter 11 \
|
--n_iter 11 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
123
unitree_z1_dual_arm_stackbox_v2/case3/output.log
Normal file
123
unitree_z1_dual_arm_stackbox_v2/case3/output.log
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
2026-02-11 18:33:43.119091: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:33:43.169099: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:33:43.169143: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:33:43.170444: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:33:43.177944: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:33:44.102499: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/11 [00:00<?, ?it/s]
|
||||||
|
9%|▉ | 1/11 [00:23<03:53, 23.36s/it]
|
||||||
|
18%|█▊ | 2/11 [00:46<03:26, 22.99s/it]
|
||||||
|
27%|██▋ | 3/11 [01:08<03:03, 22.93s/it]
|
||||||
|
36%|███▋ | 4/11 [01:31<02:40, 22.87s/it]
|
||||||
|
45%|████▌ | 5/11 [01:54<02:17, 22.85s/it]
|
||||||
|
55%|█████▍ | 6/11 [02:17<01:54, 22.80s/it]
|
||||||
|
64%|██████▎ | 7/11 [02:40<01:31, 22.79s/it]
|
||||||
|
73%|███████▎ | 8/11 [03:02<01:08, 22.78s/it]
|
||||||
|
82%|████████▏ | 9/11 [03:25<00:45, 22.76s/it]
|
||||||
|
91%|█████████ | 10/11 [03:48<00:22, 22.74s/it]
|
||||||
|
100%|██████████| 11/11 [04:10<00:00, 22.73s/it]
|
||||||
|
100%|██████████| 11/11 [04:10<00:00, 22.81s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
5
unitree_z1_dual_arm_stackbox_v2/case3/psnr_result.json
Normal file
5
unitree_z1_dual_arm_stackbox_v2/case3/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case3/unitree_z1_dual_arm_stackbox_v2_case3.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case3/output/inference/25_full_fs4.mp4",
|
||||||
|
"psnr": 28.604047286947512
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_stackbox_v2"
|
|||||||
--n_iter 11 \
|
--n_iter 11 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
123
unitree_z1_dual_arm_stackbox_v2/case4/output.log
Normal file
123
unitree_z1_dual_arm_stackbox_v2/case4/output.log
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
2026-02-11 18:38:37.252690: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:38:37.301897: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:38:37.301950: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:38:37.303254: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:38:37.310679: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:38:38.237893: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/11 [00:00<?, ?it/s]
|
||||||
|
9%|▉ | 1/11 [00:23<03:53, 23.38s/it]
|
||||||
|
18%|█▊ | 2/11 [00:46<03:26, 22.99s/it]
|
||||||
|
27%|██▋ | 3/11 [01:08<03:03, 22.91s/it]
|
||||||
|
36%|███▋ | 4/11 [01:31<02:40, 22.86s/it]
|
||||||
|
45%|████▌ | 5/11 [01:54<02:16, 22.83s/it]
|
||||||
|
55%|█████▍ | 6/11 [02:17<01:54, 22.82s/it]
|
||||||
|
64%|██████▎ | 7/11 [02:40<01:31, 22.81s/it]
|
||||||
|
73%|███████▎ | 8/11 [03:02<01:08, 22.80s/it]
|
||||||
|
82%|████████▏ | 9/11 [03:25<00:45, 22.78s/it]
|
||||||
|
91%|█████████ | 10/11 [03:48<00:22, 22.77s/it]
|
||||||
|
100%|██████████| 11/11 [04:11<00:00, 22.77s/it]
|
||||||
|
100%|██████████| 11/11 [04:11<00:00, 22.83s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||||
5
unitree_z1_dual_arm_stackbox_v2/case4/psnr_result.json
Normal file
5
unitree_z1_dual_arm_stackbox_v2/case4/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case4/unitree_z1_dual_arm_stackbox_v2_case4.mp4",
|
||||||
|
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case4/output/inference/35_full_fs4.mp4",
|
||||||
|
"psnr": 25.578498826379903
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_dual_arm_stackbox_v2"
|
|||||||
--n_iter 11 \
|
--n_iter 11 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
126
unitree_z1_stackbox/case1/output.log
Normal file
126
unitree_z1_stackbox/case1/output.log
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
2026-02-11 18:43:31.592464: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:43:31.641865: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:43:31.641908: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:43:31.643209: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:43:31.650663: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:43:32.564662: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/12 [00:00<?, ?it/s]
|
||||||
|
8%|▊ | 1/12 [00:23<04:17, 23.41s/it]
|
||||||
|
17%|█▋ | 2/12 [00:46<03:50, 23.03s/it]
|
||||||
|
25%|██▌ | 3/12 [01:09<03:26, 22.97s/it]
|
||||||
|
33%|███▎ | 4/12 [01:31<03:03, 22.92s/it]
|
||||||
|
42%|████▏ | 5/12 [01:54<02:40, 22.88s/it]
|
||||||
|
50%|█████ | 6/12 [02:17<02:17, 22.84s/it]
|
||||||
|
58%|█████▊ | 7/12 [02:40<01:54, 22.80s/it]
|
||||||
|
67%|██████▋ | 8/12 [03:02<01:31, 22.78s/it]
|
||||||
|
75%|███████▌ | 9/12 [03:25<01:08, 22.78s/it]
|
||||||
|
83%|████████▎ | 10/12 [03:48<00:45, 22.78s/it]
|
||||||
|
92%|█████████▏| 11/12 [04:11<00:22, 22.77s/it]
|
||||||
|
100%|██████████| 12/12 [04:34<00:00, 22.77s/it]
|
||||||
|
100%|██████████| 12/12 [04:34<00:00, 22.83s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 11: generating actions ...
|
||||||
|
>>> Step 11: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
5
unitree_z1_stackbox/case1/psnr_result.json
Normal file
5
unitree_z1_stackbox/case1/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_stackbox/case1/unitree_z1_stackbox_case1.mp4",
|
||||||
|
"pred_video": "unitree_z1_stackbox/case1/output/inference/5_full_fs4.mp4",
|
||||||
|
"psnr": 46.05271283048069
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_stackbox"
|
|||||||
--n_iter 12 \
|
--n_iter 12 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
126
unitree_z1_stackbox/case2/output.log
Normal file
126
unitree_z1_stackbox/case2/output.log
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
2026-02-11 18:48:44.235405: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:48:44.285138: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:48:44.285181: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:48:44.286531: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:48:44.294141: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:48:45.209453: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/12 [00:00<?, ?it/s]
|
||||||
|
8%|▊ | 1/12 [00:23<04:17, 23.37s/it]
|
||||||
|
17%|█▋ | 2/12 [00:46<03:49, 22.97s/it]
|
||||||
|
25%|██▌ | 3/12 [01:08<03:26, 22.91s/it]
|
||||||
|
33%|███▎ | 4/12 [01:31<03:02, 22.86s/it]
|
||||||
|
42%|████▏ | 5/12 [01:54<02:39, 22.82s/it]
|
||||||
|
50%|█████ | 6/12 [02:17<02:16, 22.81s/it]
|
||||||
|
58%|█████▊ | 7/12 [02:39<01:53, 22.79s/it]
|
||||||
|
67%|██████▋ | 8/12 [03:02<01:31, 22.78s/it]
|
||||||
|
75%|███████▌ | 9/12 [03:25<01:08, 22.76s/it]
|
||||||
|
83%|████████▎ | 10/12 [03:48<00:45, 22.75s/it]
|
||||||
|
92%|█████████▏| 11/12 [04:10<00:22, 22.74s/it]
|
||||||
|
100%|██████████| 12/12 [04:33<00:00, 22.72s/it]
|
||||||
|
100%|██████████| 12/12 [04:33<00:00, 22.80s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 11: generating actions ...
|
||||||
|
>>> Step 11: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
5
unitree_z1_stackbox/case2/psnr_result.json
Normal file
5
unitree_z1_stackbox/case2/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_stackbox/case2/unitree_z1_stackbox_case2.mp4",
|
||||||
|
"pred_video": "unitree_z1_stackbox/case2/output/inference/15_full_fs4.mp4",
|
||||||
|
"psnr": 38.94694381287429
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_stackbox"
|
|||||||
--n_iter 12 \
|
--n_iter 12 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
126
unitree_z1_stackbox/case3/output.log
Normal file
126
unitree_z1_stackbox/case3/output.log
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
2026-02-11 18:53:57.068615: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:53:57.118271: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:53:57.118312: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:53:57.119665: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:53:57.127266: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:53:58.042116: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/12 [00:00<?, ?it/s]
|
||||||
|
8%|▊ | 1/12 [00:23<04:17, 23.43s/it]
|
||||||
|
17%|█▋ | 2/12 [00:46<03:50, 23.02s/it]
|
||||||
|
25%|██▌ | 3/12 [01:09<03:26, 22.96s/it]
|
||||||
|
33%|███▎ | 4/12 [01:31<03:03, 22.92s/it]
|
||||||
|
42%|████▏ | 5/12 [01:54<02:40, 22.87s/it]
|
||||||
|
50%|█████ | 6/12 [02:17<02:17, 22.85s/it]
|
||||||
|
58%|█████▊ | 7/12 [02:40<01:54, 22.83s/it]
|
||||||
|
67%|██████▋ | 8/12 [03:03<01:31, 22.80s/it]
|
||||||
|
75%|███████▌ | 9/12 [03:25<01:08, 22.78s/it]
|
||||||
|
83%|████████▎ | 10/12 [03:48<00:45, 22.77s/it]
|
||||||
|
92%|█████████▏| 11/12 [04:11<00:22, 22.76s/it]
|
||||||
|
100%|██████████| 12/12 [04:33<00:00, 22.75s/it]
|
||||||
|
100%|██████████| 12/12 [04:33<00:00, 22.83s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 11: generating actions ...
|
||||||
|
>>> Step 11: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
5
unitree_z1_stackbox/case3/psnr_result.json
Normal file
5
unitree_z1_stackbox/case3/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_stackbox/case3/unitree_z1_stackbox_case3.mp4",
|
||||||
|
"pred_video": "unitree_z1_stackbox/case3/output/inference/25_full_fs4.mp4",
|
||||||
|
"psnr": 49.489774674892764
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_stackbox"
|
|||||||
--n_iter 12 \
|
--n_iter 12 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
126
unitree_z1_stackbox/case4/output.log
Normal file
126
unitree_z1_stackbox/case4/output.log
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
2026-02-11 18:59:09.688302: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||||
|
2026-02-11 18:59:09.737473: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||||
|
2026-02-11 18:59:09.737518: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||||
|
2026-02-11 18:59:09.738835: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||||
|
2026-02-11 18:59:09.746322: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||||
|
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||||
|
2026-02-11 18:59:10.660940: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||||
|
Global seed set to 123
|
||||||
|
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||||
|
>>> Prepared model loaded.
|
||||||
|
>>> Diffusion backbone (model.model) converted to FP16.
|
||||||
|
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||||
|
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||||
|
INFO:root:***** Configing Data *****
|
||||||
|
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||||
|
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||||
|
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||||
|
>>> unitree_g1_pack_camera: data stats loaded.
|
||||||
|
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||||
|
>>> Dataset is successfully loaded ...
|
||||||
|
✓ KV fused: 66 attention layers
|
||||||
|
>>> Generate 16 frames under each generation ...
|
||||||
|
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||||
|
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||||
|
|
||||||
|
0%| | 0/12 [00:00<?, ?it/s]
|
||||||
|
8%|▊ | 1/12 [00:23<04:17, 23.39s/it]
|
||||||
|
17%|█▋ | 2/12 [00:46<03:50, 23.01s/it]
|
||||||
|
25%|██▌ | 3/12 [01:09<03:26, 22.96s/it]
|
||||||
|
33%|███▎ | 4/12 [01:31<03:03, 22.92s/it]
|
||||||
|
42%|████▏ | 5/12 [01:54<02:40, 22.86s/it]
|
||||||
|
50%|█████ | 6/12 [02:17<02:16, 22.82s/it]
|
||||||
|
58%|█████▊ | 7/12 [02:40<01:53, 22.79s/it]
|
||||||
|
67%|██████▋ | 8/12 [03:02<01:31, 22.77s/it]
|
||||||
|
75%|███████▌ | 9/12 [03:25<01:08, 22.77s/it]
|
||||||
|
83%|████████▎ | 10/12 [03:48<00:45, 22.78s/it]
|
||||||
|
92%|█████████▏| 11/12 [04:11<00:22, 22.77s/it]
|
||||||
|
100%|██████████| 12/12 [04:33<00:00, 22.77s/it]
|
||||||
|
100%|██████████| 12/12 [04:33<00:00, 22.83s/it]
|
||||||
|
>>> Step 0: generating actions ...
|
||||||
|
>>> Step 0: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 1: generating actions ...
|
||||||
|
>>> Step 1: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 2: generating actions ...
|
||||||
|
>>> Step 2: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 3: generating actions ...
|
||||||
|
>>> Step 3: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 4: generating actions ...
|
||||||
|
>>> Step 4: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 5: generating actions ...
|
||||||
|
>>> Step 5: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 6: generating actions ...
|
||||||
|
>>> Step 6: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 7: generating actions ...
|
||||||
|
>>> Step 7: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 8: generating actions ...
|
||||||
|
>>> Step 8: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 9: generating actions ...
|
||||||
|
>>> Step 9: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 10: generating actions ...
|
||||||
|
>>> Step 10: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
>>> Step 11: generating actions ...
|
||||||
|
>>> Step 11: interacting with world model ...
|
||||||
|
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||||
|
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||||
|
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||||
|
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||||
|
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||||
5
unitree_z1_stackbox/case4/psnr_result.json
Normal file
5
unitree_z1_stackbox/case4/psnr_result.json
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
{
|
||||||
|
"gt_video": "unitree_z1_stackbox/case4/unitree_z1_stackbox_case4.mp4",
|
||||||
|
"pred_video": "unitree_z1_stackbox/case4/output/inference/35_full_fs4.mp4",
|
||||||
|
"psnr": 47.18724378194084
|
||||||
|
}
|
||||||
@@ -20,5 +20,6 @@ dataset="unitree_z1_stackbox"
|
|||||||
--n_iter 12 \
|
--n_iter 12 \
|
||||||
--timestep_spacing 'uniform_trailing' \
|
--timestep_spacing 'uniform_trailing' \
|
||||||
--guidance_rescale 0.7 \
|
--guidance_rescale 0.7 \
|
||||||
--perframe_ae
|
--perframe_ae \
|
||||||
|
--fast_policy_no_decode
|
||||||
} 2>&1 | tee "${res_dir}/output.log"
|
} 2>&1 | tee "${res_dir}/output.log"
|
||||||
|
|||||||
Reference in New Issue
Block a user