Compare commits
2 Commits
508b91f5a2
...
trt-replac
| Author | SHA1 | Date | |
|---|---|---|---|
| 68d695d81d | |||
| 65788be1b3 |
@@ -9,12 +9,7 @@
|
||||
"Bash(nvidia-smi:*)",
|
||||
"Bash(conda activate unifolm-wma)",
|
||||
"Bash(conda info:*)",
|
||||
"Bash(direnv allow:*)",
|
||||
"Bash(ls:*)",
|
||||
"Bash(for scenario in unitree_g1_pack_camera unitree_z1_dual_arm_cleanup_pencils unitree_z1_dual_arm_stackbox unitree_z1_dual_arm_stackbox_v2 unitree_z1_stackbox)",
|
||||
"Bash(do for case in case1 case2 case3 case4)",
|
||||
"Bash(done)",
|
||||
"Bash(chmod:*)"
|
||||
"Bash(direnv allow:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -132,3 +132,5 @@ Experiment/log
|
||||
|
||||
*.0
|
||||
ckpts/unifolm_wma_dual.ckpt.prepared.pt
|
||||
trt_engines/video_backbone.engine
|
||||
trt_engines/video_backbone.onnx
|
||||
|
||||
208
record.md
Normal file
208
record.md
Normal file
@@ -0,0 +1,208 @@
|
||||
好,下面是完整的 decode 优化讲解。
|
||||
|
||||
---
|
||||
背景:原始 decode 流程的问题
|
||||
|
||||
原始代码中,每轮迭代的 world model 交互会调用 image_guided_synthesis_sim_mode,内部对DDIM 采样得到的 latent 做全量 VAE decode:
|
||||
|
||||
# ddpms.py decode_core, perframe_ae=True
|
||||
for index in range(z.shape[0]): # 逐帧循环,16帧就调16次
|
||||
frame_z = 1. / self.scale_factor * z[index:index+1]
|
||||
frame_result = self.first_stage_model.decode(frame_z)
|
||||
results.append(frame_result)
|
||||
|
||||
假设 8 轮迭代、每轮 16 帧,WM 路径 decode 128 次,policy 路径再 decode 128 次,共 256 次 VAE decode。每次都是同步阻塞的。
|
||||
|
||||
decode 完的视频还会被逐轮写成 mp4 + tensorboard,产生大量磁盘 I/O。最后还要把所有轮的 decoded video 在内存中torch.cat
|
||||
拼接,再写一次完整视频。
|
||||
|
||||
---
|
||||
优化1:decode_video 开关——按需跳过 decode
|
||||
|
||||
文件: world_model_interaction.py函数 image_guided_synthesis_sim_mode
|
||||
|
||||
改动: 给函数加decode_video 参数(默认 False),返回值增加 raw samples:
|
||||
|
||||
def image_guided_synthesis_sim_mode(...,
|
||||
decode_video: bool = False, # 新增
|
||||
...) -> tuple[Tensor | None, Tensor, Tensor, Tensor | None]:
|
||||
|
||||
samples = None
|
||||
if ddim_sampler is not None:
|
||||
samples, actions, states, intermedia = ddim_sampler.sample(...)if decode_video: # 条件 decode
|
||||
batch_images = model.decode_first_stage(samples)
|
||||
batch_variants = batch_images
|
||||
|
||||
return batch_variants, actions, states, samples# 多返回 samples
|
||||
|
||||
调用侧:
|
||||
- Policy 路径:由 CLI 参数 --fast_policy_no_decode 控制,只需要 action 时可跳过 decode
|
||||
- WM 交互路径:传decode_video=False,只拿 raw latent
|
||||
|
||||
效果: WM 路径每轮省掉 16 帧全量 decode。
|
||||
|
||||
---
|
||||
优化2:只decode observation 需要的帧
|
||||
|
||||
问题: WM 跳过了全量 decode,但下一轮的CLIP embedding 需要 pixel-space 图像做 observation。
|
||||
|
||||
改动: 只decode exe_steps 帧(通常 1帧),而不是全部 16 帧:
|
||||
|
||||
# WM 调用,不做全量 decode
|
||||
pred_videos_1, _, pred_states, wm_samples = image_guided_synthesis_sim_mode(
|
||||
..., decode_video=False)
|
||||
|
||||
# 只 decode exe_steps 帧给 observation
|
||||
obs_pixels = model.decode_first_stage(
|
||||
wm_samples[:, :, :args.exe_steps, :, :])
|
||||
|
||||
for idx in range(args.exe_steps):
|
||||
observation = {
|
||||
'observation.images.top':obs_pixels[0, :, idx:idx + 1].permute(1, 0, 2, 3),
|
||||
...
|
||||
}
|
||||
cond_obs_queues = populate_queues(cond_obs_queues, observation)
|
||||
|
||||
关键细节: 必须逐帧填充 observation queue(idx:idx+1),不能全用最后一帧,否则 CLIP embedding 输入变了会影响精度。
|
||||
|
||||
效果: 每轮从 decode 16 帧降到 decode exe_steps 帧(省15 帧/轮)。
|
||||
|
||||
---
|
||||
优化3:decode stream——GPU 上并行 decode 和 UNet
|
||||
|
||||
问题: 写入最终视频仍需要完整 segment 的 pixel,这部分 decode 还是要做。
|
||||
|
||||
思路: 用独立 CUDA stream 做 segment decode,和下一轮 UNet 推断在 GPU 上并行。
|
||||
|
||||
改动:
|
||||
|
||||
初始化:
|
||||
decode_stream = torch.cuda.Stream(device=device)
|
||||
pending_decode = None
|
||||
|
||||
循环尾部:
|
||||
# 收集上一轮 decode 结果
|
||||
if pending_decode is not None:
|
||||
decode_stream.synchronize()
|
||||
write_q.put(pending_decode.cpu())
|
||||
pending_decode = None
|
||||
|
||||
# 在 decode stream 上启动当前轮 segment decode(不阻塞主线程)
|
||||
latent_slice = wm_samples[:, :, :args.exe_steps]
|
||||
decode_stream.wait_stream(torch.cuda.current_stream()) # 确保 latent 就绪
|
||||
with torch.cuda.stream(decode_stream):
|
||||
pending_decode = model.decode_first_stage(latent_slice)
|
||||
# 主线程立即进入下一轮 UNet
|
||||
|
||||
循环结束后收集最后一轮:
|
||||
if pending_decode is not None:
|
||||
decode_stream.synchronize()
|
||||
write_q.put(pending_decode.cpu())
|
||||
|
||||
原理: decode_stream.wait_stream() 建立 stream间依赖,确保 latent 产出后才开始 decode。两个 stream 的 kernel 可以被GPU
|
||||
调度器交错执行。
|
||||
|
||||
效果: segment decode 时间被下一轮 UNet 推断掩盖。
|
||||
|
||||
---
|
||||
优化4:Writer 进程——CPU 工作跨进程并行
|
||||
|
||||
问题: decode 完的tensor 需要转numpy + cv2 编码写盘,这是 CPU 密集型操作,Python GIL 限制线程并行。
|
||||
|
||||
改动:
|
||||
|
||||
辅助函数(主进程和子进程都能调用):
|
||||
def _video_tensor_to_frames(video: Tensor) -> np.ndarray:
|
||||
video = torch.clamp(video.float(), -1., 1.)
|
||||
n = video.shape[0]
|
||||
video = video.permute(2, 0, 1, 3, 4)
|
||||
frame_grids = [
|
||||
torchvision.utils.make_grid(f, nrow=int(n), padding=0) for f in video
|
||||
]
|
||||
grid = torch.stack(frame_grids, dim=0)
|
||||
grid = ((grid + 1.0) / 2.0 * 255).to(torch.uint8).permute(0, 2, 3, 1)
|
||||
return grid.numpy()[:, :, :, ::-1] # RGB → BGR
|
||||
|
||||
Writer 进程:
|
||||
def _video_writer_process(q: mp.Queue, filename: str, fps: int):
|
||||
vwriter = None
|
||||
while True:
|
||||
item = q.get()
|
||||
if item is None: # sentinel,退出
|
||||
break
|
||||
frames = _video_tensor_to_frames(item)
|
||||
if vwriter is None:
|
||||
h, w = frames.shape[1], frames.shape[2]
|
||||
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
||||
vwriter = cv2.VideoWriter(filename, fourcc, fps, (w, h))
|
||||
for f in frames:
|
||||
vwriter.write(f)
|
||||
if vwriter is not None:
|
||||
vwriter.release()
|
||||
|
||||
主进程启动 writer:
|
||||
write_q = mp.Queue()
|
||||
writer_proc = mp.Process(target=_video_writer_process,
|
||||
args=(write_q, sample_full_video_file, args.save_fps))
|
||||
writer_proc.start()
|
||||
|
||||
主进程通过 write_q.put(tensor.cpu()) 发送数据,循环结束发None sentinel 并join()。
|
||||
|
||||
效果:
|
||||
- tensor→numpy 转换和cv2 编码不占主进程 CPU 时间
|
||||
- 不受 GIL 限制
|
||||
- cv2.VideoWriter 增量写入,不攒内存,不做最终 torch.cat
|
||||
- Queue 自带背压,writer 处理不过来时 put 自然阻塞,不会 OOM
|
||||
|
||||
---
|
||||
同时删除的冗余 I/O
|
||||
|
||||
- 每轮迭代的 WM 中间tensorboard log(log_to_tensorboard_async)
|
||||
- 每轮迭代的 WM 中间 mp4(save_results_async)
|
||||
- 每轮迭代的 policy 中间 mp4
|
||||
- 最终的 torch.cat + 一次性 save_results_async 写完整视频
|
||||
|
||||
---
|
||||
总结:优化前后对比
|
||||
|
||||
┌─────────────────────┬────────────────────────┬───────────────────────────┐
|
||||
│ │ 原始 │ 优化后 │
|
||||
├─────────────────────┼────────────────────────┼───────────────────────────┤
|
||||
│ WM decode帧数/轮 │ 16 │ exe_steps(通常1) │
|
||||
├─────────────────────┼────────────────────────┼───────────────────────────┤
|
||||
│ segment decode 阻塞 │ 同步阻塞主线程 │ decode stream,GPU并行 │
|
||||
├─────────────────────┼────────────────────────┼───────────────────────────┤
|
||||
│ tensor→numpy+写盘 │ 主线程同步/线程池(GIL) │ 独立进程,无GIL │
|
||||
├─────────────────────┼────────────────────────┼───────────────────────────┤
|
||||
│ 视频写入方式 │ 攒内存→cat→一次写 │ cv2增量追加写 │
|
||||
├─────────────────────┼────────────────────────┼───────────────────────────┤
|
||||
│ 中间I/O │ 每轮2×mp4+2×tb │ 无 │
|
||||
├─────────────────────┼────────────────────────┼───────────────────────────┤
|
||||
│ 内存占用 │ 随迭代数线性增长 │ 恒定(不攒decoded video) │
|
||||
└─────────────────────┴────────────────────────┴───────────────────────────┘
|
||||
|
||||
✻ Cooked for 1m 31s
|
||||
|
||||
❯ 目前哪些优化是默认开启的
|
||||
|
||||
● 让我检查各个优化的开关状态。
|
||||
|
||||
● Searched for 2 patterns (ctrl+o to expand)
|
||||
|
||||
● 当前各优化的默认状态:
|
||||
|
||||
┌───────────────────────────────────┬────────────┬────────────────────────────────────────────────────────┐
|
||||
│ 优化 │ 默认开启? │ 控制方式 │
|
||||
├───────────────────────────────────┼────────────┼────────────────────────────────────────────────────────┤
|
||||
│ WM路径跳过全量decode │ ✅ 是 │ 硬编码 decode_video=False(789行) │
|
||||
├───────────────────────────────────┼────────────┼────────────────────────────────────────────────────────┤
|
||||
│ 只decode exe_steps帧给observation │ ✅ 是 │ 无开关,直接生效(792行) │
|
||||
├───────────────────────────────────┼────────────┼────────────────────────────────────────────────────────┤
|
||||
│ decode stream GPU并行 │ ✅ 是 │ 无开关,直接生效(824-827行) │
|
||||
├───────────────────────────────────┼────────────┼────────────────────────────────────────────────────────┤
|
||||
│ Writer进程增量写盘 │ ✅ 是 │ 无开关,直接生效(672-675行) │
|
||||
├───────────────────────────────────┼────────────┼────────────────────────────────────────────────────────┤
|
||||
│ 删除中间wm mp4/tb保存 │ ✅ 是 │ 代码已删除 │
|
||||
├───────────────────────────────────┼────────────┼────────────────────────────────────────────────────────┤
|
||||
│ Policy路径跳过decode │ ❌ 否 │ 需传--fast_policy_no_decode(默认False,即默认decode) │
|
||||
└───────────────────────────────────┴────────────┴────────────────────────────────────────────────────────┘
|
||||
File diff suppressed because it is too large
Load Diff
504
run_all_cases_20260218_190150.log
Normal file
504
run_all_cases_20260218_190150.log
Normal file
@@ -0,0 +1,504 @@
|
||||
2026-02-18 19:01:56.891895: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-18 19:01:56.940243: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-18 19:01:56.940285: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-18 19:01:56.941395: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-18 19:01:56.948327: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-18 19:01:57.870809: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
TRT output 'y': [1, 4, 16, 40, 64] torch.float32
|
||||
TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32
|
||||
TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32
|
||||
>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s][02/18/2026-19:02:10] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
|
||||
|
||||
9%|▉ | 1/11 [00:17<02:51, 17.15s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [00:33<02:31, 16.87s/it]
|
||||
27%|██▋ | 3/11 [00:50<02:14, 16.76s/it]
|
||||
36%|███▋ | 4/11 [01:07<01:57, 16.81s/it]
|
||||
45%|████▌ | 5/11 [01:24<01:41, 16.85s/it]
|
||||
55%|█████▍ | 6/11 [01:41<01:24, 16.82s/it]
|
||||
64%|██████▎ | 7/11 [01:57<01:07, 16.82s/it]
|
||||
73%|███████▎ | 8/11 [02:14<00:50, 16.83s/it]
|
||||
82%|████████▏ | 9/11 [02:31<00:33, 16.80s/it]
|
||||
91%|█████████ | 10/11 [02:48<00:16, 16.81s/it]
|
||||
100%|██████████| 11/11 [03:05<00:00, 16.81s/it]
|
||||
100%|██████████| 11/11 [03:05<00:00, 16.83s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 8: generating actions ...
|
||||
>>> Step 8: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 9: generating actions ...
|
||||
>>> Step 9: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 10: generating actions ...
|
||||
>>> Step 10: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
|
||||
real 3m49.072s
|
||||
user 4m16.055s
|
||||
sys 0m44.636s
|
||||
2026-02-18 19:05:45.956647: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-18 19:05:46.004149: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-18 19:05:46.004193: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-18 19:05:46.005265: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-18 19:05:46.012074: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-18 19:05:46.932966: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
TRT output 'y': [1, 4, 16, 40, 64] torch.float32
|
||||
TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32
|
||||
TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32
|
||||
>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s][02/18/2026-19:05:59] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
|
||||
|
||||
9%|▉ | 1/11 [00:16<02:47, 16.71s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [00:33<02:30, 16.75s/it]
|
||||
27%|██▋ | 3/11 [00:50<02:15, 16.91s/it]
|
||||
36%|███▋ | 4/11 [01:07<01:59, 17.02s/it]
|
||||
45%|████▌ | 5/11 [01:24<01:41, 16.98s/it]
|
||||
55%|█████▍ | 6/11 [01:41<01:24, 16.94s/it]
|
||||
64%|██████▎ | 7/11 [01:58<01:07, 16.90s/it]
|
||||
73%|███████▎ | 8/11 [02:15<00:50, 16.83s/it]
|
||||
82%|████████▏ | 9/11 [02:31<00:33, 16.80s/it]
|
||||
91%|█████████ | 10/11 [02:49<00:16, 16.94s/it]
|
||||
100%|██████████| 11/11 [03:06<00:00, 16.97s/it]
|
||||
100%|██████████| 11/11 [03:06<00:00, 16.91s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 8: generating actions ...
|
||||
>>> Step 8: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 9: generating actions ...
|
||||
>>> Step 9: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 10: generating actions ...
|
||||
>>> Step 10: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
|
||||
real 3m49.162s
|
||||
user 4m12.814s
|
||||
sys 0m45.565s
|
||||
2026-02-18 19:09:35.113634: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-18 19:09:35.161428: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-18 19:09:35.161474: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-18 19:09:35.162551: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-18 19:09:35.169325: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-18 19:09:36.089250: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
TRT output 'y': [1, 4, 16, 40, 64] torch.float32
|
||||
TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32
|
||||
TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32
|
||||
>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s][02/18/2026-19:09:49] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
|
||||
|
||||
9%|▉ | 1/11 [00:16<02:45, 16.53s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
0
run_all_psnr.sh
Executable file → Normal file
0
run_all_psnr.sh
Executable file → Normal file
@@ -450,7 +450,6 @@ def image_guided_synthesis_sim_mode(
|
||||
|
||||
img = observation['observation.images.top'].permute(0, 2, 1, 3, 4)
|
||||
cond_img = rearrange(img, 'b o c h w -> (b o) c h w')[-1:]
|
||||
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||
cond_img_emb = model.embedder(cond_img)
|
||||
cond_img_emb = model.image_proj_model(cond_img_emb)
|
||||
|
||||
@@ -466,7 +465,6 @@ def image_guided_synthesis_sim_mode(
|
||||
prompts = [""] * batch_size
|
||||
cond_ins_emb = model.get_learned_conditioning(prompts)
|
||||
|
||||
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||
cond_state_emb = model.state_projector(observation['observation.state'])
|
||||
cond_state_emb = cond_state_emb + model.agent_state_pos_emb
|
||||
|
||||
@@ -494,7 +492,6 @@ def image_guided_synthesis_sim_mode(
|
||||
cond_mask = None
|
||||
cond_z0 = None
|
||||
batch_variants = None
|
||||
samples = None
|
||||
if ddim_sampler is not None:
|
||||
samples, actions, states, intermedia = ddim_sampler.sample(
|
||||
S=ddim_steps,
|
||||
@@ -518,7 +515,7 @@ def image_guided_synthesis_sim_mode(
|
||||
batch_images = model.decode_first_stage(samples)
|
||||
batch_variants = batch_images
|
||||
|
||||
return batch_variants, actions, states, samples
|
||||
return batch_variants, actions, states
|
||||
|
||||
|
||||
def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||
@@ -574,22 +571,6 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||
torch.save(model, prepared_path)
|
||||
print(f">>> Prepared model saved ({os.path.getsize(prepared_path) / 1024**3:.1f} GB).")
|
||||
|
||||
# ---- FP16: convert diffusion backbone + conditioning modules ----
|
||||
model.model.to(torch.float16)
|
||||
model.model.diffusion_model.dtype = torch.float16
|
||||
print(">>> Diffusion backbone (model.model) converted to FP16.")
|
||||
|
||||
# Projectors / MLP → FP16
|
||||
model.image_proj_model.half()
|
||||
model.state_projector.half()
|
||||
model.action_projector.half()
|
||||
print(">>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.")
|
||||
|
||||
# Text/image encoders → FP16
|
||||
model.cond_stage_model.half()
|
||||
model.embedder.half()
|
||||
print(">>> Encoders (cond_stage_model, embedder) converted to FP16.")
|
||||
|
||||
# Build normalizer (always needed, independent of model loading path)
|
||||
logging.info("***** Configing Data *****")
|
||||
data = instantiate_from_config(config.data)
|
||||
@@ -604,6 +585,11 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||
if isinstance(m, CrossAttention) and m.fuse_kv())
|
||||
print(f" ✓ KV fused: {kv_count} attention layers")
|
||||
|
||||
# Load TRT backbone if engine exists
|
||||
trt_engine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..', 'trt_engines', 'video_backbone.engine')
|
||||
if os.path.exists(trt_engine_path):
|
||||
model.model.diffusion_model.load_trt_backbone(trt_engine_path)
|
||||
|
||||
# Run over data
|
||||
assert (args.height % 16 == 0) and (
|
||||
args.width % 16
|
||||
@@ -649,7 +635,7 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||
sample_save_dir = f'{video_save_dir}/wm/{fs}'
|
||||
os.makedirs(sample_save_dir, exist_ok=True)
|
||||
# For collecting interaction videos
|
||||
wm_latent = []
|
||||
wm_video = []
|
||||
# Initialize observation queues
|
||||
cond_obs_queues = {
|
||||
"observation.images.top":
|
||||
@@ -705,7 +691,7 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||
|
||||
# Use world-model in policy to generate action
|
||||
print(f'>>> Step {itr}: generating actions ...')
|
||||
pred_videos_0, pred_actions, _, _ = image_guided_synthesis_sim_mode(
|
||||
pred_videos_0, pred_actions, _ = image_guided_synthesis_sim_mode(
|
||||
model,
|
||||
sample['instruction'],
|
||||
observation,
|
||||
@@ -747,7 +733,7 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||
|
||||
# Interaction with the world-model
|
||||
print(f'>>> Step {itr}: interacting with world model ...')
|
||||
pred_videos_1, _, pred_states, wm_samples = image_guided_synthesis_sim_mode(
|
||||
pred_videos_1, _, pred_states = image_guided_synthesis_sim_mode(
|
||||
model,
|
||||
"",
|
||||
observation,
|
||||
@@ -760,16 +746,12 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||
fs=model_input_fs,
|
||||
text_input=False,
|
||||
timestep_spacing=args.timestep_spacing,
|
||||
guidance_rescale=args.guidance_rescale,
|
||||
decode_video=False)
|
||||
|
||||
# Decode only the last frame for CLIP embedding in next iteration
|
||||
last_frame_pixel = model.decode_first_stage(wm_samples[:, :, -1:, :, :])
|
||||
guidance_rescale=args.guidance_rescale)
|
||||
|
||||
for idx in range(args.exe_steps):
|
||||
observation = {
|
||||
'observation.images.top':
|
||||
last_frame_pixel[0, :, 0:1].permute(1, 0, 2, 3),
|
||||
pred_videos_1[0][:, idx:idx + 1].permute(1, 0, 2, 3),
|
||||
'observation.state':
|
||||
torch.zeros_like(pred_states[0][idx:idx + 1]) if
|
||||
args.zero_pred_state else pred_states[0][idx:idx + 1],
|
||||
@@ -787,14 +769,30 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
|
||||
pred_videos_0,
|
||||
sample_tag,
|
||||
fps=args.save_fps)
|
||||
# Save videos environment changes via world-model interaction
|
||||
sample_tag = f"{args.dataset}-vid{sample['videoid']}-wd-fs-{fs}/itr-{itr}"
|
||||
log_to_tensorboard_async(writer,
|
||||
pred_videos_1,
|
||||
sample_tag,
|
||||
fps=args.save_fps)
|
||||
|
||||
# Save the imagen videos for decision-making
|
||||
if pred_videos_0 is not None:
|
||||
sample_video_file = f'{video_save_dir}/dm/{fs}/itr-{itr}.mp4'
|
||||
save_results_async(pred_videos_0,
|
||||
sample_video_file,
|
||||
fps=args.save_fps)
|
||||
# Save videos environment changes via world-model interaction
|
||||
sample_video_file = f'{video_save_dir}/wm/{fs}/itr-{itr}.mp4'
|
||||
save_results_async(pred_videos_1,
|
||||
sample_video_file,
|
||||
fps=args.save_fps)
|
||||
|
||||
print('>' * 24)
|
||||
# Store raw latent for deferred decode
|
||||
wm_latent.append(wm_samples[:, :, :args.exe_steps].cpu())
|
||||
# Collect the result of world-model interactions
|
||||
wm_video.append(pred_videos_1[:, :, :args.exe_steps].cpu())
|
||||
|
||||
# Deferred decode: batch decode all stored latents
|
||||
full_latent = torch.cat(wm_latent, dim=2).to(device)
|
||||
full_video = model.decode_first_stage(full_latent).cpu()
|
||||
full_video = torch.cat(wm_video, dim=2)
|
||||
sample_tag = f"{args.dataset}-vid{sample['videoid']}-wd-fs-{fs}/full"
|
||||
log_to_tensorboard_async(writer,
|
||||
full_video,
|
||||
|
||||
87
scripts/export_trt.py
Normal file
87
scripts/export_trt.py
Normal file
@@ -0,0 +1,87 @@
|
||||
"""Export video UNet backbone to ONNX, then convert to TensorRT engine.
|
||||
|
||||
Usage:
|
||||
python scripts/export_trt.py \
|
||||
--ckpt ckpts/unifolm_wma_dual.ckpt.prepared.pt \
|
||||
--config configs/inference/world_model_interaction.yaml \
|
||||
--out_dir trt_engines
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
|
||||
import torch
|
||||
import tensorrt as trt
|
||||
from omegaconf import OmegaConf
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'src'))
|
||||
from unifolm_wma.utils.utils import instantiate_from_config
|
||||
from unifolm_wma.trt_utils import export_backbone_onnx
|
||||
|
||||
|
||||
def load_model(config_path, ckpt_path):
|
||||
if ckpt_path.endswith('.prepared.pt'):
|
||||
model = torch.load(ckpt_path, map_location='cpu')
|
||||
else:
|
||||
config = OmegaConf.load(config_path)
|
||||
model = instantiate_from_config(config.model)
|
||||
state_dict = torch.load(ckpt_path, map_location='cpu')
|
||||
if 'state_dict' in state_dict:
|
||||
state_dict = state_dict['state_dict']
|
||||
model.load_state_dict(state_dict, strict=False)
|
||||
model.eval().cuda()
|
||||
return model
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--ckpt', required=True)
|
||||
parser.add_argument('--config', default='configs/inference/world_model_interaction.yaml')
|
||||
parser.add_argument('--out_dir', default='trt_engines')
|
||||
parser.add_argument('--context_len', type=int, default=95)
|
||||
parser.add_argument('--fp16', action='store_true', default=True)
|
||||
args = parser.parse_args()
|
||||
|
||||
os.makedirs(args.out_dir, exist_ok=True)
|
||||
onnx_path = os.path.join(args.out_dir, 'video_backbone.onnx')
|
||||
engine_path = os.path.join(args.out_dir, 'video_backbone.engine')
|
||||
|
||||
if os.path.exists(onnx_path):
|
||||
print(f">>> ONNX already exists at {onnx_path}, skipping export.")
|
||||
n_outputs = 10
|
||||
else:
|
||||
print(">>> Loading model ...")
|
||||
model = load_model(args.config, args.ckpt)
|
||||
print(">>> Exporting ONNX ...")
|
||||
with torch.no_grad():
|
||||
n_outputs = export_backbone_onnx(model, onnx_path, context_len=args.context_len)
|
||||
del model
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
print(">>> Converting ONNX -> TensorRT engine ...")
|
||||
logger = trt.Logger(trt.Logger.WARNING)
|
||||
builder = trt.Builder(logger)
|
||||
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
|
||||
parser = trt.OnnxParser(network, logger)
|
||||
|
||||
if not parser.parse_from_file(os.path.abspath(onnx_path)):
|
||||
for i in range(parser.num_errors):
|
||||
print(f" ONNX parse error: {parser.get_error(i)}")
|
||||
raise RuntimeError("ONNX parsing failed")
|
||||
|
||||
config = builder.create_builder_config()
|
||||
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 16 << 30)
|
||||
if args.fp16:
|
||||
config.set_flag(trt.BuilderFlag.FP16)
|
||||
|
||||
engine_bytes = builder.build_serialized_network(network, config)
|
||||
with open(engine_path, 'wb') as f:
|
||||
f.write(engine_bytes)
|
||||
|
||||
print(f"\n>>> Done! Engine saved to {engine_path}")
|
||||
print(f" Outputs: 1 y + {n_outputs - 1} hs_a tensors")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -988,7 +988,7 @@ class LatentDiffusion(DDPM):
|
||||
|
||||
def instantiate_cond_stage(self, config: OmegaConf) -> None:
|
||||
"""
|
||||
Build the conditioning stage model. Frozen models are converted to FP16.
|
||||
Build the conditioning stage model.
|
||||
|
||||
Args:
|
||||
config: OmegaConf config describing the conditioning model to instantiate.
|
||||
@@ -1000,7 +1000,6 @@ class LatentDiffusion(DDPM):
|
||||
self.cond_stage_model.train = disabled_train
|
||||
for param in self.cond_stage_model.parameters():
|
||||
param.requires_grad = False
|
||||
self.cond_stage_model.half()
|
||||
else:
|
||||
model = instantiate_from_config(config)
|
||||
self.cond_stage_model = model
|
||||
@@ -1015,7 +1014,6 @@ class LatentDiffusion(DDPM):
|
||||
Returns:
|
||||
Conditioning embedding as a tensor (shape depends on cond model).
|
||||
"""
|
||||
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||
if self.cond_stage_forward is None:
|
||||
if hasattr(self.cond_stage_model, 'encode') and callable(
|
||||
self.cond_stage_model.encode):
|
||||
@@ -1959,7 +1957,6 @@ class LatentVisualDiffusion(LatentDiffusion):
|
||||
self.image_proj_model.train = disabled_train
|
||||
for param in self.image_proj_model.parameters():
|
||||
param.requires_grad = False
|
||||
self.image_proj_model.half()
|
||||
|
||||
def _init_embedder(self, config: OmegaConf, freeze: bool = True) -> None:
|
||||
"""
|
||||
@@ -1975,7 +1972,6 @@ class LatentVisualDiffusion(LatentDiffusion):
|
||||
self.embedder.train = disabled_train
|
||||
for param in self.embedder.parameters():
|
||||
param.requires_grad = False
|
||||
self.embedder.half()
|
||||
|
||||
def init_normalizers(self, normalize_config: OmegaConf,
|
||||
dataset_stats: Mapping[str, Any]) -> None:
|
||||
@@ -2179,7 +2175,6 @@ class LatentVisualDiffusion(LatentDiffusion):
|
||||
(random_num < 3 * self.uncond_prob).float(), "n -> n 1 1 1")
|
||||
|
||||
cond_img = input_mask * img
|
||||
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||
cond_img_emb = self.embedder(cond_img)
|
||||
cond_img_emb = self.image_proj_model(cond_img_emb)
|
||||
|
||||
@@ -2196,7 +2191,6 @@ class LatentVisualDiffusion(LatentDiffusion):
|
||||
repeat=z.shape[2])
|
||||
cond["c_concat"] = [img_cat_cond]
|
||||
|
||||
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||
cond_action = self.action_projector(action)
|
||||
cond_action_emb = self.agent_action_pos_emb + cond_action
|
||||
# Get conditioning states
|
||||
@@ -2463,17 +2457,7 @@ class DiffusionWrapper(pl.LightningModule):
|
||||
Returns:
|
||||
Output from the inner diffusion model (tensor or tuple, depending on the model).
|
||||
"""
|
||||
with torch.cuda.amp.autocast(dtype=torch.float16):
|
||||
return self._forward_impl(x, x_action, x_state, t,
|
||||
c_concat, c_crossattn, c_crossattn_action,
|
||||
c_adm, s, mask, **kwargs)
|
||||
|
||||
def _forward_impl(
|
||||
self,
|
||||
x, x_action, x_state, t,
|
||||
c_concat=None, c_crossattn=None, c_crossattn_action=None,
|
||||
c_adm=None, s=None, mask=None, **kwargs,
|
||||
):
|
||||
if self.conditioning_key is None:
|
||||
out = self.diffusion_model(x, t)
|
||||
elif self.conditioning_key == 'concat':
|
||||
|
||||
@@ -688,6 +688,7 @@ class WMAModel(nn.Module):
|
||||
# Context precomputation cache
|
||||
self._ctx_cache_enabled = False
|
||||
self._ctx_cache = {}
|
||||
self._trt_backbone = None # TRT engine for video UNet backbone
|
||||
# Reusable CUDA stream for parallel state_unet / action_unet
|
||||
self._state_stream = torch.cuda.Stream()
|
||||
|
||||
@@ -700,6 +701,12 @@ class WMAModel(nn.Module):
|
||||
self.__dict__.update(state)
|
||||
self._state_stream = torch.cuda.Stream()
|
||||
|
||||
def load_trt_backbone(self, engine_path, n_hs_a=9):
|
||||
"""Load a TensorRT engine for the video UNet backbone."""
|
||||
from unifolm_wma.trt_utils import TRTBackbone
|
||||
self._trt_backbone = TRTBackbone(engine_path, n_hs_a=n_hs_a)
|
||||
print(f">>> TRT backbone loaded from {engine_path}")
|
||||
|
||||
def forward(self,
|
||||
x: Tensor,
|
||||
x_action: Tensor,
|
||||
@@ -812,6 +819,12 @@ class WMAModel(nn.Module):
|
||||
fs_embed = fs_embed.repeat_interleave(repeats=t, dim=0)
|
||||
emb = emb + fs_embed
|
||||
|
||||
if self._trt_backbone is not None:
|
||||
# TRT path: run backbone via TensorRT engine
|
||||
h_in = x.type(self.dtype).contiguous()
|
||||
y, hs_a = self._trt_backbone(h_in, emb.contiguous(), context.contiguous())
|
||||
else:
|
||||
# PyTorch path: original backbone
|
||||
h = x.type(self.dtype)
|
||||
adapter_idx = 0
|
||||
hs = []
|
||||
|
||||
151
src/unifolm_wma/trt_utils.py
Normal file
151
src/unifolm_wma/trt_utils.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""TensorRT acceleration utilities for the video UNet backbone."""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from einops import rearrange
|
||||
from unifolm_wma.modules.networks.wma_model import Downsample, Upsample
|
||||
|
||||
|
||||
class VideoBackboneForExport(nn.Module):
|
||||
"""Wrapper that isolates the video UNet backbone for ONNX export.
|
||||
|
||||
Takes already-preprocessed inputs (after context/time embedding prep)
|
||||
and returns y + hs_a as a flat tuple.
|
||||
"""
|
||||
|
||||
def __init__(self, wma_model):
|
||||
super().__init__()
|
||||
self.input_blocks = wma_model.input_blocks
|
||||
self.middle_block = wma_model.middle_block
|
||||
self.output_blocks = wma_model.output_blocks
|
||||
self.out = wma_model.out
|
||||
self.addition_attention = wma_model.addition_attention
|
||||
if self.addition_attention:
|
||||
self.init_attn = wma_model.init_attn
|
||||
self.dtype = wma_model.dtype
|
||||
|
||||
def forward(self, h, emb, context):
|
||||
t = 16
|
||||
b = 1
|
||||
|
||||
hs = []
|
||||
hs_a = []
|
||||
h = h.type(self.dtype)
|
||||
for id, module in enumerate(self.input_blocks):
|
||||
h = module(h, emb, context=context, batch_size=b)
|
||||
if id == 0 and self.addition_attention:
|
||||
h = self.init_attn(h, emb, context=context, batch_size=b)
|
||||
if id != 0:
|
||||
if isinstance(module[0], Downsample):
|
||||
hs_a.append(rearrange(hs[-1], '(b t) c h w -> b t c h w', t=t))
|
||||
hs.append(h)
|
||||
hs_a.append(rearrange(h, '(b t) c h w -> b t c h w', t=t))
|
||||
|
||||
h = self.middle_block(h, emb, context=context, batch_size=b)
|
||||
hs_a.append(rearrange(h, '(b t) c h w -> b t c h w', t=t))
|
||||
|
||||
hs_out = []
|
||||
for module in self.output_blocks:
|
||||
h = torch.cat([h, hs.pop()], dim=1)
|
||||
h = module(h, emb, context=context, batch_size=b)
|
||||
if isinstance(module[-1], Upsample):
|
||||
hs_a.append(rearrange(hs_out[-1], '(b t) c h w -> b t c h w', t=t))
|
||||
hs_out.append(h)
|
||||
hs_a.append(rearrange(hs_out[-1], '(b t) c h w -> b t c h w', t=t))
|
||||
|
||||
y = self.out(h.type(h.dtype))
|
||||
y = rearrange(y, '(b t) c h w -> b c t h w', b=b)
|
||||
return (y, *hs_a)
|
||||
|
||||
|
||||
def export_backbone_onnx(model, save_path, context_len=95):
|
||||
wma = model.model.diffusion_model
|
||||
wrapper = VideoBackboneForExport(wma)
|
||||
wrapper.eval().cuda()
|
||||
|
||||
for m in wrapper.modules():
|
||||
if hasattr(m, 'checkpoint'):
|
||||
m.checkpoint = False
|
||||
if hasattr(m, 'use_checkpoint'):
|
||||
m.use_checkpoint = False
|
||||
|
||||
import xformers.ops
|
||||
_orig_mea = xformers.ops.memory_efficient_attention
|
||||
def _sdpa_replacement(q, k, v, attn_bias=None, op=None, **kw):
|
||||
return torch.nn.functional.scaled_dot_product_attention(q, k, v, attn_mask=attn_bias)
|
||||
xformers.ops.memory_efficient_attention = _sdpa_replacement
|
||||
|
||||
BT = 16
|
||||
emb_dim = wma.model_channels * 4
|
||||
ctx_dim = 1024
|
||||
in_ch = wma.in_channels
|
||||
|
||||
dummy_h = torch.randn(BT, in_ch, 40, 64, device='cuda', dtype=torch.float32)
|
||||
dummy_emb = torch.randn(BT, emb_dim, device='cuda', dtype=torch.float32)
|
||||
dummy_ctx = torch.randn(BT, context_len, ctx_dim, device='cuda', dtype=torch.float32)
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = wrapper(dummy_h, dummy_emb, dummy_ctx)
|
||||
n_outputs = len(outputs)
|
||||
print(f">>> Backbone has {n_outputs} outputs (1 y + {n_outputs-1} hs_a)")
|
||||
for i, o in enumerate(outputs):
|
||||
print(f" output[{i}]: {o.shape} {o.dtype}")
|
||||
|
||||
output_names = ['y'] + [f'hs_a_{i}' for i in range(n_outputs - 1)]
|
||||
|
||||
torch.onnx.export(
|
||||
wrapper,
|
||||
(dummy_h, dummy_emb, dummy_ctx),
|
||||
save_path,
|
||||
input_names=['h', 'emb', 'context'],
|
||||
output_names=output_names,
|
||||
opset_version=17,
|
||||
do_constant_folding=True,
|
||||
)
|
||||
print(f">>> ONNX exported to {save_path}")
|
||||
xformers.ops.memory_efficient_attention = _orig_mea
|
||||
return n_outputs
|
||||
|
||||
|
||||
class TRTBackbone:
|
||||
"""TensorRT runtime wrapper for the video UNet backbone."""
|
||||
|
||||
def __init__(self, engine_path, n_hs_a=9):
|
||||
import tensorrt as trt
|
||||
|
||||
self.logger = trt.Logger(trt.Logger.WARNING)
|
||||
with open(engine_path, 'rb') as f:
|
||||
runtime = trt.Runtime(self.logger)
|
||||
self.engine = runtime.deserialize_cuda_engine(f.read())
|
||||
self.context = self.engine.create_execution_context()
|
||||
self.n_hs_a = n_hs_a
|
||||
|
||||
import numpy as np
|
||||
self.output_buffers = {}
|
||||
for i in range(self.engine.num_io_tensors):
|
||||
name = self.engine.get_tensor_name(i)
|
||||
if self.engine.get_tensor_mode(name) == trt.TensorIOMode.OUTPUT:
|
||||
shape = self.engine.get_tensor_shape(name)
|
||||
np_dtype = trt.nptype(self.engine.get_tensor_dtype(name))
|
||||
buf = torch.empty(list(shape), dtype=torch.from_numpy(np.empty(0, dtype=np_dtype)).dtype, device='cuda')
|
||||
self.output_buffers[name] = buf
|
||||
print(f" TRT output '{name}': {list(shape)} {buf.dtype}")
|
||||
|
||||
def __call__(self, h, emb, context):
|
||||
import tensorrt as trt
|
||||
for name, tensor in [('h', h), ('emb', emb), ('context', context)]:
|
||||
expected_dtype = trt.nptype(self.engine.get_tensor_dtype(name))
|
||||
torch_expected = torch.from_numpy(__import__('numpy').empty(0, dtype=expected_dtype)).dtype
|
||||
if tensor.dtype != torch_expected:
|
||||
tensor = tensor.to(torch_expected)
|
||||
self.context.set_tensor_address(name, tensor.contiguous().data_ptr())
|
||||
|
||||
for name, buf in self.output_buffers.items():
|
||||
self.context.set_tensor_address(name, buf.data_ptr())
|
||||
|
||||
self.context.execute_async_v3(torch.cuda.current_stream().cuda_stream)
|
||||
torch.cuda.synchronize()
|
||||
|
||||
y = self.output_buffers['y']
|
||||
hs_a = [self.output_buffers[f'hs_a_{i}'] for i in range(self.n_hs_a)]
|
||||
return y, hs_a
|
||||
@@ -1,10 +1,10 @@
|
||||
2026-02-11 13:57:30.192579: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 13:57:30.242090: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 13:57:30.242134: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 13:57:30.243443: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 13:57:30.250963: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
2026-02-18 19:01:56.891895: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-18 19:01:56.940243: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-18 19:01:56.940285: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-18 19:01:56.941395: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-18 19:01:56.948327: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 13:57:31.177911: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
2026-02-18 19:01:57.870809: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
@@ -26,65 +26,124 @@ INFO:root:***** Configing Data *****
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
TRT output 'y': [1, 4, 16, 40, 64] torch.float32
|
||||
TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32
|
||||
TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32
|
||||
>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s][02/18/2026-19:02:10] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
|
||||
|
||||
9%|▉ | 1/11 [00:17<02:51, 17.15s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [00:33<02:31, 16.87s/it]
|
||||
@@ -115,6 +174,6 @@ DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_g1_pack_camera/case1/unitree_g1_pack_camera_case1.mp4",
|
||||
"pred_video": "unitree_g1_pack_camera/case1/output/inference/0_full_fs6.mp4",
|
||||
"psnr": 34.28787704598647
|
||||
"psnr": 35.615362167470806
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_g1_pack_camera"
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
2026-02-11 14:04:18.542839: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:04:18.593447: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:04:18.593492: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:04:18.594810: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:04:18.602331: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
2026-02-18 19:05:45.956647: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-18 19:05:46.004149: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-18 19:05:46.004193: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-18 19:05:46.005265: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-18 19:05:46.012074: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:04:19.529518: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
2026-02-18 19:05:46.932966: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
@@ -26,65 +26,124 @@ INFO:root:***** Configing Data *****
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
TRT output 'y': [1, 4, 16, 40, 64] torch.float32
|
||||
TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32
|
||||
TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32
|
||||
>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s][02/18/2026-19:05:59] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
|
||||
|
||||
9%|▉ | 1/11 [00:16<02:47, 16.71s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [00:33<02:30, 16.75s/it]
|
||||
@@ -115,6 +174,6 @@ DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_g1_pack_camera/case2/unitree_g1_pack_camera_case2.mp4",
|
||||
"pred_video": "unitree_g1_pack_camera/case2/output/inference/50_full_fs6.mp4",
|
||||
"psnr": 43.756296364111726
|
||||
"psnr": 34.61979248212279
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_g1_pack_camera"
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
2026-02-11 14:11:08.388455: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:11:08.437992: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:11:08.438037: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:11:08.439358: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:11:08.446903: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
2026-02-18 19:09:35.113634: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-18 19:09:35.161428: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-18 19:09:35.161474: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-18 19:09:35.162551: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-18 19:09:35.169325: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:11:09.369764: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
2026-02-18 19:09:36.089250: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
@@ -26,95 +26,121 @@ INFO:root:***** Configing Data *****
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
TRT output 'y': [1, 4, 16, 40, 64] torch.float32
|
||||
TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32
|
||||
TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32
|
||||
>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s][02/18/2026-19:09:49] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
|
||||
|
||||
9%|▉ | 1/11 [00:16<02:45, 16.53s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [01:08<05:08, 34.25s/it]
|
||||
27%|██▋ | 3/11 [01:42<04:33, 34.25s/it]
|
||||
36%|███▋ | 4/11 [02:16<03:59, 34.22s/it]
|
||||
45%|████▌ | 5/11 [02:51<03:25, 34.20s/it]
|
||||
55%|█████▍ | 6/11 [03:25<02:50, 34.16s/it]
|
||||
64%|██████▎ | 7/11 [03:59<02:16, 34.14s/it]
|
||||
73%|███████▎ | 8/11 [04:33<01:42, 34.13s/it]
|
||||
82%|████████▏ | 9/11 [05:07<01:08, 34.11s/it]
|
||||
91%|█████████ | 10/11 [05:41<00:34, 34.09s/it]
|
||||
100%|██████████| 11/11 [06:15<00:00, 34.07s/it]
|
||||
100%|██████████| 11/11 [06:15<00:00, 34.13s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_g1_pack_camera/case3/unitree_g1_pack_camera_case3.mp4",
|
||||
"pred_video": "unitree_g1_pack_camera/case3/output/inference/100_full_fs6.mp4",
|
||||
"psnr": 37.65161306938167
|
||||
"psnr": 37.034952654534486
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_g1_pack_camera"
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,120 +0,0 @@
|
||||
2026-02-11 14:17:57.092085: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:17:57.141607: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:17:57.141661: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:17:57.142984: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:17:57.150517: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:17:58.074812: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]
|
||||
9%|▉ | 1/11 [00:34<05:41, 34.20s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [01:08<05:08, 34.29s/it]
|
||||
27%|██▋ | 3/11 [01:42<04:34, 34.25s/it]
|
||||
36%|███▋ | 4/11 [02:16<03:59, 34.23s/it]
|
||||
45%|████▌ | 5/11 [02:51<03:25, 34.19s/it]
|
||||
55%|█████▍ | 6/11 [03:25<02:50, 34.17s/it]
|
||||
64%|██████▎ | 7/11 [03:59<02:16, 34.19s/it]
|
||||
73%|███████▎ | 8/11 [04:33<01:42, 34.18s/it]
|
||||
82%|████████▏ | 9/11 [05:07<01:08, 34.16s/it]
|
||||
91%|█████████ | 10/11 [05:41<00:34, 34.13s/it]
|
||||
100%|██████████| 11/11 [06:15<00:00, 34.11s/it]
|
||||
100%|██████████| 11/11 [06:15<00:00, 34.17s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_g1_pack_camera/case4/unitree_g1_pack_camera_case4.mp4",
|
||||
"pred_video": "unitree_g1_pack_camera/case4/output/inference/200_full_fs6.mp4",
|
||||
"psnr": 33.205596596179475
|
||||
"psnr": 31.43390896360405
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_g1_pack_camera"
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,13 +1,24 @@
|
||||
2026-02-11 14:24:46.595601: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:24:46.645554: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:24:46.645598: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:24:46.646935: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:24:46.654595: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
2026-02-10 15:38:28.973314: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-10 15:38:29.023024: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-10 15:38:29.023070: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-10 15:38:29.024393: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-10 15:38:29.031901: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:24:47.580547: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
2026-02-10 15:38:29.955454: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
|
||||
AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): hf-mirror.com:443
|
||||
DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
INFO:root:Loaded ViT-H-14 model config.
|
||||
DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
|
||||
INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
|
||||
>>> model checkpoint loaded.
|
||||
>>> Load pre-trained model ...
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
@@ -25,16 +36,13 @@ INFO:root:***** Configing Data *****
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
|
||||
0%| | 0/8 [00:00<?, ?it/s]>>> Step 0: generating actions ...
|
||||
12%|█▎ | 1/8 [00:34<03:58, 34.12s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
@@ -84,7 +92,9 @@ DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
12%|█▎ | 1/8 [01:14<08:41, 74.51s/it]
|
||||
25%|██▌ | 2/8 [02:29<07:28, 74.79s/it]
|
||||
38%|███▊ | 3/8 [03:44<06:14, 74.81s/it]
|
||||
50%|█████ | 4/8 [04:59<04:59, 74.78s/it]
|
||||
@@ -106,6 +116,6 @@ DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case1/unitree_z1_dual_arm_cleanup_pencils_case1.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case1/output/inference/0_full_fs4.mp4",
|
||||
"psnr": 48.52515070316814
|
||||
"psnr": 47.911564449209735
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_cleanup_pencils"
|
||||
--n_iter 8 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
2026-02-11 14:29:51.911195: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:29:51.961101: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:29:51.961156: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:29:51.962467: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:29:51.969980: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:29:52.899745: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/8 [00:00<?, ?it/s]
|
||||
12%|█▎ | 1/8 [00:34<03:59, 34.22s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
25%|██▌ | 2/8 [01:08<03:25, 34.24s/it]
|
||||
38%|███▊ | 3/8 [01:42<02:51, 34.21s/it]
|
||||
50%|█████ | 4/8 [02:16<02:16, 34.18s/it]
|
||||
62%|██████▎ | 5/8 [02:50<01:42, 34.15s/it]
|
||||
75%|███████▌ | 6/8 [03:24<01:08, 34.11s/it]
|
||||
88%|████████▊ | 7/8 [03:58<00:34, 34.10s/it]
|
||||
100%|██████████| 8/8 [04:32<00:00, 34.07s/it]
|
||||
100%|██████████| 8/8 [04:32<00:00, 34.12s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case2/unitree_z1_dual_arm_cleanup_pencils_case2.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case2/output/inference/50_full_fs4.mp4",
|
||||
"psnr": 47.91455867741451
|
||||
"psnr": 48.344571927558974
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_cleanup_pencils"
|
||||
--n_iter 8 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
2026-02-11 14:34:58.016000: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:34:58.066369: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:34:58.066418: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:34:58.067763: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:34:58.075447: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:34:59.008184: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/8 [00:00<?, ?it/s]
|
||||
12%|█▎ | 1/8 [00:34<03:58, 34.14s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
25%|██▌ | 2/8 [01:08<03:25, 34.21s/it]
|
||||
38%|███▊ | 3/8 [01:42<02:50, 34.18s/it]
|
||||
50%|█████ | 4/8 [02:16<02:16, 34.16s/it]
|
||||
62%|██████▎ | 5/8 [02:50<01:42, 34.11s/it]
|
||||
75%|███████▌ | 6/8 [03:24<01:08, 34.08s/it]
|
||||
88%|████████▊ | 7/8 [03:58<00:34, 34.09s/it]
|
||||
100%|██████████| 8/8 [04:32<00:00, 34.08s/it]
|
||||
100%|██████████| 8/8 [04:32<00:00, 34.11s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case3/unitree_z1_dual_arm_cleanup_pencils_case3.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case3/output/inference/100_full_fs4.mp4",
|
||||
"psnr": 41.260758562627046
|
||||
"psnr": 41.152374490134825
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_cleanup_pencils"
|
||||
--n_iter 8 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,111 +0,0 @@
|
||||
2026-02-11 14:40:06.781951: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:40:06.832600: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:40:06.832649: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:40:06.833982: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:40:06.841504: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:40:07.772162: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/8 [00:00<?, ?it/s]
|
||||
12%|█▎ | 1/8 [00:34<03:59, 34.18s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
25%|██▌ | 2/8 [01:08<03:25, 34.26s/it]
|
||||
38%|███▊ | 3/8 [01:42<02:51, 34.23s/it]
|
||||
50%|█████ | 4/8 [02:16<02:16, 34.20s/it]
|
||||
62%|██████▎ | 5/8 [02:50<01:42, 34.16s/it]
|
||||
75%|███████▌ | 6/8 [03:25<01:08, 34.14s/it]
|
||||
88%|████████▊ | 7/8 [03:59<00:34, 34.12s/it]
|
||||
100%|██████████| 8/8 [04:33<00:00, 34.12s/it]
|
||||
100%|██████████| 8/8 [04:33<00:00, 34.15s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_cleanup_pencils/case4/unitree_z1_dual_arm_cleanup_pencils_case4.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_cleanup_pencils/case4/output/inference/200_full_fs4.mp4",
|
||||
"psnr": 47.046499351779815
|
||||
"psnr": 46.025723557253855
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_cleanup_pencils"
|
||||
--n_iter 8 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
2026-02-11 14:45:16.672502: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:45:16.722666: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:45:16.722716: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:45:16.724025: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:45:16.731562: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:45:17.646917: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/7 [00:00<?, ?it/s]
|
||||
14%|█▍ | 1/7 [00:34<03:24, 34.09s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
29%|██▊ | 2/7 [01:08<02:50, 34.15s/it]
|
||||
43%|████▎ | 3/7 [01:42<02:16, 34.13s/it]
|
||||
57%|█████▋ | 4/7 [02:16<01:42, 34.09s/it]
|
||||
71%|███████▏ | 5/7 [02:50<01:08, 34.09s/it]
|
||||
86%|████████▌ | 6/7 [03:24<00:34, 34.09s/it]
|
||||
100%|██████████| 7/7 [03:58<00:00, 34.08s/it]
|
||||
100%|██████████| 7/7 [03:58<00:00, 34.09s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox/case1/unitree_z1_dual_arm_stackbox_case1.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox/case1/output/inference/5_full_fs4.mp4",
|
||||
"psnr": 43.97044934749157
|
||||
"psnr": 44.3480149502738
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_stackbox"
|
||||
--n_iter 7 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
2026-02-11 14:49:54.901507: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:49:54.951975: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:49:54.952023: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:49:54.953338: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:49:54.960938: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:49:55.888131: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/7 [00:00<?, ?it/s]
|
||||
14%|█▍ | 1/7 [00:34<03:25, 34.18s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
29%|██▊ | 2/7 [01:08<02:51, 34.26s/it]
|
||||
43%|████▎ | 3/7 [01:42<02:16, 34.25s/it]
|
||||
57%|█████▋ | 4/7 [02:16<01:42, 34.22s/it]
|
||||
71%|███████▏ | 5/7 [02:50<01:08, 34.17s/it]
|
||||
86%|████████▌ | 6/7 [03:25<00:34, 34.13s/it]
|
||||
100%|██████████| 7/7 [03:59<00:00, 34.11s/it]
|
||||
100%|██████████| 7/7 [03:59<00:00, 34.16s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox/case2/unitree_z1_dual_arm_stackbox_case2.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox/case2/output/inference/15_full_fs4.mp4",
|
||||
"psnr": 43.70773432165555
|
||||
"psnr": 39.867728254007716
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_stackbox"
|
||||
--n_iter 7 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
2026-02-11 14:54:33.079012: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:54:33.128851: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:54:33.128900: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:54:33.130229: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:54:33.137786: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:54:34.065218: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/7 [00:00<?, ?it/s]
|
||||
14%|█▍ | 1/7 [00:34<03:25, 34.18s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
29%|██▊ | 2/7 [01:08<02:51, 34.24s/it]
|
||||
43%|████▎ | 3/7 [01:42<02:16, 34.25s/it]
|
||||
57%|█████▋ | 4/7 [02:16<01:42, 34.17s/it]
|
||||
71%|███████▏ | 5/7 [02:50<01:08, 34.13s/it]
|
||||
86%|████████▌ | 6/7 [03:24<00:34, 34.11s/it]
|
||||
100%|██████████| 7/7 [03:58<00:00, 34.09s/it]
|
||||
100%|██████████| 7/7 [03:58<00:00, 34.13s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox/case3/unitree_z1_dual_arm_stackbox_case3.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox/case3/output/inference/25_full_fs4.mp4",
|
||||
"psnr": 48.68206289825236
|
||||
"psnr": 39.19101039445159
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_stackbox"
|
||||
--n_iter 7 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,108 +0,0 @@
|
||||
2026-02-11 14:59:11.849320: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 14:59:11.899274: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 14:59:11.899322: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 14:59:11.900640: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 14:59:11.908158: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 14:59:12.830387: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/7 [00:00<?, ?it/s]
|
||||
14%|█▍ | 1/7 [00:34<03:24, 34.11s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
29%|██▊ | 2/7 [01:08<02:50, 34.17s/it]
|
||||
43%|████▎ | 3/7 [01:42<02:16, 34.13s/it]
|
||||
57%|█████▋ | 4/7 [02:16<01:42, 34.12s/it]
|
||||
71%|███████▏ | 5/7 [02:50<01:08, 34.07s/it]
|
||||
86%|████████▌ | 6/7 [03:24<00:34, 34.07s/it]
|
||||
100%|██████████| 7/7 [03:58<00:00, 34.06s/it]
|
||||
100%|██████████| 7/7 [03:58<00:00, 34.09s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox/case4/unitree_z1_dual_arm_stackbox_case4.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox/case4/output/inference/35_full_fs4.mp4",
|
||||
"psnr": 42.117165235043196
|
||||
"psnr": 40.29563315341769
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_stackbox"
|
||||
--n_iter 7 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,16 +1,13 @@
|
||||
2026-02-11 16:58:21.710140: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 16:58:21.759418: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 16:58:21.759461: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 16:58:21.760752: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 16:58:21.768205: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
2026-02-18 18:49:49.117856: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-18 18:49:49.165270: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-18 18:49:49.165322: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-18 18:49:49.166382: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-18 18:49:49.173299: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 16:58:22.691154: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
2026-02-18 18:49:50.090214: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
>>> Diffusion backbone (model.model) converted to FP16.
|
||||
>>> Projectors (image_proj_model, state_projector, action_projector) converted to FP16.
|
||||
>>> Encoders (cond_stage_model, embedder) converted to FP16.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
@@ -29,45 +26,27 @@ INFO:root:***** Configing Data *****
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
TRT output 'y': [1, 4, 16, 40, 64] torch.float32
|
||||
TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32
|
||||
TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32
|
||||
>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s][02/18/2026-18:50:03] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
|
||||
|
||||
9%|▉ | 1/11 [00:15<02:38, 15.88s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
36%|███▋ | 4/11 [01:31<02:39, 22.83s/it]
|
||||
45%|████▌ | 5/11 [01:54<02:17, 22.83s/it]
|
||||
55%|█████▍ | 6/11 [02:17<01:54, 22.83s/it]
|
||||
64%|██████▎ | 7/11 [02:39<01:31, 22.83s/it]
|
||||
73%|███████▎ | 8/11 [03:02<01:08, 22.83s/it]
|
||||
82%|████████▏ | 9/11 [03:25<00:45, 22.81s/it]
|
||||
91%|█████████ | 10/11 [03:48<00:22, 22.81s/it]
|
||||
100%|██████████| 11/11 [04:11<00:00, 22.79s/it]
|
||||
100%|██████████| 11/11 [04:11<00:00, 22.83s/it]
|
||||
>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
@@ -117,7 +96,37 @@ DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [00:31<02:21, 15.71s/it]
|
||||
27%|██▋ | 3/11 [00:47<02:06, 15.86s/it]
|
||||
36%|███▋ | 4/11 [01:03<01:51, 15.90s/it]
|
||||
45%|████▌ | 5/11 [01:19<01:36, 16.06s/it]
|
||||
55%|█████▍ | 6/11 [01:35<01:19, 15.98s/it]
|
||||
64%|██████▎ | 7/11 [01:51<01:04, 16.09s/it]
|
||||
73%|███████▎ | 8/11 [02:08<00:48, 16.08s/it]
|
||||
82%|████████▏ | 9/11 [02:24<00:32, 16.16s/it]
|
||||
91%|█████████ | 10/11 [02:40<00:16, 16.13s/it]
|
||||
100%|██████████| 11/11 [02:56<00:00, 16.09s/it]
|
||||
100%|██████████| 11/11 [02:56<00:00, 16.04s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case1/unitree_z1_dual_arm_stackbox_v2_case1.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case1/output/inference/5_full_fs4.mp4",
|
||||
"psnr": 26.683000215343522
|
||||
"psnr": 27.62636266067224
|
||||
}
|
||||
@@ -1,120 +0,0 @@
|
||||
2026-02-11 15:10:45.687888: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 15:10:45.738006: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 15:10:45.738054: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 15:10:45.739410: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 15:10:45.747229: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 15:10:46.687896: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]
|
||||
9%|▉ | 1/11 [00:34<05:41, 34.16s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [01:08<05:08, 34.26s/it]
|
||||
27%|██▋ | 3/11 [01:42<04:33, 34.20s/it]
|
||||
36%|███▋ | 4/11 [02:16<03:59, 34.16s/it]
|
||||
45%|████▌ | 5/11 [02:50<03:24, 34.15s/it]
|
||||
55%|█████▍ | 6/11 [03:24<02:50, 34.13s/it]
|
||||
64%|██████▎ | 7/11 [03:59<02:16, 34.13s/it]
|
||||
73%|███████▎ | 8/11 [04:33<01:42, 34.13s/it]
|
||||
82%|████████▏ | 9/11 [05:07<01:08, 34.12s/it]
|
||||
91%|█████████ | 10/11 [05:41<00:34, 34.11s/it]
|
||||
100%|██████████| 11/11 [06:15<00:00, 34.09s/it]
|
||||
100%|██████████| 11/11 [06:15<00:00, 34.13s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case2/unitree_z1_dual_arm_stackbox_v2_case2.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case2/output/inference/15_full_fs4.mp4",
|
||||
"psnr": 33.945563782754554
|
||||
"psnr": 33.90444714332389
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_stackbox_v2"
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,120 +0,0 @@
|
||||
2026-02-11 15:17:41.661323: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 15:17:41.711317: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 15:17:41.711373: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 15:17:41.712706: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 15:17:41.720248: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 15:17:42.650151: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s]
|
||||
9%|▉ | 1/11 [00:34<05:41, 34.15s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [01:08<05:08, 34.25s/it]
|
||||
27%|██▋ | 3/11 [01:42<04:33, 34.23s/it]
|
||||
36%|███▋ | 4/11 [02:16<03:59, 34.18s/it]
|
||||
45%|████▌ | 5/11 [02:50<03:24, 34.17s/it]
|
||||
55%|█████▍ | 6/11 [03:25<02:50, 34.15s/it]
|
||||
64%|██████▎ | 7/11 [03:59<02:16, 34.13s/it]
|
||||
73%|███████▎ | 8/11 [04:33<01:42, 34.12s/it]
|
||||
82%|████████▏ | 9/11 [05:07<01:08, 34.09s/it]
|
||||
91%|█████████ | 10/11 [05:41<00:34, 34.09s/it]
|
||||
100%|██████████| 11/11 [06:15<00:00, 34.09s/it]
|
||||
100%|██████████| 11/11 [06:15<00:00, 34.13s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case3/unitree_z1_dual_arm_stackbox_v2_case3.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case3/output/inference/25_full_fs4.mp4",
|
||||
"psnr": 31.86126241517472
|
||||
"psnr": 34.50192428908007
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_stackbox_v2"
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
2026-02-11 15:24:38.487806: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 15:24:38.538144: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 15:24:38.538200: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 15:24:38.539554: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 15:24:38.547185: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
2026-02-18 18:54:56.403136: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-18 18:54:56.451144: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-18 18:54:56.451189: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-18 18:54:56.452312: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-18 18:54:56.459281: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 15:24:39.470885: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
2026-02-18 18:54:57.381032: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
@@ -26,65 +26,124 @@ INFO:root:***** Configing Data *****
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
TRT output 'y': [1, 4, 16, 40, 64] torch.float32
|
||||
TRT output 'hs_a_0': [1, 16, 320, 40, 64] torch.float32
|
||||
TRT output 'hs_a_1': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_2': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_3': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_4': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_5': [1, 16, 1280, 5, 8] torch.float32
|
||||
TRT output 'hs_a_6': [1, 16, 1280, 10, 16] torch.float32
|
||||
TRT output 'hs_a_7': [1, 16, 640, 20, 32] torch.float32
|
||||
TRT output 'hs_a_8': [1, 16, 320, 40, 64] torch.float32
|
||||
>>> TRT backbone loaded from /home/qhy/unifolm-world-model-action/scripts/evaluation/../../trt_engines/video_backbone.engine
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
|
||||
0%| | 0/11 [00:00<?, ?it/s][02/18/2026-18:55:10] [TRT] [W] Using default stream in enqueueV3() may lead to performance issues due to additional calls to cudaStreamSynchronize() by TensorRT to ensure correct synchronization. Please use non-default stream instead.
|
||||
|
||||
9%|▉ | 1/11 [00:16<02:45, 16.53s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
18%|█▊ | 2/11 [00:33<02:28, 16.52s/it]
|
||||
@@ -115,6 +174,6 @@ DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_dual_arm_stackbox_v2/case4/unitree_z1_dual_arm_stackbox_v2_case4.mp4",
|
||||
"pred_video": "unitree_z1_dual_arm_stackbox_v2/case4/output/inference/35_full_fs4.mp4",
|
||||
"psnr": 39.90908062249536
|
||||
"psnr": 25.49270910031428
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_dual_arm_stackbox_v2"
|
||||
--n_iter 11 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
2026-02-11 15:31:35.657972: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 15:31:35.707733: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 15:31:35.707792: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 15:31:35.709109: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 15:31:35.716616: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 15:31:36.648540: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/12 [00:00<?, ?it/s]
|
||||
8%|▊ | 1/12 [00:34<06:15, 34.17s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
17%|█▋ | 2/12 [01:08<05:42, 34.24s/it]
|
||||
25%|██▌ | 3/12 [01:42<05:07, 34.20s/it]
|
||||
33%|███▎ | 4/12 [02:16<04:33, 34.19s/it]
|
||||
42%|████▏ | 5/12 [02:50<03:58, 34.14s/it]
|
||||
50%|█████ | 6/12 [03:24<03:24, 34.09s/it]
|
||||
58%|█████▊ | 7/12 [03:58<02:50, 34.07s/it]
|
||||
67%|██████▋ | 8/12 [04:32<02:16, 34.07s/it]
|
||||
75%|███████▌ | 9/12 [05:06<01:42, 34.05s/it]
|
||||
83%|████████▎ | 10/12 [05:41<01:08, 34.06s/it]
|
||||
92%|█████████▏| 11/12 [06:15<00:34, 34.08s/it]
|
||||
100%|██████████| 12/12 [06:49<00:00, 34.07s/it]
|
||||
100%|██████████| 12/12 [06:49<00:00, 34.10s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 8: generating actions ...
|
||||
>>> Step 8: interacting with world model ...
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_stackbox/case1/unitree_z1_stackbox_case1.mp4",
|
||||
"pred_video": "unitree_z1_stackbox/case1/output/inference/5_full_fs4.mp4",
|
||||
"psnr": 49.42336701518203
|
||||
"psnr": 42.83913947323794
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_stackbox"
|
||||
--n_iter 12 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
2026-02-11 15:39:01.409308: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 15:39:01.459136: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 15:39:01.459190: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 15:39:01.460507: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 15:39:01.468019: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 15:39:02.395912: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/12 [00:00<?, ?it/s]
|
||||
8%|▊ | 1/12 [00:34<06:15, 34.10s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
17%|█▋ | 2/12 [01:08<05:41, 34.18s/it]
|
||||
25%|██▌ | 3/12 [01:42<05:07, 34.15s/it]
|
||||
33%|███▎ | 4/12 [02:16<04:33, 34.13s/it]
|
||||
42%|████▏ | 5/12 [02:50<03:58, 34.09s/it]
|
||||
50%|█████ | 6/12 [03:24<03:24, 34.08s/it]
|
||||
58%|█████▊ | 7/12 [03:58<02:50, 34.06s/it]
|
||||
67%|██████▋ | 8/12 [04:32<02:16, 34.03s/it]
|
||||
75%|███████▌ | 9/12 [05:06<01:42, 34.03s/it]
|
||||
83%|████████▎ | 10/12 [05:40<01:08, 34.03s/it]
|
||||
92%|█████████▏| 11/12 [06:14<00:34, 34.02s/it]
|
||||
100%|██████████| 12/12 [06:48<00:00, 34.00s/it]
|
||||
100%|██████████| 12/12 [06:48<00:00, 34.05s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 8: generating actions ...
|
||||
>>> Step 8: interacting with world model ...
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_stackbox/case2/unitree_z1_stackbox_case2.mp4",
|
||||
"pred_video": "unitree_z1_stackbox/case2/output/inference/15_full_fs4.mp4",
|
||||
"psnr": 48.88265200549669
|
||||
"psnr": 48.64571989587276
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_stackbox"
|
||||
--n_iter 12 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
2026-02-11 15:46:27.304090: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 15:46:27.354074: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 15:46:27.354120: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 15:46:27.355468: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 15:46:27.363130: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 15:46:28.290783: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/12 [00:00<?, ?it/s]
|
||||
8%|▊ | 1/12 [00:34<06:15, 34.15s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
17%|█▋ | 2/12 [01:08<05:42, 34.24s/it]
|
||||
25%|██▌ | 3/12 [01:42<05:07, 34.22s/it]
|
||||
33%|███▎ | 4/12 [02:16<04:33, 34.16s/it]
|
||||
42%|████▏ | 5/12 [02:50<03:58, 34.11s/it]
|
||||
50%|█████ | 6/12 [03:24<03:24, 34.08s/it]
|
||||
58%|█████▊ | 7/12 [03:58<02:50, 34.04s/it]
|
||||
67%|██████▋ | 8/12 [04:32<02:16, 34.02s/it]
|
||||
75%|███████▌ | 9/12 [05:06<01:41, 33.99s/it]
|
||||
83%|████████▎ | 10/12 [05:40<01:07, 33.99s/it]
|
||||
92%|█████████▏| 11/12 [06:14<00:33, 33.97s/it]
|
||||
100%|██████████| 12/12 [06:48<00:00, 33.96s/it]
|
||||
100%|██████████| 12/12 [06:48<00:00, 34.04s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 8: generating actions ...
|
||||
>>> Step 8: interacting with world model ...
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_stackbox/case3/unitree_z1_stackbox_case3.mp4",
|
||||
"pred_video": "unitree_z1_stackbox/case3/output/inference/25_full_fs4.mp4",
|
||||
"psnr": 50.884297816906816
|
||||
"psnr": 45.127553229898034
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_stackbox"
|
||||
--n_iter 12 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
2026-02-11 15:53:52.504337: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
||||
2026-02-11 15:53:52.554351: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
||||
2026-02-11 15:53:52.554397: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
||||
2026-02-11 15:53:52.555718: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
||||
2026-02-11 15:53:52.563252: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
||||
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
||||
2026-02-11 15:53:53.493343: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
||||
Global seed set to 123
|
||||
>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
|
||||
>>> Prepared model loaded.
|
||||
INFO:root:***** Configing Data *****
|
||||
>>> unitree_z1_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_stackbox: data stats loaded.
|
||||
>>> unitree_z1_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
|
||||
>>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
|
||||
>>> unitree_g1_pack_camera: 1 data samples loaded.
|
||||
>>> unitree_g1_pack_camera: data stats loaded.
|
||||
>>> unitree_g1_pack_camera: normalizer initiated.
|
||||
>>> Dataset is successfully loaded ...
|
||||
✓ KV fused: 66 attention layers
|
||||
>>> Generate 16 frames under each generation ...
|
||||
DEBUG:h5py._conv:Creating converter from 3 to 5
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
|
||||
DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
|
||||
|
||||
0%| | 0/12 [00:00<?, ?it/s]
|
||||
8%|▊ | 1/12 [00:34<06:15, 34.14s/it]>>> Step 0: generating actions ...
|
||||
>>> Step 0: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 1: generating actions ...
|
||||
DEBUG:PIL.Image:Importing BlpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BmpImagePlugin
|
||||
DEBUG:PIL.Image:Importing BufrStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing CurImagePlugin
|
||||
DEBUG:PIL.Image:Importing DcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing DdsImagePlugin
|
||||
DEBUG:PIL.Image:Importing EpsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsImagePlugin
|
||||
DEBUG:PIL.Image:Importing FitsStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing FliImagePlugin
|
||||
DEBUG:PIL.Image:Importing FpxImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing FtexImagePlugin
|
||||
DEBUG:PIL.Image:Importing GbrImagePlugin
|
||||
DEBUG:PIL.Image:Importing GifImagePlugin
|
||||
DEBUG:PIL.Image:Importing GribStubImagePlugin
|
||||
DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcnsImagePlugin
|
||||
DEBUG:PIL.Image:Importing IcoImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImImagePlugin
|
||||
DEBUG:PIL.Image:Importing ImtImagePlugin
|
||||
DEBUG:PIL.Image:Importing IptcImagePlugin
|
||||
DEBUG:PIL.Image:Importing JpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
|
||||
DEBUG:PIL.Image:Importing McIdasImagePlugin
|
||||
DEBUG:PIL.Image:Importing MicImagePlugin
|
||||
DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
|
||||
DEBUG:PIL.Image:Importing MpegImagePlugin
|
||||
DEBUG:PIL.Image:Importing MpoImagePlugin
|
||||
DEBUG:PIL.Image:Importing MspImagePlugin
|
||||
DEBUG:PIL.Image:Importing PalmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcdImagePlugin
|
||||
DEBUG:PIL.Image:Importing PcxImagePlugin
|
||||
DEBUG:PIL.Image:Importing PdfImagePlugin
|
||||
DEBUG:PIL.Image:Importing PixarImagePlugin
|
||||
DEBUG:PIL.Image:Importing PngImagePlugin
|
||||
DEBUG:PIL.Image:Importing PpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing PsdImagePlugin
|
||||
DEBUG:PIL.Image:Importing QoiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SgiImagePlugin
|
||||
DEBUG:PIL.Image:Importing SpiderImagePlugin
|
||||
DEBUG:PIL.Image:Importing SunImagePlugin
|
||||
DEBUG:PIL.Image:Importing TgaImagePlugin
|
||||
DEBUG:PIL.Image:Importing TiffImagePlugin
|
||||
DEBUG:PIL.Image:Importing WebPImagePlugin
|
||||
DEBUG:PIL.Image:Importing WmfImagePlugin
|
||||
DEBUG:PIL.Image:Importing XbmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XpmImagePlugin
|
||||
DEBUG:PIL.Image:Importing XVThumbImagePlugin
|
||||
|
||||
17%|█▋ | 2/12 [01:08<05:41, 34.19s/it]
|
||||
25%|██▌ | 3/12 [01:42<05:07, 34.15s/it]
|
||||
33%|███▎ | 4/12 [02:16<04:32, 34.12s/it]
|
||||
42%|████▏ | 5/12 [02:50<03:58, 34.09s/it]
|
||||
50%|█████ | 6/12 [03:24<03:24, 34.05s/it]
|
||||
58%|█████▊ | 7/12 [03:58<02:50, 34.03s/it]
|
||||
67%|██████▋ | 8/12 [04:32<02:16, 34.04s/it]
|
||||
75%|███████▌ | 9/12 [05:06<01:42, 34.03s/it]
|
||||
83%|████████▎ | 10/12 [05:40<01:08, 34.01s/it]
|
||||
92%|█████████▏| 11/12 [06:14<00:34, 34.03s/it]
|
||||
100%|██████████| 12/12 [06:48<00:00, 34.02s/it]
|
||||
100%|██████████| 12/12 [06:48<00:00, 34.05s/it]
|
||||
>>> Step 1: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 2: generating actions ...
|
||||
>>> Step 2: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 3: generating actions ...
|
||||
>>> Step 3: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 4: generating actions ...
|
||||
>>> Step 4: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 5: generating actions ...
|
||||
>>> Step 5: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 6: generating actions ...
|
||||
>>> Step 6: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 7: generating actions ...
|
||||
>>> Step 7: interacting with world model ...
|
||||
>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
>>> Step 8: generating actions ...
|
||||
>>> Step 8: interacting with world model ...
|
||||
@@ -1,5 +1,5 @@
|
||||
{
|
||||
"gt_video": "unitree_z1_stackbox/case4/unitree_z1_stackbox_case4.mp4",
|
||||
"pred_video": "unitree_z1_stackbox/case4/output/inference/35_full_fs4.mp4",
|
||||
"psnr": 47.85197517791449
|
||||
"psnr": 50.642542240144444
|
||||
}
|
||||
@@ -20,6 +20,5 @@ dataset="unitree_z1_stackbox"
|
||||
--n_iter 12 \
|
||||
--timestep_spacing 'uniform_trailing' \
|
||||
--guidance_rescale 0.7 \
|
||||
--perframe_ae \
|
||||
--fast_policy_no_decode
|
||||
--perframe_ae
|
||||
} 2>&1 | tee "${res_dir}/output.log"
|
||||
|
||||
Reference in New Issue
Block a user