From afa12ba031f50588597aa17146bacc1286224390 Mon Sep 17 00:00:00 2001 From: qhy <2728290997@qq.com> Date: Tue, 10 Feb 2026 19:54:53 +0800 Subject: [PATCH] =?UTF-8?q?=E6=AF=8F=E6=AD=A5=E8=BF=AD=E4=BB=A3=E4=BF=9D?= =?UTF-8?q?=E5=AD=98=E5=BC=82=E6=AD=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .claude/settings.local.json | 3 +- .gitignore | 3 +- scripts/evaluation/world_model_interaction.py | 123 +++++++++++++++--- .../case1/output.log | 41 +++--- 4 files changed, 121 insertions(+), 49 deletions(-) diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 79f6368..3bfcae1 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -4,7 +4,8 @@ "Bash(conda env list:*)", "Bash(mamba env:*)", "Bash(micromamba env list:*)", - "Bash(echo:*)" + "Bash(echo:*)", + "Bash(git show:*)" ] } } diff --git a/.gitignore b/.gitignore index d6bfa21..a661276 100644 --- a/.gitignore +++ b/.gitignore @@ -130,4 +130,5 @@ Experiment/log *.ckpt -*.0 \ No newline at end of file +*.0 +ckpts/unifolm_wma_dual.ckpt.prepared.pt diff --git a/scripts/evaluation/world_model_interaction.py b/scripts/evaluation/world_model_interaction.py index ad103a7..7341151 100644 --- a/scripts/evaluation/world_model_interaction.py +++ b/scripts/evaluation/world_model_interaction.py @@ -9,6 +9,8 @@ import logging import einops import warnings import imageio +import atexit +from concurrent.futures import ThreadPoolExecutor from pytorch_lightning import seed_everything from omegaconf import OmegaConf @@ -16,8 +18,9 @@ from tqdm import tqdm from einops import rearrange, repeat from collections import OrderedDict from torch import nn -from eval_utils import populate_queues, log_to_tensorboard +from eval_utils import populate_queues from collections import deque +from typing import Optional, List, Any torch.backends.cuda.matmul.allow_tf32 = True torch.backends.cudnn.allow_tf32 = True @@ -153,6 +156,81 @@ def save_results(video: Tensor, filename: str, fps: int = 8) -> None: options={'crf': '10'}) +# ========== Async I/O ========== +_io_executor: Optional[ThreadPoolExecutor] = None +_io_futures: List[Any] = [] + + +def _get_io_executor() -> ThreadPoolExecutor: + global _io_executor + if _io_executor is None: + _io_executor = ThreadPoolExecutor(max_workers=2) + return _io_executor + + +def _flush_io(): + """Wait for all pending async I/O to finish.""" + global _io_futures + for fut in _io_futures: + try: + fut.result() + except Exception as e: + print(f">>> [async I/O] error: {e}") + _io_futures.clear() + + +atexit.register(_flush_io) + + +def _save_results_sync(video_cpu: Tensor, filename: str, fps: int) -> None: + """Synchronous save on CPU tensor (runs in background thread).""" + video = torch.clamp(video_cpu.float(), -1., 1.) + n = video.shape[0] + video = video.permute(2, 0, 1, 3, 4) + frame_grids = [ + torchvision.utils.make_grid(framesheet, nrow=int(n), padding=0) + for framesheet in video + ] + grid = torch.stack(frame_grids, dim=0) + grid = (grid + 1.0) / 2.0 + grid = (grid * 255).to(torch.uint8).permute(0, 2, 3, 1) + torchvision.io.write_video(filename, + grid, + fps=fps, + video_codec='h264', + options={'crf': '10'}) + + +def save_results_async(video: Tensor, filename: str, fps: int = 8) -> None: + """Submit video saving to background thread pool.""" + video_cpu = video.detach().cpu() + fut = _get_io_executor().submit(_save_results_sync, video_cpu, filename, fps) + _io_futures.append(fut) + + +def _log_to_tb_sync(writer, video_cpu: Tensor, tag: str, fps: int) -> None: + """Synchronous TensorBoard log on CPU tensor (runs in background thread).""" + if video_cpu.dim() == 5: + n = video_cpu.shape[0] + video = video_cpu.permute(2, 0, 1, 3, 4) + frame_grids = [ + torchvision.utils.make_grid(framesheet, nrow=int(n), padding=0) + for framesheet in video + ] + grid = torch.stack(frame_grids, dim=0) + grid = (grid + 1.0) / 2.0 + grid = grid.unsqueeze(dim=0) + writer.add_video(tag, grid, fps=fps) + + +def log_to_tensorboard_async(writer, data: Tensor, tag: str, fps: int = 10) -> None: + """Submit TensorBoard logging to background thread pool.""" + if isinstance(data, torch.Tensor) and data.dim() == 5: + data_cpu = data.detach().cpu() + fut = _get_io_executor().submit(_log_to_tb_sync, writer, data_cpu, tag, fps) + _io_futures.append(fut) + + def get_init_frame_path(data_dir: str, sample: dict) -> str: """Construct the init_frame path from directory and sample metadata. @@ -673,31 +751,31 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None: cond_obs_queues = populate_queues(cond_obs_queues, observation) - # Save the imagen videos for decision-making + # Save the imagen videos for decision-making (async) if pred_videos_0 is not None: sample_tag = f"{args.dataset}-vid{sample['videoid']}-dm-fs-{fs}/itr-{itr}" - log_to_tensorboard(writer, - pred_videos_0, - sample_tag, - fps=args.save_fps) + log_to_tensorboard_async(writer, + pred_videos_0, + sample_tag, + fps=args.save_fps) # Save videos environment changes via world-model interaction sample_tag = f"{args.dataset}-vid{sample['videoid']}-wd-fs-{fs}/itr-{itr}" - log_to_tensorboard(writer, - pred_videos_1, - sample_tag, - fps=args.save_fps) + log_to_tensorboard_async(writer, + pred_videos_1, + sample_tag, + fps=args.save_fps) # Save the imagen videos for decision-making if pred_videos_0 is not None: sample_video_file = f'{video_save_dir}/dm/{fs}/itr-{itr}.mp4' - save_results(pred_videos_0.cpu(), - sample_video_file, - fps=args.save_fps) + save_results_async(pred_videos_0, + sample_video_file, + fps=args.save_fps) # Save videos environment changes via world-model interaction sample_video_file = f'{video_save_dir}/wm/{fs}/itr-{itr}.mp4' - save_results(pred_videos_1.cpu(), - sample_video_file, - fps=args.save_fps) + save_results_async(pred_videos_1, + sample_video_file, + fps=args.save_fps) print('>' * 24) # Collect the result of world-model interactions @@ -705,12 +783,15 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None: full_video = torch.cat(wm_video, dim=2) sample_tag = f"{args.dataset}-vid{sample['videoid']}-wd-fs-{fs}/full" - log_to_tensorboard(writer, - full_video, - sample_tag, - fps=args.save_fps) + log_to_tensorboard_async(writer, + full_video, + sample_tag, + fps=args.save_fps) sample_full_video_file = f"{video_save_dir}/../{sample['videoid']}_full_fs{fs}.mp4" - save_results(full_video, sample_full_video_file, fps=args.save_fps) + save_results_async(full_video, sample_full_video_file, fps=args.save_fps) + + # Wait for all async I/O to complete + _flush_io() def get_parser(): diff --git a/unitree_z1_dual_arm_stackbox_v2/case1/output.log b/unitree_z1_dual_arm_stackbox_v2/case1/output.log index 6daeda5..47c738b 100644 --- a/unitree_z1_dual_arm_stackbox_v2/case1/output.log +++ b/unitree_z1_dual_arm_stackbox_v2/case1/output.log @@ -1,24 +1,13 @@ -2026-02-10 17:39:22.590654: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. -2026-02-10 17:39:22.640645: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered -2026-02-10 17:39:22.640689: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered -2026-02-10 17:39:22.642010: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered -2026-02-10 17:39:22.649530: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. +2026-02-10 19:43:34.679819: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`. +2026-02-10 19:43:34.729245: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered +2026-02-10 19:43:34.729298: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered +2026-02-10 19:43:34.730600: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered +2026-02-10 19:43:34.738078: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations. To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags. -2026-02-10 17:39:23.575804: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT +2026-02-10 19:43:35.659490: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Global seed set to 123 -INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode -INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08 -INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08 -AE working on z of shape (1, 4, 32, 32) = 4096 dimensions. -INFO:root:Loaded ViT-H-14 model config. -DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): hf-mirror.com:443 -DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0 -INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k). -INFO:root:Loaded ViT-H-14 model config. -DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0 -INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k). ->>> model checkpoint loaded. ->>> Load pre-trained model ... +>>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ... +>>> Prepared model loaded. INFO:root:***** Configing Data ***** >>> unitree_z1_stackbox: 1 data samples loaded. >>> unitree_z1_stackbox: data stats loaded. @@ -41,8 +30,10 @@ DEBUG:h5py._conv:Creating converter from 3 to 5 DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13 DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9 DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096 - 0%| | 0/11 [00:00>> Step 0: generating actions ... + 0%| | 0/11 [00:00>> Step 0: generating actions ... >>> Step 0: interacting with world model ... +>>>>>>>>>>>>>>>>>>>>>>>> +>>> Step 1: generating actions ... DEBUG:PIL.Image:Importing BlpImagePlugin DEBUG:PIL.Image:Importing BmpImagePlugin DEBUG:PIL.Image:Importing BufrStubImagePlugin @@ -92,9 +83,7 @@ DEBUG:PIL.Image:Importing WmfImagePlugin DEBUG:PIL.Image:Importing XbmImagePlugin DEBUG:PIL.Image:Importing XpmImagePlugin DEBUG:PIL.Image:Importing XVThumbImagePlugin - 9%|▉ | 1/11 [00:35<05:55, 35.52s/it] 18%|█▊ | 2/11 [01:11<05:21, 35.73s/it] 27%|██▋ | 3/11 [01:47<04:48, 36.04s/it] 36%|███▋ | 4/11 [02:24<04:13, 36.19s/it] 45%|████▌ | 5/11 [03:00<03:37, 36.25s/it] 55%|█████▍ | 6/11 [03:36<03:00, 36.16s/it] 64%|██████▎ | 7/11 [04:12<02:24, 36.09s/it] 73%|███████▎ | 8/11 [04:48<01:48, 36.08s/it] 82%|████████▏ | 9/11 [05:24<01:12, 36.06s/it] 91%|█████████ | 10/11 [06:00<00:36, 36.07s/it] 100%|██████████| 11/11 [06:36<00:00, 36.07s/it] 100%|██████████| 11/11 [06:36<00:00, 36.07s/it] ->>>>>>>>>>>>>>>>>>>>>>>> ->>> Step 1: generating actions ... + 18%|█▊ | 2/11 [01:08<05:06, 34.03s/it] 27%|██▋ | 3/11 [01:42<04:34, 34.28s/it] 36%|███▋ | 4/11 [02:17<04:01, 34.45s/it] 45%|████▌ | 5/11 [02:51<03:26, 34.48s/it] 55%|█████▍ | 6/11 [03:26<02:52, 34.50s/it] 64%|██████▎ | 7/11 [04:00<02:18, 34.51s/it] 73%|███████▎ | 8/11 [04:35<01:43, 34.53s/it] 82%|████████▏ | 9/11 [05:10<01:09, 34.56s/it] 91%|█████████ | 10/11 [05:44<00:34, 34.53s/it] 100%|██████████| 11/11 [06:18<00:00, 34.50s/it] 100%|██████████| 11/11 [06:18<00:00, 34.45s/it] >>> Step 1: interacting with world model ... >>>>>>>>>>>>>>>>>>>>>>>> >>> Step 2: generating actions ... @@ -125,6 +114,6 @@ DEBUG:PIL.Image:Importing XVThumbImagePlugin >>> Step 10: interacting with world model ... >>>>>>>>>>>>>>>>>>>>>>>> -real 8m13.634s -user 7m37.875s -sys 2m31.672s +real 6m56.631s +user 5m36.951s +sys 2m10.073s