DDIM loop 内小张量分配优化，attention mask 缓存到 GPU

2026-02-10 16:53:00 +08:00
parent ed637c972b
commit 91a9b0febc
5 changed files with 283 additions and 32 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -55,7 +55,6 @@ coverage.xml
 *.pot
 # Django stuff:
-*.log
 local_settings.py
 db.sqlite3
--- a/src/unifolm_wma/models/samplers/ddim.py
+++ b/src/unifolm_wma/models/samplers/ddim.py
@@ -67,11 +67,12 @@ class DDIMSampler(object):
            ddim_timesteps=self.ddim_timesteps,
            eta=ddim_eta,
            verbose=verbose)
-        self.register_buffer('ddim_sigmas', ddim_sigmas)
+        # Ensure tensors are on correct device for efficient indexing
-        self.register_buffer('ddim_alphas', ddim_alphas)
+        self.register_buffer('ddim_sigmas', to_torch(torch.as_tensor(ddim_sigmas)))
-        self.register_buffer('ddim_alphas_prev', ddim_alphas_prev)
+        self.register_buffer('ddim_alphas', to_torch(torch.as_tensor(ddim_alphas)))
+        self.register_buffer('ddim_alphas_prev', to_torch(torch.as_tensor(ddim_alphas_prev)))
        self.register_buffer('ddim_sqrt_one_minus_alphas',
-                             np.sqrt(1. - ddim_alphas))
+                             to_torch(torch.as_tensor(np.sqrt(1. - ddim_alphas))))
        sigmas_for_original_sampling_steps = ddim_eta * torch.sqrt(
            (1 - self.alphas_cumprod_prev) / (1 - self.alphas_cumprod) *
            (1 - self.alphas_cumprod / self.alphas_cumprod_prev))
@@ -241,9 +242,10 @@ class DDIMSampler(object):
        dp_ddim_scheduler_action.set_timesteps(len(timesteps))
        dp_ddim_scheduler_state.set_timesteps(len(timesteps))
+        ts = torch.empty((b, ), device=device, dtype=torch.long)
        for i, step in enumerate(iterator):
            index = total_steps - i - 1
-            ts = torch.full((b, ), step, device=device, dtype=torch.long)
+            ts.fill_(step)
            # Use mask to blend noised original latent (img_orig) & new sampled latent (img)
            if mask is not None:
@@ -325,10 +327,6 @@ class DDIMSampler(object):
                      guidance_rescale=0.0,
                      **kwargs):
        b, *_, device = *x.shape, x.device
-        if x.dim() == 5:
-            is_video = True
-        else:
-            is_video = False
        if unconditional_conditioning is None or unconditional_guidance_scale == 1.:
            model_output, model_output_action, model_output_state = self.model.apply_model(
@@ -377,17 +375,11 @@ class DDIMSampler(object):
        sqrt_one_minus_alphas = self.model.sqrt_one_minus_alphas_cumprod if use_original_steps else self.ddim_sqrt_one_minus_alphas
        sigmas = self.ddim_sigmas_for_original_num_steps if use_original_steps else self.ddim_sigmas
-        if is_video:
+        # Use 0-d tensors directly (already on device); broadcasting handles shape
-            size = (b, 1, 1, 1, 1)
+        a_t = alphas[index]
-        else:
+        a_prev = alphas_prev[index]
-            size = (b, 1, 1, 1)
+        sigma_t = sigmas[index]
-
+        sqrt_one_minus_at = sqrt_one_minus_alphas[index]
-        a_t = torch.full(size, alphas[index], device=device)
-        a_prev = torch.full(size, alphas_prev[index], device=device)
-        sigma_t = torch.full(size, sigmas[index], device=device)
-        sqrt_one_minus_at = torch.full(size,
-                                       sqrt_one_minus_alphas[index],
-                                       device=device)
        if self.model.parameterization != "v":
            pred_x0 = (x - sqrt_one_minus_at * e_t) / a_t.sqrt()
@@ -395,12 +387,8 @@ class DDIMSampler(object):
            pred_x0 = self.model.predict_start_from_z_and_v(x, t, model_output)
        if self.model.use_dynamic_rescale:
-            scale_t = torch.full(size,
+            scale_t = self.ddim_scale_arr[index]
-                                 self.ddim_scale_arr[index],
+            prev_scale_t = self.ddim_scale_arr_prev[index]
-                                 device=device)
-            prev_scale_t = torch.full(size,
-                                      self.ddim_scale_arr_prev[index],
-                                      device=device)
            rescale = (prev_scale_t / scale_t)
            pred_x0 *= rescale
--- a/src/unifolm_wma/modules/attention.py
+++ b/src/unifolm_wma/modules/attention.py
@@ -275,7 +275,8 @@ class CrossAttention(nn.Module):
                attn_mask_aa = self._get_attn_mask_aa(x.shape[0],
                                                      q.shape[1],
                                                      k_aa.shape[1],
-                                                      block_size=16).to(k_aa.device)
+                                                      block_size=16,
+                                                      device=k_aa.device)
        else:
            if not spatial_self_attn:
                assert 1 > 2, ">>> ERROR: you should never go into here ..."
@@ -386,14 +387,26 @@ class CrossAttention(nn.Module):
        return self.to_out(out)
-    def _get_attn_mask_aa(self, b, l1, l2, block_size=16):
+    def _get_attn_mask_aa(self, b, l1, l2, block_size=16, device=None):
+        cache_key = (b, l1, l2, block_size)
+        if hasattr(self, '_attn_mask_aa_cache_key') and self._attn_mask_aa_cache_key == cache_key:
+            cached = self._attn_mask_aa_cache
+            if device is not None and cached.device != torch.device(device):
+                cached = cached.to(device)
+                self._attn_mask_aa_cache = cached
+            return cached
+        target_device = device if device is not None else 'cpu'
        num_token = l2 // block_size
-        start_positions = ((torch.arange(b) % block_size) + 1) * num_token
+        start_positions = ((torch.arange(b, device=target_device) % block_size) + 1) * num_token
-        col_indices = torch.arange(l2)
+        col_indices = torch.arange(l2, device=target_device)
        mask_2d = col_indices.unsqueeze(0) >= start_positions.unsqueeze(1)
        mask = mask_2d.unsqueeze(1).expand(b, l1, l2)
-        attn_mask = torch.zeros_like(mask, dtype=torch.float)
+        attn_mask = torch.zeros(b, l1, l2, dtype=torch.float, device=target_device)
        attn_mask[mask] = float('-inf')
+        self._attn_mask_aa_cache_key = cache_key
+        self._attn_mask_aa_cache = attn_mask
        return attn_mask
--- a/unitree_z1_dual_arm_cleanup_pencils/case1/output.log
+++ b/unitree_z1_dual_arm_cleanup_pencils/case1/output.log
@@ -0,0 +1,121 @@
 2026-02-10 15:38:28.973314: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
 2026-02-10 15:38:29.023024: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
 2026-02-10 15:38:29.023070: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
 2026-02-10 15:38:29.024393: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
 2026-02-10 15:38:29.031901: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
 To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
 2026-02-10 15:38:29.955454: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
 Global seed set to 123
 INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
 INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
 INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
 AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
 INFO:root:Loaded ViT-H-14 model config.
 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): hf-mirror.com:443
 DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
 INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
 INFO:root:Loaded ViT-H-14 model config.
 DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
 INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
 >>> model checkpoint loaded.
 >>> Load pre-trained model ...
 INFO:root:***** Configing Data *****
 >>> unitree_z1_stackbox: 1 data samples loaded.
 >>> unitree_z1_stackbox: data stats loaded.
 >>> unitree_z1_stackbox: normalizer initiated.
 >>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
 >>> unitree_z1_dual_arm_stackbox: data stats loaded.
 >>> unitree_z1_dual_arm_stackbox: normalizer initiated.
 >>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
 >>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
 >>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
 >>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
 >>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
 >>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
 >>> unitree_g1_pack_camera: 1 data samples loaded.
 >>> unitree_g1_pack_camera: data stats loaded.
 >>> unitree_g1_pack_camera: normalizer initiated.
 >>> Dataset is successfully loaded ...
 >>> Generate 16 frames under each generation ...
 DEBUG:h5py._conv:Creating converter from 3 to 5
 DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
 DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
 DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
  0%|          | 0/8 [00:00<?, ?it/s]>>> Step 0: generating actions ...
 >>> Step 0: interacting with world model ...
 DEBUG:PIL.Image:Importing BlpImagePlugin
 DEBUG:PIL.Image:Importing BmpImagePlugin
 DEBUG:PIL.Image:Importing BufrStubImagePlugin
 DEBUG:PIL.Image:Importing CurImagePlugin
 DEBUG:PIL.Image:Importing DcxImagePlugin
 DEBUG:PIL.Image:Importing DdsImagePlugin
 DEBUG:PIL.Image:Importing EpsImagePlugin
 DEBUG:PIL.Image:Importing FitsImagePlugin
 DEBUG:PIL.Image:Importing FitsStubImagePlugin
 DEBUG:PIL.Image:Importing FliImagePlugin
 DEBUG:PIL.Image:Importing FpxImagePlugin
 DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
 DEBUG:PIL.Image:Importing FtexImagePlugin
 DEBUG:PIL.Image:Importing GbrImagePlugin
 DEBUG:PIL.Image:Importing GifImagePlugin
 DEBUG:PIL.Image:Importing GribStubImagePlugin
 DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
 DEBUG:PIL.Image:Importing IcnsImagePlugin
 DEBUG:PIL.Image:Importing IcoImagePlugin
 DEBUG:PIL.Image:Importing ImImagePlugin
 DEBUG:PIL.Image:Importing ImtImagePlugin
 DEBUG:PIL.Image:Importing IptcImagePlugin
 DEBUG:PIL.Image:Importing JpegImagePlugin
 DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
 DEBUG:PIL.Image:Importing McIdasImagePlugin
 DEBUG:PIL.Image:Importing MicImagePlugin
 DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
 DEBUG:PIL.Image:Importing MpegImagePlugin
 DEBUG:PIL.Image:Importing MpoImagePlugin
 DEBUG:PIL.Image:Importing MspImagePlugin
 DEBUG:PIL.Image:Importing PalmImagePlugin
 DEBUG:PIL.Image:Importing PcdImagePlugin
 DEBUG:PIL.Image:Importing PcxImagePlugin
 DEBUG:PIL.Image:Importing PdfImagePlugin
 DEBUG:PIL.Image:Importing PixarImagePlugin
 DEBUG:PIL.Image:Importing PngImagePlugin
 DEBUG:PIL.Image:Importing PpmImagePlugin
 DEBUG:PIL.Image:Importing PsdImagePlugin
 DEBUG:PIL.Image:Importing QoiImagePlugin
 DEBUG:PIL.Image:Importing SgiImagePlugin
 DEBUG:PIL.Image:Importing SpiderImagePlugin
 DEBUG:PIL.Image:Importing SunImagePlugin
 DEBUG:PIL.Image:Importing TgaImagePlugin
 DEBUG:PIL.Image:Importing TiffImagePlugin
 DEBUG:PIL.Image:Importing WebPImagePlugin
 DEBUG:PIL.Image:Importing WmfImagePlugin
 DEBUG:PIL.Image:Importing XbmImagePlugin
 DEBUG:PIL.Image:Importing XpmImagePlugin
 DEBUG:PIL.Image:Importing XVThumbImagePlugin
 12%|█▎        | 1/8 [01:14<08:41, 74.51s/it]
 25%|██▌       | 2/8 [02:29<07:28, 74.79s/it]
 38%|███▊      | 3/8 [03:44<06:14, 74.81s/it]
 50%|█████     | 4/8 [04:59<04:59, 74.78s/it]
 62%|██████▎   | 5/8 [06:13<03:44, 74.73s/it]
 75%|███████▌  | 6/8 [07:28<02:29, 74.66s/it]
 88%|████████▊ | 7/8 [08:42<01:14, 74.56s/it]
 100%|██████████| 8/8 [09:56<00:00, 74.51s/it]
 100%|██████████| 8/8 [09:56<00:00, 74.62s/it]
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 1: generating actions ...
 >>> Step 1: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 2: generating actions ...
 >>> Step 2: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 3: generating actions ...
 >>> Step 3: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 4: generating actions ...
 >>> Step 4: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 5: generating actions ...
 >>> Step 5: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
--- a/unitree_z1_dual_arm_stackbox_v2/case1/output.log
+++ b/unitree_z1_dual_arm_stackbox_v2/case1/output.log
@@ -0,0 +1,130 @@
 2026-02-10 16:42:59.052755: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
 2026-02-10 16:42:59.102749: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
 2026-02-10 16:42:59.102803: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
 2026-02-10 16:42:59.104125: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
 2026-02-10 16:42:59.111711: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
 To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
 2026-02-10 16:43:00.040735: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
 Global seed set to 123
 INFO:mainlogger:LatentVisualDiffusion: Running in v-prediction mode
 INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
 INFO:unifolm_wma.models.diffusion_head.conditional_unet1d:number of parameters: 5.010531e+08
 AE working on z of shape (1, 4, 32, 32) = 4096 dimensions.
 INFO:root:Loaded ViT-H-14 model config.
 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): hf-mirror.com:443
 DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
 INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
 INFO:root:Loaded ViT-H-14 model config.
 DEBUG:urllib3.connectionpool:https://hf-mirror.com:443 "HEAD /laion/CLIP-ViT-H-14-laion2B-s32B-b79K/resolve/main/open_clip_pytorch_model.bin HTTP/1.1" 302 0
 INFO:root:Loading pretrained ViT-H-14 weights (laion2b_s32b_b79k).
 >>> model checkpoint loaded.
 >>> Load pre-trained model ...
 INFO:root:***** Configing Data *****
 >>> unitree_z1_stackbox: 1 data samples loaded.
 >>> unitree_z1_stackbox: data stats loaded.
 >>> unitree_z1_stackbox: normalizer initiated.
 >>> unitree_z1_dual_arm_stackbox: 1 data samples loaded.
 >>> unitree_z1_dual_arm_stackbox: data stats loaded.
 >>> unitree_z1_dual_arm_stackbox: normalizer initiated.
 >>> unitree_z1_dual_arm_stackbox_v2: 1 data samples loaded.
 >>> unitree_z1_dual_arm_stackbox_v2: data stats loaded.
 >>> unitree_z1_dual_arm_stackbox_v2: normalizer initiated.
 >>> unitree_z1_dual_arm_cleanup_pencils: 1 data samples loaded.
 >>> unitree_z1_dual_arm_cleanup_pencils: data stats loaded.
 >>> unitree_z1_dual_arm_cleanup_pencils: normalizer initiated.
 >>> unitree_g1_pack_camera: 1 data samples loaded.
 >>> unitree_g1_pack_camera: data stats loaded.
 >>> unitree_g1_pack_camera: normalizer initiated.
 >>> Dataset is successfully loaded ...
 >>> Generate 16 frames under each generation ...
 DEBUG:h5py._conv:Creating converter from 3 to 5
 DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
 DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
 DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
  0%|          | 0/11 [00:00<?, ?it/s]>>> Step 0: generating actions ...
 >>> Step 0: interacting with world model ...
 DEBUG:PIL.Image:Importing BlpImagePlugin
 DEBUG:PIL.Image:Importing BmpImagePlugin
 DEBUG:PIL.Image:Importing BufrStubImagePlugin
 DEBUG:PIL.Image:Importing CurImagePlugin
 DEBUG:PIL.Image:Importing DcxImagePlugin
 DEBUG:PIL.Image:Importing DdsImagePlugin
 DEBUG:PIL.Image:Importing EpsImagePlugin
 DEBUG:PIL.Image:Importing FitsImagePlugin
 DEBUG:PIL.Image:Importing FitsStubImagePlugin
 DEBUG:PIL.Image:Importing FliImagePlugin
 DEBUG:PIL.Image:Importing FpxImagePlugin
 DEBUG:PIL.Image:Image: failed to import FpxImagePlugin: No module named 'olefile'
 DEBUG:PIL.Image:Importing FtexImagePlugin
 DEBUG:PIL.Image:Importing GbrImagePlugin
 DEBUG:PIL.Image:Importing GifImagePlugin
 DEBUG:PIL.Image:Importing GribStubImagePlugin
 DEBUG:PIL.Image:Importing Hdf5StubImagePlugin
 DEBUG:PIL.Image:Importing IcnsImagePlugin
 DEBUG:PIL.Image:Importing IcoImagePlugin
 DEBUG:PIL.Image:Importing ImImagePlugin
 DEBUG:PIL.Image:Importing ImtImagePlugin
 DEBUG:PIL.Image:Importing IptcImagePlugin
 DEBUG:PIL.Image:Importing JpegImagePlugin
 DEBUG:PIL.Image:Importing Jpeg2KImagePlugin
 DEBUG:PIL.Image:Importing McIdasImagePlugin
 DEBUG:PIL.Image:Importing MicImagePlugin
 DEBUG:PIL.Image:Image: failed to import MicImagePlugin: No module named 'olefile'
 DEBUG:PIL.Image:Importing MpegImagePlugin
 DEBUG:PIL.Image:Importing MpoImagePlugin
 DEBUG:PIL.Image:Importing MspImagePlugin
 DEBUG:PIL.Image:Importing PalmImagePlugin
 DEBUG:PIL.Image:Importing PcdImagePlugin
 DEBUG:PIL.Image:Importing PcxImagePlugin
 DEBUG:PIL.Image:Importing PdfImagePlugin
 DEBUG:PIL.Image:Importing PixarImagePlugin
 DEBUG:PIL.Image:Importing PngImagePlugin
 DEBUG:PIL.Image:Importing PpmImagePlugin
 DEBUG:PIL.Image:Importing PsdImagePlugin
 DEBUG:PIL.Image:Importing QoiImagePlugin
 DEBUG:PIL.Image:Importing SgiImagePlugin
 DEBUG:PIL.Image:Importing SpiderImagePlugin
 DEBUG:PIL.Image:Importing SunImagePlugin
 DEBUG:PIL.Image:Importing TgaImagePlugin
 DEBUG:PIL.Image:Importing TiffImagePlugin
 DEBUG:PIL.Image:Importing WebPImagePlugin
 DEBUG:PIL.Image:Importing WmfImagePlugin
 DEBUG:PIL.Image:Importing XbmImagePlugin
 DEBUG:PIL.Image:Importing XpmImagePlugin
 DEBUG:PIL.Image:Importing XVThumbImagePlugin
  9%|▉         | 1/11 [00:40<06:41, 40.19s/it]
 18%|█▊        | 2/11 [01:20<06:04, 40.45s/it]
 27%|██▋       | 3/11 [02:01<05:25, 40.72s/it]
 36%|███▋      | 4/11 [02:42<04:45, 40.81s/it]
 45%|████▌     | 5/11 [03:23<04:04, 40.76s/it]
 55%|█████▍    | 6/11 [04:03<03:22, 40.57s/it]
 64%|██████▎   | 7/11 [04:43<02:41, 40.48s/it]
 73%|███████▎  | 8/11 [05:24<02:01, 40.44s/it]
 82%|████████▏ | 9/11 [06:04<01:20, 40.41s/it]
 91%|█████████ | 10/11 [06:45<00:40, 40.44s/it]
 100%|██████████| 11/11 [07:25<00:00, 40.45s/it]
 100%|██████████| 11/11 [07:25<00:00, 40.51s/it]
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 1: generating actions ...
 >>> Step 1: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 2: generating actions ...
 >>> Step 2: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 3: generating actions ...
 >>> Step 3: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 4: generating actions ...
 >>> Step 4: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 5: generating actions ...
 >>> Step 5: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 6: generating actions ...
 >>> Step 6: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
 >>> Step 7: generating actions ...
 >>> Step 7: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>