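补充上次提交：将扩散主干（model.model）由 BF16 改为 FP16，并在 DiffusionWrapper 的前向计算外包一层 FP16 autocast；同时更新 case1 的日志与 PSNR 结果（28.17 → 25.22）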

2026-02-11 16:24:40 +08:00
parent 352a79035f
commit f386a5810b
4 changed files with 26 additions and 15 deletions
--- a/scripts/evaluation/world_model_interaction.py
+++ b/scripts/evaluation/world_model_interaction.py
@@ -572,9 +572,9 @@ def run_inference(args: argparse.Namespace, gpu_num: int, gpu_no: int) -> None:
        print(f">>> Prepared model saved ({os.path.getsize(prepared_path) / 1024**3:.1f} GB).")

    # ---- FP16: only convert the diffusion backbone, keep VAE/CLIP/embedder in FP32 ----
-    model.model.to(torch.bfloat16)
-    model.model.diffusion_model.dtype = torch.bfloat16
-    print(">>> Diffusion backbone (model.model) converted to BF16.")
+    model.model.to(torch.float16)
+    model.model.diffusion_model.dtype = torch.float16
+    print(">>> Diffusion backbone (model.model) converted to FP16.")

    # Build normalizer (always needed, independent of model loading path)
    logging.info("***** Configing Data *****")
--- a/src/unifolm_wma/models/ddpms.py
+++ b/src/unifolm_wma/models/ddpms.py
@@ -2457,7 +2457,17 @@ class DiffusionWrapper(pl.LightningModule):
        Returns:
            Output from the inner diffusion model (tensor or tuple, depending on the model).
        """
+        with torch.cuda.amp.autocast(dtype=torch.float16):
+            return self._forward_impl(x, x_action, x_state, t,
+                                      c_concat, c_crossattn, c_crossattn_action,
+                                      c_adm, s, mask, **kwargs)

+    def _forward_impl(
+        self,
+        x, x_action, x_state, t,
+        c_concat=None, c_crossattn=None, c_crossattn_action=None,
+        c_adm=None, s=None, mask=None, **kwargs,
+    ):
        if self.conditioning_key is None:
            out = self.diffusion_model(x, t)
        elif self.conditioning_key == 'concat':
--- a/unitree_z1_dual_arm_stackbox_v2/case1/output.log
+++ b/unitree_z1_dual_arm_stackbox_v2/case1/output.log
@@ -1,13 +1,14 @@
-2026-02-11 15:03:49.644187: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
-2026-02-11 15:03:49.694117: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
-2026-02-11 15:03:49.694162: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
-2026-02-11 15:03:49.695456: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
-2026-02-11 15:03:49.702946: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
+2026-02-11 16:14:08.942290: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
+2026-02-11 16:14:08.992267: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
+2026-02-11 16:14:08.992319: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
+2026-02-11 16:14:08.993621: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
+2026-02-11 16:14:09.001096: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
 To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
-2026-02-11 15:03:50.638334: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
+2026-02-11 16:14:09.927986: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
 Global seed set to 123
 >>> Loading prepared model from ckpts/unifolm_wma_dual.ckpt.prepared.pt ...
 >>> Prepared model loaded.
+>>> Diffusion backbone (model.model) converted to FP16.
 INFO:root:***** Configing Data *****
 >>> unitree_z1_stackbox: 1 data samples loaded.
 >>> unitree_z1_stackbox: data stats loaded.
@@ -31,7 +32,7 @@ DEBUG:h5py._conv:Creating converter from 3 to 5
 DEBUG:PIL.PngImagePlugin:STREAM b'IHDR' 16 13
 DEBUG:PIL.PngImagePlugin:STREAM b'pHYs' 41 9
 DEBUG:PIL.PngImagePlugin:STREAM b'IDAT' 62 4096
-
+
  0%|          | 0/11 [00:00<?, ?it/s]
  9%|▉         | 1/11 [00:23<03:58, 23.83s/it]>>> Step 0: generating actions ...
 >>> Step 0: interacting with world model ...
@@ -84,7 +85,7 @@ DEBUG:PIL.Image:Importing WmfImagePlugin
 DEBUG:PIL.Image:Importing WebPImagePlugin
 DEBUG:PIL.Image:Importing WmfImagePlugin
 DEBUG:PIL.Image:Importing XbmImagePlugin
-DEBUG:PIL.Image:Importing XpmImagePlugin
+DEBUG:PIL.Image:Importing XpmImagePlugin
 DEBUG:PIL.Image:Importing XVThumbImagePlugin

 18%|█▊        | 2/11 [00:47<03:32, 23.66s/it]
@@ -115,6 +116,6 @@ DEBUG:PIL.Image:Importing XVThumbImagePlugin
 >>> Step 6: generating actions ...
 >>> Step 6: interacting with world model ...
 >>>>>>>>>>>>>>>>>>>>>>>>
->>> Step 7: generating actions ...
->>> Step 7: interacting with world model ...
->>>>>>>>>>>>>>>>>>>>>>>>
+>>> Step 7: generating actions ...
+>>> Step 7: interacting with world model ...
+>>>>>>>>>>>>>>>>>>>>>>>>
--- a/unitree_z1_dual_arm_stackbox_v2/case1/psnr_result.json
+++ b/unitree_z1_dual_arm_stackbox_v2/case1/psnr_result.json
@@ -1,5 +1,5 @@
 {
    "gt_video": "unitree_z1_dual_arm_stackbox_v2/case1/unitree_z1_dual_arm_stackbox_v2_case1.mp4",
    "pred_video": "unitree_z1_dual_arm_stackbox_v2/case1/output/inference/5_full_fs4.mp4",
-    "psnr": 28.167025381705358
+    "psnr": 25.21894470816415
 }