This commit is contained in:
qhy
2026-02-11 19:48:14 +08:00
parent cbaebc016f
commit 202062a647
104 changed files with 611 additions and 1 deletions

1
.gitignore vendored
View File

@@ -128,3 +128,4 @@ Data/Pretrained
Data/utils.py
Experiment/checkpoint
Experiment/log
ckpts/unifolm_wma_dual.ckpt

View File

@@ -222,7 +222,7 @@ data:
test:
target: unifolm_wma.data.wma_data.WMAData
params:
data_dir: '/path/to/unifolm-world-model-action/examples/world_model_interaction_prompts'
data_dir: '/home/qhy/unifolm-world-model-action/examples/world_model_interaction_prompts'
video_length: ${model.params.wma_config.params.temporal_length}
frame_stride: 2
load_raw_resolution: True

View File

@@ -0,0 +1,89 @@
import os
import glob
import numpy as np
import json
from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
from tqdm import tqdm
from moviepy.video.io.VideoFileClip import VideoFileClip
import PIL.Image
def calculate_psnr(img1, img2, max_pixel=255.0):
    """Compute the Peak Signal-to-Noise Ratio (PSNR) between two images.

    Args:
        img1: First image, an array-like of pixel values.
        img2: Second image, same shape as ``img1``.
        max_pixel: Maximum possible pixel value (255.0 for 8-bit images).
            Kept as a defaulted parameter so non-8-bit data can be scored.

    Returns:
        PSNR in decibels; ``float('inf')`` when the images are identical.
    """
    # Cast to float64 before subtracting so unsigned 8-bit inputs
    # do not wrap around on negative differences.
    a = np.asarray(img1, dtype=np.float64)
    b = np.asarray(img2, dtype=np.float64)
    mse = np.mean((a - b) ** 2)
    if mse == 0:
        # Zero error => infinite PSNR by convention.
        return float('inf')
    return 20 * np.log10(max_pixel / np.sqrt(mse))
def process_video_psnr(gt_path, pred_path):
    """Compute the mean per-frame PSNR between two videos.

    Frames are sampled at the lower of the two frame rates over the shorter
    of the two durations, and each frame is resized to 256x256 before
    comparison so resolution differences do not skew the score.

    Args:
        gt_path: Path to the reference (ground-truth) video.
        pred_path: Path to the predicted video.

    Returns:
        Mean PSNR in dB over all sampled frames, 0.0 if no frames were
        sampled, or None if either video could not be processed.
    """
    clip_gt = None
    clip_pred = None
    try:
        clip_gt = VideoFileClip(gt_path)
        clip_pred = VideoFileClip(pred_path)
        # Sample on the timeline common to both clips.
        fps = min(clip_gt.fps, clip_pred.fps)
        duration = min(clip_gt.duration, clip_pred.duration)
        time_points = np.arange(0, duration, 1.0 / fps)
        video_psnrs = []
        for t in time_points:
            frame_gt = clip_gt.get_frame(t)
            frame_pred = clip_pred.get_frame(t)
            # Normalize both frames to a common 256x256 resolution.
            img_gt = PIL.Image.fromarray(frame_gt).resize((256, 256), PIL.Image.Resampling.BILINEAR)
            img_pred = PIL.Image.fromarray(frame_pred).resize((256, 256), PIL.Image.Resampling.BILINEAR)
            video_psnrs.append(calculate_psnr(np.array(img_gt), np.array(img_pred)))
        return np.mean(video_psnrs) if video_psnrs else 0.0
    except Exception as e:
        # Best-effort: report the failure and let the caller decide.
        print(f"Error processing {os.path.basename(gt_path)}: {e}")
        return None
    finally:
        # Close the clips even when frame extraction raised — the original
        # code only closed them on the success path and leaked both readers
        # on any mid-processing error.
        for clip in (clip_gt, clip_pred):
            if clip is not None:
                clip.close()
def main():
    """CLI entry point: compare two videos by PSNR and optionally save JSON."""
    parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
    parser.add_argument('--gt_video', type=str, required=True, help='path to reference videos')
    parser.add_argument('--pred_video', type=str, required=True, help='path to pred videos')
    parser.add_argument('--output_file', type=str, default=None, help='path to output file')
    args = parser.parse_args()

    # Guard clauses: stop at the first missing input, reporting which one.
    for label, path in (("GT", args.gt_video), ("Pred", args.pred_video)):
        if not os.path.exists(path):
            print(f"Error: {label} video not found at {path}")
            return

    print(f"Comparing:\nRef: {args.gt_video}\nPred: {args.pred_video}")
    v_psnr = process_video_psnr(args.gt_video, args.pred_video)
    if v_psnr is None:
        print("Failed to calculate PSNR.")
        return

    separator = "-" * 30
    print(separator)
    print(f"Video PSNR: {v_psnr:.4f} dB")
    print(separator)

    if args.output_file:
        # Persist the comparison alongside the inputs that produced it.
        result = {
            "gt_video": args.gt_video,
            "pred_video": args.pred_video,
            "psnr": v_psnr
        }
        with open(args.output_file, 'w') as f:
            json.dump(result, f, indent=4)
        print(f"Result saved to {args.output_file}")


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_g1_pack_camera, case1.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_g1_pack_camera/case1"
dataset="unitree_g1_pack_camera"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_g1_pack_camera/case1/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 6 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 11 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 209 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
0,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 0 x x unitree_g1_pack_camera mount camera x x x G1_Dex1 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_g1_pack_camera, case2.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_g1_pack_camera/case2"
dataset="unitree_g1_pack_camera"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_g1_pack_camera/case2/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 6 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 11 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 214 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
50,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 50 x x unitree_g1_pack_camera mount camera x x x G1_Dex1 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_g1_pack_camera, case3.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_g1_pack_camera/case3"
dataset="unitree_g1_pack_camera"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_g1_pack_camera/case3/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 6 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 11 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 190 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
100,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 100 x x unitree_g1_pack_camera mount camera x x x G1_Dex1 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_g1_pack_camera, case4.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_g1_pack_camera/case4"
dataset="unitree_g1_pack_camera"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_g1_pack_camera/case4/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 6 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 11 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 221 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
200,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 200 x x unitree_g1_pack_camera mount camera x x x G1_Dex1 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_cleanup_pencils, case1.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_cleanup_pencils/case1"
dataset="unitree_z1_dual_arm_cleanup_pencils"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 8 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
0,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 0 x x unitree_z1_dual_arm_cleanup_pencils clean up eraser and pencils x x x Z1_Dual_Dex1 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_cleanup_pencils, case2.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_cleanup_pencils/case2"
dataset="unitree_z1_dual_arm_cleanup_pencils"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 8 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
50,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 50 x x unitree_z1_dual_arm_cleanup_pencils clean up eraser and pencils x x x Z1_Dual_Dex1 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_cleanup_pencils, case3.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_cleanup_pencils/case3"
dataset="unitree_z1_dual_arm_cleanup_pencils"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 8 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
100,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 100 x x unitree_z1_dual_arm_cleanup_pencils clean up eraser and pencils x x x Z1_Dual_Dex1 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_cleanup_pencils, case4.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_cleanup_pencils/case4"
dataset="unitree_z1_dual_arm_cleanup_pencils"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 8 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
200,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 200 x x unitree_z1_dual_arm_cleanup_pencils clean up eraser and pencils x x x Z1_Dual_Dex1 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_stackbox, case1.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_stackbox/case1"
dataset="unitree_z1_dual_arm_stackbox"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 7 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 272 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
5,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 5 x x unitree_z1_dual_arm_stackbox Pick up the red cup on the table. x x x Unitree Z1 Robot Dual-Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_stackbox, case2.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_stackbox/case2"
dataset="unitree_z1_dual_arm_stackbox"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 7 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 268 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
15,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 15 x x unitree_z1_dual_arm_stackbox Pick up the red cup on the table. x x x Unitree Z1 Robot Dual-Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_stackbox, case3.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_stackbox/case3"
dataset="unitree_z1_dual_arm_stackbox"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 7 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 267 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
25,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 25 x x unitree_z1_dual_arm_stackbox Pick up the red cup on the table. x x x Unitree Z1 Robot Dual-Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_stackbox, case4.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_stackbox/case4"
dataset="unitree_z1_dual_arm_stackbox"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 7 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 280 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
35,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 35 x x unitree_z1_dual_arm_stackbox Pick up the red cup on the table. x x x Unitree Z1 Robot Dual-Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_stackbox_v2, case1.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_stackbox_v2/case1"
dataset="unitree_z1_dual_arm_stackbox_v2"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 11 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 186 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
5,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 5 x x unitree_z1_dual_arm_stackbox_v2 Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top x x x Unitree Z1 Robot Dual-Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_stackbox_v2, case2.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_stackbox_v2/case2"
dataset="unitree_z1_dual_arm_stackbox_v2"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 11 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
15,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 15 x x unitree_z1_dual_arm_stackbox_v2 Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top x x x Unitree Z1 Robot Dual-Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_stackbox_v2, case3.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_stackbox_v2/case3"
dataset="unitree_z1_dual_arm_stackbox_v2"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 11 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
25,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 25 x x unitree_z1_dual_arm_stackbox_v2 Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top x x x Unitree Z1 Robot Dual-Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_dual_arm_stackbox_v2, case4.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_dual_arm_stackbox_v2/case4"
dataset="unitree_z1_dual_arm_stackbox_v2"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 11 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
35,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 35 x x unitree_z1_dual_arm_stackbox_v2 Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top x x x Unitree Z1 Robot Dual-Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_stackbox, case1.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_stackbox/case1"
dataset="unitree_z1_stackbox"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_stackbox/case1/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 12 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 153 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
5,x,x,unitree_z1_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 5 x x unitree_z1_stackbox Pick up the red cup on the table. x x x Unitree Z1 Robot Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_stackbox, case2.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_stackbox/case2"
dataset="unitree_z1_stackbox"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_stackbox/case2/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 12 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 164 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
15,x,x,unitree_z1_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 15 x x unitree_z1_stackbox Pick up the red cup on the table. x x x Unitree Z1 Robot Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_stackbox, case3.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_stackbox/case3"
dataset="unitree_z1_stackbox"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_stackbox/case3/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 12 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Binary file not shown.

After

Width:  |  Height:  |  Size: 146 KiB

View File

@@ -0,0 +1,2 @@
videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps
25,x,x,unitree_z1_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Arm,30
1 videoid contentUrl duration data_dir instruction dynamic_confidence dynamic_wording dynamic_source_category embodiment fps
2 25 x x unitree_z1_stackbox Pick up the red cup on the table. x x x Unitree Z1 Robot Arm 30

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
# Run world-model interaction inference for unitree_z1_stackbox, case4.
# Generated videos go to ${res_dir}/output; the timed console output is
# captured to ${res_dir}/output.log via tee.
res_dir="unitree_z1_stackbox/case4"
dataset="unitree_z1_stackbox"
# Ensure the result directory exists before tee opens the log file.
mkdir -p "${res_dir}"
{
time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \
    --seed 123 \
    --ckpt_path ckpts/unifolm_wma_dual.ckpt \
    --config configs/inference/world_model_interaction.yaml \
    --savedir "${res_dir}/output" \
    --bs 1 --height 320 --width 512 \
    --unconditional_guidance_scale 1.0 \
    --ddim_steps 50 \
    --ddim_eta 1.0 \
    --prompt_dir "unitree_z1_stackbox/case4/world_model_interaction_prompts" \
    --dataset "${dataset}" \
    --video_length 16 \
    --frame_stride 4 \
    --n_action_steps 16 \
    --exe_steps 16 \
    --n_iter 12 \
    --timestep_spacing 'uniform_trailing' \
    --guidance_rescale 0.7 \
    --perframe_ae
} 2>&1 | tee "${res_dir}/output.log"

Some files were not shown because too many files have changed in this diff Show More