diff --git a/.gitignore b/.gitignore index 1735dae..ea715de 100644 --- a/.gitignore +++ b/.gitignore @@ -128,3 +128,4 @@ Data/Pretrained Data/utils.py Experiment/checkpoint Experiment/log +ckpts/unifolm_wma_dual.ckpt diff --git a/configs/inference/world_model_interaction.yaml b/configs/inference/world_model_interaction.yaml index 970d029..a1e115a 100644 --- a/configs/inference/world_model_interaction.yaml +++ b/configs/inference/world_model_interaction.yaml @@ -222,7 +222,7 @@ data: test: target: unifolm_wma.data.wma_data.WMAData params: - data_dir: '/path/to/unifolm-world-model-action/examples/world_model_interaction_prompts' + data_dir: '/home/qhy/unifolm-world-model-action/examples/world_model_interaction_prompts' video_length: ${model.params.wma_config.params.temporal_length} frame_stride: 2 load_raw_resolution: True diff --git a/psnr_score_for_challenge.py b/psnr_score_for_challenge.py new file mode 100644 index 0000000..6223db6 --- /dev/null +++ b/psnr_score_for_challenge.py @@ -0,0 +1,89 @@ +import os +import glob +import numpy as np +import json +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter +from tqdm import tqdm +from moviepy.video.io.VideoFileClip import VideoFileClip +import PIL.Image + + +def calculate_psnr(img1, img2): + mse = np.mean((img1.astype(np.float64) - img2.astype(np.float64)) ** 2) + if mse == 0: + return float('inf') + max_pixel = 255.0 + psnr = 20 * np.log10(max_pixel / np.sqrt(mse)) + return psnr + + +def process_video_psnr(gt_path, pred_path): + try: + clip_gt = VideoFileClip(gt_path) + clip_pred = VideoFileClip(pred_path) + + fps = min(clip_gt.fps, clip_pred.fps) + duration = min(clip_gt.duration, clip_pred.duration) + + time_points = np.arange(0, duration, 1.0 / fps) + + video_psnrs = [] + + for t in time_points: + frame_gt = clip_gt.get_frame(t) + frame_pred = clip_pred.get_frame(t) + + img_gt = PIL.Image.fromarray(frame_gt).resize((256, 256), PIL.Image.Resampling.BILINEAR) + img_pred = PIL.Image.fromarray(frame_pred).resize((256, 256), PIL.Image.Resampling.BILINEAR) + + psnr = calculate_psnr(np.array(img_gt), np.array(img_pred)) + video_psnrs.append(psnr) + + clip_gt.close() + clip_pred.close() + + return np.mean(video_psnrs) if video_psnrs else 0.0 + + except Exception as e: + print(f"Error processing {os.path.basename(gt_path)}: {e}") + return None + + +def main(): + parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) + parser.add_argument('--gt_video', type=str, required=True, help='path to reference videos') + parser.add_argument('--pred_video', type=str, required=True, help='path to pred videos') + parser.add_argument('--output_file', type=str, default=None, help='path to output file') + args = parser.parse_args() + + if not os.path.exists(args.gt_video): + print(f"Error: GT video not found at {args.gt_video}") + return + if not os.path.exists(args.pred_video): + print(f"Error: Pred video not found at {args.pred_video}") + return + + print(f"Comparing:\nRef: {args.gt_video}\nPred: {args.pred_video}") + + v_psnr = process_video_psnr(args.gt_video, args.pred_video) + + if v_psnr is not None: + print("-" * 30) + print(f"Video PSNR: {v_psnr:.4f} dB") + print("-" * 30) + + if args.output_file: + result = { + "gt_video": args.gt_video, + "pred_video": args.pred_video, + "psnr": v_psnr + } + with open(args.output_file, 'w') as f: + json.dump(result, f, indent=4) + print(f"Result saved to {args.output_file}") + else: + print("Failed to calculate PSNR.") + + +if __name__ == '__main__': + main() diff --git a/unitree_g1_pack_camera/case1/output/tensorboard/events.out.tfevents.1770810354.node-0.3088018.0 b/unitree_g1_pack_camera/case1/output/tensorboard/events.out.tfevents.1770810354.node-0.3088018.0 new file mode 100644 index 0000000..d01153e Binary files /dev/null and b/unitree_g1_pack_camera/case1/output/tensorboard/events.out.tfevents.1770810354.node-0.3088018.0 differ diff --git a/unitree_g1_pack_camera/case1/run_world_model_interaction.sh b/unitree_g1_pack_camera/case1/run_world_model_interaction.sh new file mode 100644 index 0000000..e0e900f --- /dev/null +++ b/unitree_g1_pack_camera/case1/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_g1_pack_camera/case1" +dataset="unitree_g1_pack_camera" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_g1_pack_camera/case1/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 6 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 11 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_g1_pack_camera/case1/world_model_interaction_prompts/images/unitree_g1_pack_camera/0.png b/unitree_g1_pack_camera/case1/world_model_interaction_prompts/images/unitree_g1_pack_camera/0.png new file mode 100644 index 0000000..8008d7a Binary files /dev/null and b/unitree_g1_pack_camera/case1/world_model_interaction_prompts/images/unitree_g1_pack_camera/0.png differ diff --git a/unitree_g1_pack_camera/case1/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/0.h5 b/unitree_g1_pack_camera/case1/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/0.h5 new file mode 100644 index 0000000..a5bf1f7 Binary files /dev/null and b/unitree_g1_pack_camera/case1/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/0.h5 differ diff --git a/unitree_g1_pack_camera/case1/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors b/unitree_g1_pack_camera/case1/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors new file mode 100644 index 0000000..4bdf81f Binary files /dev/null and b/unitree_g1_pack_camera/case1/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors differ diff --git a/unitree_g1_pack_camera/case1/world_model_interaction_prompts/unitree_g1_pack_camera.csv b/unitree_g1_pack_camera/case1/world_model_interaction_prompts/unitree_g1_pack_camera.csv new file mode 100644 index 0000000..2bdc1cd --- /dev/null +++ b/unitree_g1_pack_camera/case1/world_model_interaction_prompts/unitree_g1_pack_camera.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +0,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30 diff --git a/unitree_g1_pack_camera/case2/run_world_model_interaction.sh b/unitree_g1_pack_camera/case2/run_world_model_interaction.sh new file mode 100644 index 0000000..36e613d --- /dev/null +++ b/unitree_g1_pack_camera/case2/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_g1_pack_camera/case2" +dataset="unitree_g1_pack_camera" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_g1_pack_camera/case2/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 6 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 11 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_g1_pack_camera/case2/world_model_interaction_prompts/images/unitree_g1_pack_camera/50.png b/unitree_g1_pack_camera/case2/world_model_interaction_prompts/images/unitree_g1_pack_camera/50.png new file mode 100644 index 0000000..83eebaf Binary files /dev/null and b/unitree_g1_pack_camera/case2/world_model_interaction_prompts/images/unitree_g1_pack_camera/50.png differ diff --git a/unitree_g1_pack_camera/case2/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/50.h5 b/unitree_g1_pack_camera/case2/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/50.h5 new file mode 100644 index 0000000..90e741b Binary files /dev/null and b/unitree_g1_pack_camera/case2/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/50.h5 differ diff --git a/unitree_g1_pack_camera/case2/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors b/unitree_g1_pack_camera/case2/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors new file mode 100644 index 0000000..4bdf81f Binary files /dev/null and b/unitree_g1_pack_camera/case2/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors differ diff --git a/unitree_g1_pack_camera/case2/world_model_interaction_prompts/unitree_g1_pack_camera.csv b/unitree_g1_pack_camera/case2/world_model_interaction_prompts/unitree_g1_pack_camera.csv new file mode 100644 index 0000000..35ead3a --- /dev/null +++ b/unitree_g1_pack_camera/case2/world_model_interaction_prompts/unitree_g1_pack_camera.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +50,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30 diff --git a/unitree_g1_pack_camera/case3/run_world_model_interaction.sh b/unitree_g1_pack_camera/case3/run_world_model_interaction.sh new file mode 100644 index 0000000..87e3098 --- /dev/null +++ b/unitree_g1_pack_camera/case3/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_g1_pack_camera/case3" +dataset="unitree_g1_pack_camera" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_g1_pack_camera/case3/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 6 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 11 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_g1_pack_camera/case3/world_model_interaction_prompts/images/unitree_g1_pack_camera/100.png b/unitree_g1_pack_camera/case3/world_model_interaction_prompts/images/unitree_g1_pack_camera/100.png new file mode 100644 index 0000000..2f658f3 Binary files /dev/null and b/unitree_g1_pack_camera/case3/world_model_interaction_prompts/images/unitree_g1_pack_camera/100.png differ diff --git a/unitree_g1_pack_camera/case3/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/100.h5 b/unitree_g1_pack_camera/case3/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/100.h5 new file mode 100644 index 0000000..f976464 Binary files /dev/null and b/unitree_g1_pack_camera/case3/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/100.h5 differ diff --git a/unitree_g1_pack_camera/case3/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors b/unitree_g1_pack_camera/case3/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors new file mode 100644 index 0000000..4bdf81f Binary files /dev/null and b/unitree_g1_pack_camera/case3/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors differ diff --git a/unitree_g1_pack_camera/case3/world_model_interaction_prompts/unitree_g1_pack_camera.csv b/unitree_g1_pack_camera/case3/world_model_interaction_prompts/unitree_g1_pack_camera.csv new file mode 100644 index 0000000..c6350c9 --- /dev/null +++ b/unitree_g1_pack_camera/case3/world_model_interaction_prompts/unitree_g1_pack_camera.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +100,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30 diff --git a/unitree_g1_pack_camera/case4/run_world_model_interaction.sh b/unitree_g1_pack_camera/case4/run_world_model_interaction.sh new file mode 100644 index 0000000..46c5217 --- /dev/null +++ b/unitree_g1_pack_camera/case4/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_g1_pack_camera/case4" +dataset="unitree_g1_pack_camera" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_g1_pack_camera/case4/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 6 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 11 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_g1_pack_camera/case4/world_model_interaction_prompts/images/unitree_g1_pack_camera/200.png b/unitree_g1_pack_camera/case4/world_model_interaction_prompts/images/unitree_g1_pack_camera/200.png new file mode 100644 index 0000000..3c718aa Binary files /dev/null and b/unitree_g1_pack_camera/case4/world_model_interaction_prompts/images/unitree_g1_pack_camera/200.png differ diff --git a/unitree_g1_pack_camera/case4/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/200.h5 b/unitree_g1_pack_camera/case4/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/200.h5 new file mode 100644 index 0000000..606c218 Binary files /dev/null and b/unitree_g1_pack_camera/case4/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/200.h5 differ diff --git a/unitree_g1_pack_camera/case4/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors b/unitree_g1_pack_camera/case4/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors new file mode 100644 index 0000000..4bdf81f Binary files /dev/null and b/unitree_g1_pack_camera/case4/world_model_interaction_prompts/transitions/unitree_g1_pack_camera/meta_data/stats.safetensors differ diff --git a/unitree_g1_pack_camera/case4/world_model_interaction_prompts/unitree_g1_pack_camera.csv b/unitree_g1_pack_camera/case4/world_model_interaction_prompts/unitree_g1_pack_camera.csv new file mode 100644 index 0000000..1fae9f0 --- /dev/null +++ b/unitree_g1_pack_camera/case4/world_model_interaction_prompts/unitree_g1_pack_camera.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +200,x,x,unitree_g1_pack_camera,mount camera,x,x,x,G1_Dex1,30 diff --git a/unitree_z1_dual_arm_cleanup_pencils/case1/run_world_model_interaction.sh b/unitree_z1_dual_arm_cleanup_pencils/case1/run_world_model_interaction.sh new file mode 100644 index 0000000..8fe141f --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case1/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_cleanup_pencils/case1" +dataset="unitree_z1_dual_arm_cleanup_pencils" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 8 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/0.png b/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/0.png new file mode 100644 index 0000000..2d8739d Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/0.png differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/0.h5 b/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/0.h5 new file mode 100644 index 0000000..6b120eb Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/0.h5 differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors b/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors new file mode 100644 index 0000000..e3194ab Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv b/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv new file mode 100644 index 0000000..a749385 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case1/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +0,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30 diff --git a/unitree_z1_dual_arm_cleanup_pencils/case2/run_world_model_interaction.sh b/unitree_z1_dual_arm_cleanup_pencils/case2/run_world_model_interaction.sh new file mode 100644 index 0000000..2b84103 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case2/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_cleanup_pencils/case2" +dataset="unitree_z1_dual_arm_cleanup_pencils" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 8 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/50.png b/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/50.png new file mode 100644 index 0000000..91725eb Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/50.png differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/50.h5 b/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/50.h5 new file mode 100644 index 0000000..6c08657 Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/50.h5 differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors b/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors new file mode 100644 index 0000000..e3194ab Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv b/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv new file mode 100644 index 0000000..a754862 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case2/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +50,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30 diff --git a/unitree_z1_dual_arm_cleanup_pencils/case3/run_world_model_interaction.sh b/unitree_z1_dual_arm_cleanup_pencils/case3/run_world_model_interaction.sh new file mode 100644 index 0000000..78c56d7 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case3/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_cleanup_pencils/case3" +dataset="unitree_z1_dual_arm_cleanup_pencils" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 8 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/100.png b/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/100.png new file mode 100644 index 0000000..7cc656f Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/100.png differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/100.h5 b/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/100.h5 new file mode 100644 index 0000000..185d89b Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/100.h5 differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors b/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors new file mode 100644 index 0000000..e3194ab Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv b/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv new file mode 100644 index 0000000..3462452 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case3/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +100,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30 diff --git a/unitree_z1_dual_arm_cleanup_pencils/case4/run_world_model_interaction.sh b/unitree_z1_dual_arm_cleanup_pencils/case4/run_world_model_interaction.sh new file mode 100644 index 0000000..9367c09 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case4/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_cleanup_pencils/case4" +dataset="unitree_z1_dual_arm_cleanup_pencils" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 8 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/200.png b/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/200.png new file mode 100644 index 0000000..9934a16 Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_cleanup_pencils/200.png differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/200.h5 b/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/200.h5 new file mode 100644 index 0000000..97ccecc Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/200.h5 differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors b/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors new file mode 100644 index 0000000..e3194ab Binary files /dev/null and b/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_cleanup_pencils/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv b/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv new file mode 100644 index 0000000..498d7f1 --- /dev/null +++ b/unitree_z1_dual_arm_cleanup_pencils/case4/world_model_interaction_prompts/unitree_z1_dual_arm_cleanup_pencils.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +200,x,x,unitree_z1_dual_arm_cleanup_pencils,clean up eraser and pencils,x,x,x,Z1_Dual_Dex1,30 diff --git a/unitree_z1_dual_arm_stackbox/case1/run_world_model_interaction.sh b/unitree_z1_dual_arm_stackbox/case1/run_world_model_interaction.sh new file mode 100644 index 0000000..0d9ed4c --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case1/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_stackbox/case1" +dataset="unitree_z1_dual_arm_stackbox" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 7 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/5.png b/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/5.png new file mode 100644 index 0000000..eb6e272 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/5.png differ diff --git a/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/5.h5 b/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/5.h5 new file mode 100644 index 0000000..af951c1 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/5.h5 differ diff --git a/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors b/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors new file mode 100644 index 0000000..fa7fd40 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv b/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv new file mode 100644 index 0000000..6e7f0a8 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case1/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +5,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30 diff --git a/unitree_z1_dual_arm_stackbox/case2/run_world_model_interaction.sh b/unitree_z1_dual_arm_stackbox/case2/run_world_model_interaction.sh new file mode 100644 index 0000000..7b6d005 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case2/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_stackbox/case2" +dataset="unitree_z1_dual_arm_stackbox" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 7 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/15.png b/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/15.png new file mode 100644 index 0000000..676341b Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/15.png differ diff --git a/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/15.h5 b/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/15.h5 new file mode 100644 index 0000000..bf66fa5 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/15.h5 differ diff --git a/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors b/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors new file mode 100644 index 0000000..fa7fd40 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv b/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv new file mode 100644 index 0000000..79f4f8c --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case2/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +15,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30 diff --git a/unitree_z1_dual_arm_stackbox/case3/run_world_model_interaction.sh b/unitree_z1_dual_arm_stackbox/case3/run_world_model_interaction.sh new file mode 100644 index 0000000..1058f25 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case3/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_stackbox/case3" +dataset="unitree_z1_dual_arm_stackbox" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 7 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/25.png b/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/25.png new file mode 100644 index 0000000..5540f09 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/25.png differ diff --git a/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/25.h5 b/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/25.h5 new file mode 100644 index 0000000..8a6ca42 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/25.h5 differ diff --git a/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors b/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors new file mode 100644 index 0000000..fa7fd40 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv b/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv new file mode 100644 index 0000000..3bbd2da --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case3/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +25,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30 diff --git a/unitree_z1_dual_arm_stackbox/case4/run_world_model_interaction.sh b/unitree_z1_dual_arm_stackbox/case4/run_world_model_interaction.sh new file mode 100644 index 0000000..fa46100 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case4/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_stackbox/case4" +dataset="unitree_z1_dual_arm_stackbox" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 7 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/35.png b/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/35.png new file mode 100644 index 0000000..f3ec0a3 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox/35.png differ diff --git a/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/35.h5 b/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/35.h5 new file mode 100644 index 0000000..875155b Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/35.h5 differ diff --git a/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors b/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors new file mode 100644 index 0000000..fa7fd40 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv b/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv new file mode 100644 index 0000000..f22144c --- /dev/null +++ b/unitree_z1_dual_arm_stackbox/case4/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +35,x,x,unitree_z1_dual_arm_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Dual-Arm,30 diff --git a/unitree_z1_dual_arm_stackbox_v2/case1/run_world_model_interaction.sh b/unitree_z1_dual_arm_stackbox_v2/case1/run_world_model_interaction.sh new file mode 100644 index 0000000..bdcbbff --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case1/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_stackbox_v2/case1" +dataset="unitree_z1_dual_arm_stackbox_v2" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 11 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/5.png b/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/5.png new file mode 100644 index 0000000..2371c4d Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/5.png differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/5.h5 b/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/5.h5 new file mode 100644 index 0000000..a999fc7 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/5.h5 differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors b/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors new file mode 100644 index 0000000..6ef7a6c Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv b/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv new file mode 100644 index 0000000..4591e75 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case1/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +5,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30 diff --git a/unitree_z1_dual_arm_stackbox_v2/case2/run_world_model_interaction.sh b/unitree_z1_dual_arm_stackbox_v2/case2/run_world_model_interaction.sh new file mode 100644 index 0000000..2c94946 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case2/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_stackbox_v2/case2" +dataset="unitree_z1_dual_arm_stackbox_v2" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 11 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/15.png b/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/15.png new file mode 100644 index 0000000..aab83f1 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/15.png differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/15.h5 b/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/15.h5 new file mode 100644 index 0000000..0a6bb8f Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/15.h5 differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors b/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors new file mode 100644 index 0000000..6ef7a6c Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv b/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv new file mode 100644 index 0000000..8cc81d4 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case2/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +15,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30 diff --git a/unitree_z1_dual_arm_stackbox_v2/case3/run_world_model_interaction.sh b/unitree_z1_dual_arm_stackbox_v2/case3/run_world_model_interaction.sh new file mode 100644 index 0000000..6708ee9 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case3/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_stackbox_v2/case3" +dataset="unitree_z1_dual_arm_stackbox_v2" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 11 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/25.png b/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/25.png new file mode 100644 index 0000000..f800036 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/25.png differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/25.h5 b/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/25.h5 new file mode 100644 index 0000000..966e7cc Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/25.h5 differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors b/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors new file mode 100644 index 0000000..6ef7a6c Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv b/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv new file mode 100644 index 0000000..4e1d4ee --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case3/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +25,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30 diff --git a/unitree_z1_dual_arm_stackbox_v2/case4/run_world_model_interaction.sh b/unitree_z1_dual_arm_stackbox_v2/case4/run_world_model_interaction.sh new file mode 100644 index 0000000..370c1c3 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case4/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_dual_arm_stackbox_v2/case4" +dataset="unitree_z1_dual_arm_stackbox_v2" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 11 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/35.png b/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/35.png new file mode 100644 index 0000000..d760f72 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/images/unitree_z1_dual_arm_stackbox_v2/35.png differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/35.h5 b/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/35.h5 new file mode 100644 index 0000000..d9adda8 Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/35.h5 differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors b/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors new file mode 100644 index 0000000..6ef7a6c Binary files /dev/null and b/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/transitions/unitree_z1_dual_arm_stackbox_v2/meta_data/stats.safetensors differ diff --git a/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv b/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv new file mode 100644 index 0000000..43c4b92 --- /dev/null +++ b/unitree_z1_dual_arm_stackbox_v2/case4/world_model_interaction_prompts/unitree_z1_dual_arm_stackbox_v2.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +35,x,x,unitree_z1_dual_arm_stackbox_v2,"Stack the blocks in the rectangular block: red at the bottom, yellow in the middle, green on top",x,x,x,Unitree Z1 Robot Dual-Arm,30 diff --git a/unitree_z1_stackbox/case1/run_world_model_interaction.sh b/unitree_z1_stackbox/case1/run_world_model_interaction.sh new file mode 100644 index 0000000..73d9132 --- /dev/null +++ b/unitree_z1_stackbox/case1/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_stackbox/case1" +dataset="unitree_z1_stackbox" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_stackbox/case1/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 12 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_stackbox/case1/world_model_interaction_prompts/images/unitree_z1_stackbox/5.png b/unitree_z1_stackbox/case1/world_model_interaction_prompts/images/unitree_z1_stackbox/5.png new file mode 100644 index 0000000..8e265c0 Binary files /dev/null and b/unitree_z1_stackbox/case1/world_model_interaction_prompts/images/unitree_z1_stackbox/5.png differ diff --git a/unitree_z1_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_stackbox/5.h5 b/unitree_z1_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_stackbox/5.h5 new file mode 100644 index 0000000..fa647f1 Binary files /dev/null and b/unitree_z1_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_stackbox/5.h5 differ diff --git a/unitree_z1_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors b/unitree_z1_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors new file mode 100644 index 0000000..1918ea0 Binary files /dev/null and b/unitree_z1_stackbox/case1/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors differ diff --git a/unitree_z1_stackbox/case1/world_model_interaction_prompts/unitree_z1_stackbox.csv b/unitree_z1_stackbox/case1/world_model_interaction_prompts/unitree_z1_stackbox.csv new file mode 100644 index 0000000..8f55185 --- /dev/null +++ b/unitree_z1_stackbox/case1/world_model_interaction_prompts/unitree_z1_stackbox.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +5,x,x,unitree_z1_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Arm,30 diff --git a/unitree_z1_stackbox/case2/run_world_model_interaction.sh b/unitree_z1_stackbox/case2/run_world_model_interaction.sh new file mode 100644 index 0000000..95fb33b --- /dev/null +++ b/unitree_z1_stackbox/case2/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_stackbox/case2" +dataset="unitree_z1_stackbox" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_stackbox/case2/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 12 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_stackbox/case2/world_model_interaction_prompts/images/unitree_z1_stackbox/15.png b/unitree_z1_stackbox/case2/world_model_interaction_prompts/images/unitree_z1_stackbox/15.png new file mode 100644 index 0000000..2b7be22 Binary files /dev/null and b/unitree_z1_stackbox/case2/world_model_interaction_prompts/images/unitree_z1_stackbox/15.png differ diff --git a/unitree_z1_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_stackbox/15.h5 b/unitree_z1_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_stackbox/15.h5 new file mode 100644 index 0000000..4a71e9f Binary files /dev/null and b/unitree_z1_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_stackbox/15.h5 differ diff --git a/unitree_z1_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors b/unitree_z1_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors new file mode 100644 index 0000000..1918ea0 Binary files /dev/null and b/unitree_z1_stackbox/case2/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors differ diff --git a/unitree_z1_stackbox/case2/world_model_interaction_prompts/unitree_z1_stackbox.csv b/unitree_z1_stackbox/case2/world_model_interaction_prompts/unitree_z1_stackbox.csv new file mode 100644 index 0000000..bde4468 --- /dev/null +++ b/unitree_z1_stackbox/case2/world_model_interaction_prompts/unitree_z1_stackbox.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +15,x,x,unitree_z1_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Arm,30 diff --git a/unitree_z1_stackbox/case3/run_world_model_interaction.sh b/unitree_z1_stackbox/case3/run_world_model_interaction.sh new file mode 100644 index 0000000..d92501c --- /dev/null +++ b/unitree_z1_stackbox/case3/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_stackbox/case3" +dataset="unitree_z1_stackbox" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_stackbox/case3/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 12 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_stackbox/case3/world_model_interaction_prompts/images/unitree_z1_stackbox/25.png b/unitree_z1_stackbox/case3/world_model_interaction_prompts/images/unitree_z1_stackbox/25.png new file mode 100644 index 0000000..1365fd5 Binary files /dev/null and b/unitree_z1_stackbox/case3/world_model_interaction_prompts/images/unitree_z1_stackbox/25.png differ diff --git a/unitree_z1_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_stackbox/25.h5 b/unitree_z1_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_stackbox/25.h5 new file mode 100644 index 0000000..27c0773 Binary files /dev/null and b/unitree_z1_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_stackbox/25.h5 differ diff --git a/unitree_z1_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors b/unitree_z1_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors new file mode 100644 index 0000000..1918ea0 Binary files /dev/null and b/unitree_z1_stackbox/case3/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors differ diff --git a/unitree_z1_stackbox/case3/world_model_interaction_prompts/unitree_z1_stackbox.csv b/unitree_z1_stackbox/case3/world_model_interaction_prompts/unitree_z1_stackbox.csv new file mode 100644 index 0000000..a32f631 --- /dev/null +++ b/unitree_z1_stackbox/case3/world_model_interaction_prompts/unitree_z1_stackbox.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +25,x,x,unitree_z1_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Arm,30 diff --git a/unitree_z1_stackbox/case4/run_world_model_interaction.sh b/unitree_z1_stackbox/case4/run_world_model_interaction.sh new file mode 100644 index 0000000..054b175 --- /dev/null +++ b/unitree_z1_stackbox/case4/run_world_model_interaction.sh @@ -0,0 +1,24 @@ +res_dir="unitree_z1_stackbox/case4" +dataset="unitree_z1_stackbox" + +{ + time CUDA_VISIBLE_DEVICES=0 python3 scripts/evaluation/world_model_interaction.py \ + --seed 123 \ + --ckpt_path ckpts/unifolm_wma_dual.ckpt \ + --config configs/inference/world_model_interaction.yaml \ + --savedir "${res_dir}/output" \ + --bs 1 --height 320 --width 512 \ + --unconditional_guidance_scale 1.0 \ + --ddim_steps 50 \ + --ddim_eta 1.0 \ + --prompt_dir "unitree_z1_stackbox/case4/world_model_interaction_prompts" \ + --dataset ${dataset} \ + --video_length 16 \ + --frame_stride 4 \ + --n_action_steps 16 \ + --exe_steps 16 \ + --n_iter 12 \ + --timestep_spacing 'uniform_trailing' \ + --guidance_rescale 0.7 \ + --perframe_ae +} 2>&1 | tee "${res_dir}/output.log" diff --git a/unitree_z1_stackbox/case4/world_model_interaction_prompts/images/unitree_z1_stackbox/35.png b/unitree_z1_stackbox/case4/world_model_interaction_prompts/images/unitree_z1_stackbox/35.png new file mode 100644 index 0000000..67736af Binary files /dev/null and b/unitree_z1_stackbox/case4/world_model_interaction_prompts/images/unitree_z1_stackbox/35.png differ diff --git a/unitree_z1_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_stackbox/35.h5 b/unitree_z1_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_stackbox/35.h5 new file mode 100644 index 0000000..94322f7 Binary files /dev/null and b/unitree_z1_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_stackbox/35.h5 differ diff --git a/unitree_z1_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors b/unitree_z1_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors new file mode 100644 index 0000000..1918ea0 Binary files /dev/null and b/unitree_z1_stackbox/case4/world_model_interaction_prompts/transitions/unitree_z1_stackbox/meta_data/stats.safetensors differ diff --git a/unitree_z1_stackbox/case4/world_model_interaction_prompts/unitree_z1_stackbox.csv b/unitree_z1_stackbox/case4/world_model_interaction_prompts/unitree_z1_stackbox.csv new file mode 100644 index 0000000..2f0bbc0 --- /dev/null +++ b/unitree_z1_stackbox/case4/world_model_interaction_prompts/unitree_z1_stackbox.csv @@ -0,0 +1,2 @@ +videoid,contentUrl,duration,data_dir,instruction,dynamic_confidence,dynamic_wording,dynamic_source_category,embodiment,fps +35,x,x,unitree_z1_stackbox,"Pick up the red cup on the table.",x,x,x,Unitree Z1 Robot Arm,30