diff --git a/config/eval/cube.yaml b/config/eval/cube.yaml index 3ba34bf..a580c2f 100644 --- a/config/eval/cube.yaml +++ b/config/eval/cube.yaml @@ -36,6 +36,7 @@ eval: goal_offset_steps: 25 eval_budget: 50 img_size: 224 + save_video: false dataset_name: ogbench/cube_single_expert callables: # -- set state @@ -58,4 +59,3 @@ eval: output: filename: ogb_cube_results.txt - diff --git a/config/eval/pusht.yaml b/config/eval/pusht.yaml index 60c5c24..e92ddeb 100644 --- a/config/eval/pusht.yaml +++ b/config/eval/pusht.yaml @@ -32,6 +32,7 @@ eval: goal_offset_steps: 25 eval_budget: 50 img_size: 224 + save_video: false dataset_name: pusht_expert_train callables: # -- set state diff --git a/config/eval/reacher.yaml b/config/eval/reacher.yaml index d0c62dc..e5af58f 100644 --- a/config/eval/reacher.yaml +++ b/config/eval/reacher.yaml @@ -30,6 +30,7 @@ eval: goal_offset_steps: 25 eval_budget: 50 img_size: 224 + save_video: false dataset_name: dmc/reacher_random callables: # -- set state @@ -47,4 +48,3 @@ eval: output: filename: dmc_results.txt - diff --git a/config/eval/tworoom.yaml b/config/eval/tworoom.yaml index 23dd552..7754dcb 100644 --- a/config/eval/tworoom.yaml +++ b/config/eval/tworoom.yaml @@ -31,6 +31,7 @@ eval: goal_offset_steps: 25 eval_budget: 50 img_size: 224 + save_video: false dataset_name: tworoom callables: # -- set state diff --git a/eval.py b/eval.py index a99b394..732d83a 100644 --- a/eval.py +++ b/eval.py @@ -370,7 +370,7 @@ def run_eval_subset( callables=OmegaConf.to_container( local_cfg.eval.get("callables"), resolve=True ), - save_video=False, + save_video=bool(local_cfg.eval.get("save_video", False)), video_path=output_dir, ) if str(device).startswith("cuda") and torch.cuda.is_available(): diff --git a/pusht_results.txt b/pusht_results.txt index 435ee95..01183bd 100644 --- a/pusht_results.txt +++ b/pusht_results.txt @@ -437,3 +437,65 @@ evaluation_time: 29.783243894577026 seconds inference_precision: fp16 inference_compile_target: predictor inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/PushT-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio + - state +seed: 42 +policy: pusht/lewm +inference_precision: fp16 +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + save_video: true + dataset_name: pusht_expert_train + callables: + - method: _set_state + args: + state: + value: state + - method: _set_goal_state + args: + goal_state: + value: goal_state +output: + filename: pusht_results.txt + +==== RESULTS ==== +metrics: {'success_rate': 94.0, 'episode_successes': array([ True, True, True, True, True, False, True, True, True, + True, True, True, True, True, True, True, True, True, + True, False, True, True, True, True, True, True, True, + True, True, True, True, True, True, True, True, True, + True, True, True, True, True, True, False, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 34.548478841781616 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead