diff --git a/eval.py b/eval.py index 8e66fb3..a99b394 100644 --- a/eval.py +++ b/eval.py @@ -2,7 +2,9 @@ import os os.environ["MUJOCO_GL"] = "egl" +import multiprocessing as mp import time +import traceback from contextlib import nullcontext from pathlib import Path @@ -196,91 +198,46 @@ def dump_profiler_results(profiler, profile_dir, profile_cfg): return summary_path -@hydra.main(version_base=None, config_path="./config/eval", config_name="pusht") -def run(cfg: DictConfig): - """Run evaluation of dinowm vs random policy.""" - assert ( - cfg.plan_config.horizon * cfg.plan_config.action_block <= cfg.eval.eval_budget - ), "Planning horizon must be smaller than or equal to eval_budget" - # create world environment - cfg.world.max_episode_steps = 2 * cfg.eval.eval_budget - world = swm.World(**cfg.world, image_shape=(224, 224)) - - # create the transform - transform = { - "pixels": img_transform(cfg), - "goal": img_transform(cfg), +def get_multi_gpu_cfg(cfg): + multi_gpu_cfg = { + "enabled": False, + "devices": None, + "start_method": "spawn", } + cfg_multi_gpu = cfg.get("multi_gpu") + if cfg_multi_gpu is not None: + multi_gpu_cfg.update(OmegaConf.to_container(cfg_multi_gpu, resolve=True)) + return multi_gpu_cfg - dataset = get_dataset(cfg, cfg.eval.dataset_name) - stats_dataset = dataset # get_dataset(cfg, cfg.dataset.stats) - col_name = "episode_idx" if "episode_idx" in dataset.column_names else "ep_idx" - ep_indices, _ = np.unique(stats_dataset.get_col_data(col_name), return_index=True) +def build_process(cfg, dataset): process = {} for col in cfg.dataset.keys_to_cache: if col in ["pixels"]: continue processor = preprocessing.StandardScaler() - col_data = stats_dataset.get_col_data(col) + col_data = dataset.get_col_data(col) col_data = col_data[~np.isnan(col_data).any(axis=1)] processor.fit(col_data) process[col] = processor if col != "action": process[f"goal_{col}"] = process[col] + return process - # -- run evaluation - policy = cfg.get("policy", "random") - if policy != "random": - model = swm.policy.AutoCostModel(cfg.policy) - device = "cuda" if torch.cuda.is_available() else "cpu" - model = model.to(device) - model = model.eval() - model.requires_grad_(False) - model, compile_cfg, compile_target = maybe_compile_inference_target( - model, cfg, device - ) - print(f"model parameter dtype: {next(model.parameters()).dtype}") - inference_ctx, inference_precision = get_inference_context(cfg, device) - print(f"inference execution precision: {inference_precision}") - if compile_target != "disabled": - print( - f"inference compile target: {compile_target} " - f"(mode={compile_cfg['mode']})" - ) - model.interpolate_pos_encoding = True - config = swm.PlanConfig(**cfg.plan_config) - solver = hydra.utils.instantiate(cfg.solver, model=model) - policy = swm.policy.WorldModelPolicy( - solver=solver, config=config, process=process, transform=transform - ) - else: - policy = swm.policy.RandomPolicy() - inference_ctx = nullcontext() - inference_precision = "fp32" - compile_cfg = get_compile_cfg(cfg) - compile_target = "disabled" - - # Hydra switches the working directory to the per-run outputs folder. - # Keep all generated artifacts with that run instead of scattering them - # next to the cache or source tree. - output_dir = Path.cwd().resolve() - profiler_ctx, profile_dir, profile_cfg = make_profiler(cfg, output_dir) - - # sample the episodes and the starting indices +def sample_eval_cases(cfg, dataset): + stats_dataset = dataset + col_name = "episode_idx" if "episode_idx" in dataset.column_names else "ep_idx" + ep_indices, _ = np.unique(stats_dataset.get_col_data(col_name), return_index=True) episode_len = get_episodes_length(dataset, ep_indices) max_start_idx = episode_len - cfg.eval.goal_offset_steps - 1 max_start_idx_dict = {ep_id: max_start_idx[i] for i, ep_id in enumerate(ep_indices)} - # Map each dataset row’s episode_idx to its max_start_idx - col_name = "episode_idx" if "episode_idx" in dataset.column_names else "ep_idx" max_start_per_row = np.array( [max_start_idx_dict[ep_id] for ep_id in dataset.get_col_data(col_name)] ) - # remove all the lines of dataset for which dataset['step_idx'] > max_start_per_row valid_mask = dataset.get_col_data("step_idx") <= max_start_per_row valid_indices = np.nonzero(valid_mask)[0] print(valid_mask.sum(), "valid starting points found for evaluation.") @@ -289,21 +246,115 @@ def run(cfg: DictConfig): random_episode_indices = g.choice( len(valid_indices) - 1, size=cfg.eval.num_eval, replace=False ) - - # sort increasingly to avoid issues with HDF5Dataset indexing random_episode_indices = np.sort(valid_indices[random_episode_indices]) - print(random_episode_indices) - eval_episodes = dataset.get_row_data(random_episode_indices)[col_name] - eval_start_idx = dataset.get_row_data(random_episode_indices)["step_idx"] + rows = dataset.get_row_data(random_episode_indices) + eval_episodes = rows[col_name] + eval_start_idx = rows["step_idx"] if len(eval_episodes) < cfg.eval.num_eval: raise ValueError("Not enough episodes with sufficient length for evaluation.") + return eval_episodes, eval_start_idx + + +def normalize_multi_gpu_devices(devices): + if devices is None: + return [f"cuda:{idx}" for idx in range(torch.cuda.device_count())] + + normalized = [] + for device in devices: + if isinstance(device, int): + normalized.append(f"cuda:{device}") + elif isinstance(device, str) and device.isdigit(): + normalized.append(f"cuda:{int(device)}") + else: + normalized.append(str(device)) + return normalized + + +def shard_eval_cases(eval_episodes, eval_start_idx, num_shards): + if num_shards < 1: + raise ValueError("num_shards must be >= 1") + + total = len(eval_episodes) + shard_sizes = [total // num_shards] * num_shards + for idx in range(total % num_shards): + shard_sizes[idx] += 1 + + shards = [] + start = 0 + for size in shard_sizes: + end = start + size + if size > 0: + shards.append((eval_episodes[start:end], eval_start_idx[start:end])) + start = end + return shards + + +def run_eval_subset( + cfg: DictConfig, + eval_episodes, + eval_start_idx, + output_dir: Path, + *, + device_override: str | None = None, + enable_profile: bool = True, +): + local_cfg = OmegaConf.create(OmegaConf.to_container(cfg, resolve=False)) + local_cfg.eval.num_eval = len(eval_episodes) + local_cfg.world.num_envs = len(eval_episodes) + local_cfg.world.max_episode_steps = 2 * local_cfg.eval.eval_budget + + if device_override is not None: + local_cfg.solver.device = device_override + if torch.cuda.is_available() and str(device_override).startswith("cuda"): + torch.cuda.set_device(torch.device(device_override)) + + if not enable_profile: + if local_cfg.get("profile") is None: + local_cfg.profile = OmegaConf.create({"enabled": False}) + else: + local_cfg.profile.enabled = False + + world = swm.World(**local_cfg.world, image_shape=(224, 224)) + transform = { + "pixels": img_transform(local_cfg), + "goal": img_transform(local_cfg), + } + dataset = get_dataset(local_cfg, local_cfg.eval.dataset_name) + process = build_process(local_cfg, dataset) + + policy_name = local_cfg.get("policy", "random") + if policy_name != "random": + model = swm.policy.AutoCostModel(local_cfg.policy) + device = device_override or ("cuda" if torch.cuda.is_available() else "cpu") + model = model.to(device) + model = model.eval() + model.requires_grad_(False) + model, compile_cfg, compile_target = maybe_compile_inference_target( + model, local_cfg, device + ) + inference_ctx, inference_precision = get_inference_context(local_cfg, device) + model.interpolate_pos_encoding = True + config = swm.PlanConfig(**local_cfg.plan_config) + solver = hydra.utils.instantiate(local_cfg.solver, model=model) + policy = swm.policy.WorldModelPolicy( + solver=solver, config=config, process=process, transform=transform + ) + else: + policy = swm.policy.RandomPolicy() + inference_ctx = nullcontext() + inference_precision = "fp32" + compile_cfg = get_compile_cfg(local_cfg) + compile_target = "disabled" + device = device_override or ("cuda" if torch.cuda.is_available() else "cpu") + + profiler_ctx, profile_dir, profile_cfg = make_profiler(local_cfg, output_dir) world.set_policy(policy) - if torch.cuda.is_available(): + if str(device).startswith("cuda") and torch.cuda.is_available(): torch.cuda.synchronize() start_time = time.time() with torch.inference_mode(): @@ -312,19 +363,171 @@ def run(cfg: DictConfig): with torch.profiler.record_function("eval.world_evaluate_from_dataset"): metrics = world.evaluate_from_dataset( dataset, - start_steps=eval_start_idx.tolist(), - goal_offset_steps=cfg.eval.goal_offset_steps, - eval_budget=cfg.eval.eval_budget, - episodes_idx=eval_episodes.tolist(), - callables=OmegaConf.to_container(cfg.eval.get("callables"), resolve=True), + start_steps=list(eval_start_idx), + goal_offset_steps=local_cfg.eval.goal_offset_steps, + eval_budget=local_cfg.eval.eval_budget, + episodes_idx=list(eval_episodes), + callables=OmegaConf.to_container( + local_cfg.eval.get("callables"), resolve=True + ), save_video=False, video_path=output_dir, ) - if torch.cuda.is_available(): + if str(device).startswith("cuda") and torch.cuda.is_available(): torch.cuda.synchronize() - end_time = time.time() + evaluation_time = time.time() - start_time profile_summary_path = dump_profiler_results(profiler, profile_dir, profile_cfg) - + + return { + "metrics": metrics, + "evaluation_time": evaluation_time, + "inference_precision": inference_precision, + "compile_target": compile_target, + "compile_mode": compile_cfg["mode"] if compile_target != "disabled" else None, + "profile_dir": profile_dir, + "profile_summary_path": profile_summary_path, + } + + +def _multi_gpu_eval_worker( + cfg_container, + eval_episodes, + eval_start_idx, + output_dir, + device, + shard_idx, + queue, +): + try: + cfg = OmegaConf.create(cfg_container) + result = run_eval_subset( + cfg, + eval_episodes, + eval_start_idx, + Path(output_dir), + device_override=device, + enable_profile=False, + ) + queue.put({"ok": True, "shard_idx": shard_idx, "result": result}) + except Exception: + queue.put( + { + "ok": False, + "shard_idx": shard_idx, + "error": traceback.format_exc(), + } + ) + + +def run_multi_gpu_eval(cfg, eval_episodes, eval_start_idx, output_dir: Path): + multi_gpu_cfg = get_multi_gpu_cfg(cfg) + devices = normalize_multi_gpu_devices(multi_gpu_cfg["devices"]) + if len(devices) < 2: + raise ValueError("multi_gpu.enabled=true requires at least 2 CUDA devices") + + shards = shard_eval_cases(eval_episodes, eval_start_idx, min(len(devices), len(eval_episodes))) + devices = devices[: len(shards)] + + ctx = mp.get_context(multi_gpu_cfg["start_method"]) + queue = ctx.Queue() + cfg_container = OmegaConf.to_container(cfg, resolve=False) + processes = [] + + start_time = time.time() + for shard_idx, ((shard_episodes, shard_start_idx), device) in enumerate( + zip(shards, devices, strict=True) + ): + process = ctx.Process( + target=_multi_gpu_eval_worker, + args=( + cfg_container, + list(shard_episodes), + list(shard_start_idx), + str(output_dir), + device, + shard_idx, + queue, + ), + ) + process.start() + processes.append(process) + + shard_results = {} + errors = [] + for _ in processes: + message = queue.get() + if message["ok"]: + shard_results[message["shard_idx"]] = message["result"] + else: + errors.append(message["error"]) + + for process in processes: + process.join() + + if errors: + raise RuntimeError(errors[0]) + + ordered_results = [shard_results[idx] for idx in range(len(processes))] + episode_successes = np.concatenate( + [ + np.asarray(result["metrics"]["episode_successes"], dtype=np.bool_) + for result in ordered_results + ] + ) + + seeds = None + shard_seeds = [result["metrics"].get("seeds") for result in ordered_results] + if all(seed is not None for seed in shard_seeds): + seeds = np.concatenate(shard_seeds) + + metrics = { + "success_rate": float(np.sum(episode_successes)) / len(episode_successes) * 100.0, + "episode_successes": episode_successes, + "seeds": seeds, + } + reference = ordered_results[0] + return { + "metrics": metrics, + "evaluation_time": time.time() - start_time, + "inference_precision": reference["inference_precision"], + "compile_target": reference["compile_target"], + "compile_mode": reference["compile_mode"], + "profile_dir": None, + "profile_summary_path": None, + } + +@hydra.main(version_base=None, config_path="./config/eval", config_name="pusht") +def run(cfg: DictConfig): + """Run evaluation of dinowm vs random policy.""" + assert ( + cfg.plan_config.horizon * cfg.plan_config.action_block <= cfg.eval.eval_budget + ), "Planning horizon must be smaller than or equal to eval_budget" + + dataset = get_dataset(cfg, cfg.eval.dataset_name) + eval_episodes, eval_start_idx = sample_eval_cases(cfg, dataset) + output_dir = Path.cwd().resolve() + profile_cfg = get_profile_cfg(cfg) + + if get_multi_gpu_cfg(cfg)["enabled"]: + if profile_cfg["enabled"]: + raise ValueError("Profiling is not supported together with multi_gpu.enabled=true") + eval_result = run_multi_gpu_eval(cfg, eval_episodes, eval_start_idx, output_dir) + else: + eval_result = run_eval_subset( + cfg, + eval_episodes.tolist(), + eval_start_idx.tolist(), + output_dir, + ) + + metrics = eval_result["metrics"] + evaluation_time = eval_result["evaluation_time"] + inference_precision = eval_result["inference_precision"] + compile_target = eval_result["compile_target"] + compile_mode = eval_result["compile_mode"] + profile_dir = eval_result["profile_dir"] + profile_summary_path = eval_result["profile_summary_path"] + print(metrics) results_path = output_dir / cfg.output.filename @@ -339,11 +542,11 @@ def run(cfg: DictConfig): f.write("==== RESULTS ====\n") f.write(f"metrics: {metrics}\n") - f.write(f"evaluation_time: {end_time - start_time} seconds\n") + f.write(f"evaluation_time: {evaluation_time} seconds\n") f.write(f"inference_precision: {inference_precision}\n") f.write(f"inference_compile_target: {compile_target}\n") if compile_target != "disabled": - f.write(f"inference_compile_mode: {compile_cfg['mode']}\n") + f.write(f"inference_compile_mode: {compile_mode}\n") if profile_cfg["enabled"]: f.write(f"profile_dir: {profile_dir}\n") if profile_summary_path is not None: diff --git a/tworoom_results.txt b/tworoom_results.txt index bfaea00..b7efbd5 100644 --- a/tworoom_results.txt +++ b/tworoom_results.txt @@ -2788,3 +2788,582 @@ evaluation_time: 90.14458179473877 seconds inference_precision: fp16 inference_compile_target: predictor inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt +multi_gpu: + enabled: true + devices: + - 0 + - 1 + +==== RESULTS ==== +metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False, + True, True, True, True, True, True, True, True, False, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, True, False, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 71.5921995639801 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt +multi_gpu: + enabled: true + devices: + - 0 + - 1 + +==== RESULTS ==== +metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False, + True, True, True, True, True, True, True, True, False, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, True, False, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 38.652626514434814 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt +multi_gpu: + enabled: true + devices: + - 0 + - 1 + +==== RESULTS ==== +metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False, + True, True, True, True, True, True, True, True, False, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, True, False, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 38.510936975479126 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt +multi_gpu: + enabled: true + devices: + - 0 + - 1 + - 2 + - 3 + +==== RESULTS ==== +metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False, + True, True, True, True, True, True, True, True, False, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, False, True, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 80.60203051567078 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt + +==== RESULTS ==== +metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False, + True, True, True, True, True, True, True, True, True, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, False, False, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 31.07235813140869 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt + +==== RESULTS ==== +metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False, + True, True, True, True, True, True, True, True, True, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, False, False, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 30.950740575790405 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt +multi_gpu: + enabled: true + devices: + - 0 + - 1 + - 2 + +==== RESULTS ==== +metrics: {'success_rate': 92.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, True, + True, True, True, True, True, True, True, True, True, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, True, False, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 191.0513756275177 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt +multi_gpu: + enabled: true + devices: + - 0 + - 1 + - 2 + +==== RESULTS ==== +metrics: {'success_rate': 92.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, True, + True, True, True, True, True, True, True, True, True, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, True, False, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 133.4684717655182 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead + +==== CONFIG ==== +cache_dir: null +solver: + _target_: stable_worldmodel.solver.CEMSolver + model: ??? + batch_size: 8 + num_samples: 300 + var_scale: 1.0 + n_steps: 30 + topk: 30 + device: cuda + seed: ${seed} +world: + env_name: swm/TwoRoom-v1 + num_envs: ${eval.num_eval} + max_episode_steps: ??? + history_size: 1 + frame_skip: 1 +seed: 42 +policy: two-room/tworoom/lejepa +inference_precision: fp16 +dataset: + stats: ${eval.dataset_name} + keys_to_cache: + - action + - proprio +plan_config: + horizon: 5 + receding_horizon: 5 + action_block: 5 +eval: + num_eval: 50 + goal_offset_steps: 25 + eval_budget: 50 + img_size: 224 + dataset_name: tworoom + callables: + - method: _set_state + args: + state: + value: proprio + - method: _set_goal_state + args: + goal_state: + value: goal_proprio +output: + filename: tworoom_results.txt +multi_gpu: + enabled: true + devices: + - 0 + - 1 + +==== RESULTS ==== +metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False, + True, True, True, True, True, True, True, True, False, + True, True, True, True, True, True, True, True, True, + True, True, True, True, False, True, True, True, True, + True, True, True, False, True, True, True, True, True, + True, True, True, True, True]), 'seeds': None} +evaluation_time: 38.12551426887512 seconds +inference_precision: fp16 +inference_compile_target: predictor +inference_compile_mode: reduce-overhead