This commit is contained in:
qihuanye
2026-04-10 03:13:54 +00:00
parent e6f2b2b9d4
commit 8ba5bc8b0b
2 changed files with 859 additions and 77 deletions

357
eval.py
View File

@@ -2,7 +2,9 @@ import os
os.environ["MUJOCO_GL"] = "egl"
import multiprocessing as mp
import time
import traceback
from contextlib import nullcontext
from pathlib import Path
@@ -196,91 +198,46 @@ def dump_profiler_results(profiler, profile_dir, profile_cfg):
return summary_path
@hydra.main(version_base=None, config_path="./config/eval", config_name="pusht")
def run(cfg: DictConfig):
"""Run evaluation of dinowm vs random policy."""
assert (
cfg.plan_config.horizon * cfg.plan_config.action_block <= cfg.eval.eval_budget
), "Planning horizon must be smaller than or equal to eval_budget"
# create world environment
cfg.world.max_episode_steps = 2 * cfg.eval.eval_budget
world = swm.World(**cfg.world, image_shape=(224, 224))
# create the transform
transform = {
"pixels": img_transform(cfg),
"goal": img_transform(cfg),
def get_multi_gpu_cfg(cfg):
    """Return the effective multi-GPU settings for this run.

    The result starts from built-in defaults (disabled, no explicit device
    list, ``"spawn"`` start method) and is overlaid with the optional
    ``multi_gpu`` section of ``cfg`` when that section is present.

    Args:
        cfg: Configuration object exposing ``get``. Its ``multi_gpu`` entry,
            if not ``None``, is converted with ``OmegaConf.to_container``
            (interpolations resolved) before being merged.

    Returns:
        A new plain ``dict`` containing at least the keys ``enabled``,
        ``devices`` and ``start_method``.
    """
    settings = dict(enabled=False, devices=None, start_method="spawn")
    overrides = cfg.get("multi_gpu")
    if overrides is None:
        return settings
    # User-provided keys win over the defaults; unknown keys are kept as-is.
    settings.update(OmegaConf.to_container(overrides, resolve=True))
    return settings
dataset = get_dataset(cfg, cfg.eval.dataset_name)
stats_dataset = dataset # get_dataset(cfg, cfg.dataset.stats)
col_name = "episode_idx" if "episode_idx" in dataset.column_names else "ep_idx"
ep_indices, _ = np.unique(stats_dataset.get_col_data(col_name), return_index=True)
def build_process(cfg, dataset):
    """Fit one scaler per cached dataset column.

    Every column named in ``cfg.dataset.keys_to_cache`` except ``"pixels"``
    gets its own ``preprocessing.StandardScaler`` fitted on that column's
    data, after rows containing any NaN have been dropped.

    Args:
        cfg: Configuration exposing ``cfg.dataset.keys_to_cache``.
        dataset: Object exposing ``get_col_data(col)``.
            NOTE(review): the NaN filter uses ``axis=1``, so each column is
            assumed to be a 2-D array — confirm against the dataset class.

    Returns:
        A dict mapping column name to its fitted scaler. For every column
        other than ``"action"`` the same scaler instance is also stored
        under ``"goal_<col>"``.
    """
    process = {}
    for col in cfg.dataset.keys_to_cache:
        if col == "pixels":
            continue
        # Read statistics from the dataset passed in; the previous revision
        # read from a separate ``stats_dataset`` that does not exist here.
        col_data = dataset.get_col_data(col)
        # Drop rows with any NaN so they do not poison the mean/std.
        col_data = col_data[~np.isnan(col_data).any(axis=1)]
        processor = preprocessing.StandardScaler()
        processor.fit(col_data)
        process[col] = processor
        if col != "action":
            # Goal observations share the scaler of their source column.
            process[f"goal_{col}"] = processor
    return process
# -- run evaluation
policy = cfg.get("policy", "random")
if policy != "random":
model = swm.policy.AutoCostModel(cfg.policy)
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model = model.eval()
model.requires_grad_(False)
model, compile_cfg, compile_target = maybe_compile_inference_target(
model, cfg, device
)
print(f"model parameter dtype: {next(model.parameters()).dtype}")
inference_ctx, inference_precision = get_inference_context(cfg, device)
print(f"inference execution precision: {inference_precision}")
if compile_target != "disabled":
print(
f"inference compile target: {compile_target} "
f"(mode={compile_cfg['mode']})"
)
model.interpolate_pos_encoding = True
config = swm.PlanConfig(**cfg.plan_config)
solver = hydra.utils.instantiate(cfg.solver, model=model)
policy = swm.policy.WorldModelPolicy(
solver=solver, config=config, process=process, transform=transform
)
else:
policy = swm.policy.RandomPolicy()
inference_ctx = nullcontext()
inference_precision = "fp32"
compile_cfg = get_compile_cfg(cfg)
compile_target = "disabled"
# Hydra switches the working directory to the per-run outputs folder.
# Keep all generated artifacts with that run instead of scattering them
# next to the cache or source tree.
output_dir = Path.cwd().resolve()
profiler_ctx, profile_dir, profile_cfg = make_profiler(cfg, output_dir)
# sample the episodes and the starting indices
def sample_eval_cases(cfg, dataset):
stats_dataset = dataset
col_name = "episode_idx" if "episode_idx" in dataset.column_names else "ep_idx"
ep_indices, _ = np.unique(stats_dataset.get_col_data(col_name), return_index=True)
episode_len = get_episodes_length(dataset, ep_indices)
max_start_idx = episode_len - cfg.eval.goal_offset_steps - 1
max_start_idx_dict = {ep_id: max_start_idx[i] for i, ep_id in enumerate(ep_indices)}
# Map each dataset rows episode_idx to its max_start_idx
col_name = "episode_idx" if "episode_idx" in dataset.column_names else "ep_idx"
max_start_per_row = np.array(
[max_start_idx_dict[ep_id] for ep_id in dataset.get_col_data(col_name)]
)
# remove all the lines of dataset for which dataset['step_idx'] > max_start_per_row
valid_mask = dataset.get_col_data("step_idx") <= max_start_per_row
valid_indices = np.nonzero(valid_mask)[0]
print(valid_mask.sum(), "valid starting points found for evaluation.")
@@ -289,21 +246,115 @@ def run(cfg: DictConfig):
random_episode_indices = g.choice(
len(valid_indices) - 1, size=cfg.eval.num_eval, replace=False
)
# sort increasingly to avoid issues with HDF5Dataset indexing
random_episode_indices = np.sort(valid_indices[random_episode_indices])
print(random_episode_indices)
eval_episodes = dataset.get_row_data(random_episode_indices)[col_name]
eval_start_idx = dataset.get_row_data(random_episode_indices)["step_idx"]
rows = dataset.get_row_data(random_episode_indices)
eval_episodes = rows[col_name]
eval_start_idx = rows["step_idx"]
if len(eval_episodes) < cfg.eval.num_eval:
raise ValueError("Not enough episodes with sufficient length for evaluation.")
return eval_episodes, eval_start_idx
def normalize_multi_gpu_devices(devices):
    """Turn a user-supplied device specification into device strings.

    Args:
        devices: One of
            * ``None`` — use every device counted by
              ``torch.cuda.device_count()``;
            * an iterable of ints and/or strings;
            * a single int (e.g. ``1``);
            * a single string, optionally comma-separated
              (e.g. ``"cuda:0"`` or ``"0,1"``).

    Returns:
        A list of strings. Ints and all-digit strings become
        ``"cuda:<index>"``; anything else is passed through ``str()``.
    """
    if devices is None:
        return [f"cuda:{idx}" for idx in range(torch.cuda.device_count())]

    # A scalar would otherwise raise (int is not iterable) or be iterated
    # character by character (str), producing nonsense device names.
    if isinstance(devices, str):
        devices = [part.strip() for part in devices.split(",") if part.strip()]
    elif isinstance(devices, int):
        devices = [devices]

    def _normalize(device):
        if isinstance(device, int):
            return f"cuda:{device}"
        if isinstance(device, str) and device.isdigit():
            return f"cuda:{int(device)}"
        return str(device)

    return [_normalize(device) for device in devices]
def shard_eval_cases(eval_episodes, eval_start_idx, num_shards):
    """Split paired evaluation cases into contiguous, near-equal shards.

    The first ``len(eval_episodes) % num_shards`` shards receive one extra
    case. Shards that would be empty (more shards requested than cases) are
    omitted, so the result may contain fewer than ``num_shards`` entries.

    Args:
        eval_episodes: Sliceable sequence of episode identifiers.
        eval_start_idx: Sliceable sequence of start steps, aligned
            element-wise with ``eval_episodes``.
        num_shards: Maximum number of shards to produce; must be >= 1.

    Returns:
        A list of ``(episodes_slice, start_idx_slice)`` tuples in input order.

    Raises:
        ValueError: If ``num_shards`` is below 1, or if the two sequences
            differ in length (slicing them together would silently pair
            episodes with the wrong start steps).
    """
    if num_shards < 1:
        raise ValueError("num_shards must be >= 1")
    total = len(eval_episodes)
    if len(eval_start_idx) != total:
        raise ValueError(
            "eval_episodes and eval_start_idx must have the same length"
        )

    base_size, remainder = divmod(total, num_shards)
    shards = []
    start = 0
    for shard_idx in range(num_shards):
        size = base_size + (1 if shard_idx < remainder else 0)
        if size == 0:
            # Sizes never increase, so every remaining shard is empty too.
            break
        end = start + size
        shards.append((eval_episodes[start:end], eval_start_idx[start:end]))
        start = end
    return shards
def run_eval_subset(
cfg: DictConfig,
eval_episodes,
eval_start_idx,
output_dir: Path,
*,
device_override: str | None = None,
enable_profile: bool = True,
):
local_cfg = OmegaConf.create(OmegaConf.to_container(cfg, resolve=False))
local_cfg.eval.num_eval = len(eval_episodes)
local_cfg.world.num_envs = len(eval_episodes)
local_cfg.world.max_episode_steps = 2 * local_cfg.eval.eval_budget
if device_override is not None:
local_cfg.solver.device = device_override
if torch.cuda.is_available() and str(device_override).startswith("cuda"):
torch.cuda.set_device(torch.device(device_override))
if not enable_profile:
if local_cfg.get("profile") is None:
local_cfg.profile = OmegaConf.create({"enabled": False})
else:
local_cfg.profile.enabled = False
world = swm.World(**local_cfg.world, image_shape=(224, 224))
transform = {
"pixels": img_transform(local_cfg),
"goal": img_transform(local_cfg),
}
dataset = get_dataset(local_cfg, local_cfg.eval.dataset_name)
process = build_process(local_cfg, dataset)
policy_name = local_cfg.get("policy", "random")
if policy_name != "random":
model = swm.policy.AutoCostModel(local_cfg.policy)
device = device_override or ("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
model = model.eval()
model.requires_grad_(False)
model, compile_cfg, compile_target = maybe_compile_inference_target(
model, local_cfg, device
)
inference_ctx, inference_precision = get_inference_context(local_cfg, device)
model.interpolate_pos_encoding = True
config = swm.PlanConfig(**local_cfg.plan_config)
solver = hydra.utils.instantiate(local_cfg.solver, model=model)
policy = swm.policy.WorldModelPolicy(
solver=solver, config=config, process=process, transform=transform
)
else:
policy = swm.policy.RandomPolicy()
inference_ctx = nullcontext()
inference_precision = "fp32"
compile_cfg = get_compile_cfg(local_cfg)
compile_target = "disabled"
device = device_override or ("cuda" if torch.cuda.is_available() else "cpu")
profiler_ctx, profile_dir, profile_cfg = make_profiler(local_cfg, output_dir)
world.set_policy(policy)
if torch.cuda.is_available():
if str(device).startswith("cuda") and torch.cuda.is_available():
torch.cuda.synchronize()
start_time = time.time()
with torch.inference_mode():
@@ -312,19 +363,171 @@ def run(cfg: DictConfig):
with torch.profiler.record_function("eval.world_evaluate_from_dataset"):
metrics = world.evaluate_from_dataset(
dataset,
start_steps=eval_start_idx.tolist(),
goal_offset_steps=cfg.eval.goal_offset_steps,
eval_budget=cfg.eval.eval_budget,
episodes_idx=eval_episodes.tolist(),
callables=OmegaConf.to_container(cfg.eval.get("callables"), resolve=True),
start_steps=list(eval_start_idx),
goal_offset_steps=local_cfg.eval.goal_offset_steps,
eval_budget=local_cfg.eval.eval_budget,
episodes_idx=list(eval_episodes),
callables=OmegaConf.to_container(
local_cfg.eval.get("callables"), resolve=True
),
save_video=False,
video_path=output_dir,
)
if torch.cuda.is_available():
if str(device).startswith("cuda") and torch.cuda.is_available():
torch.cuda.synchronize()
end_time = time.time()
evaluation_time = time.time() - start_time
profile_summary_path = dump_profiler_results(profiler, profile_dir, profile_cfg)
return {
"metrics": metrics,
"evaluation_time": evaluation_time,
"inference_precision": inference_precision,
"compile_target": compile_target,
"compile_mode": compile_cfg["mode"] if compile_target != "disabled" else None,
"profile_dir": profile_dir,
"profile_summary_path": profile_summary_path,
}
def _multi_gpu_eval_worker(
cfg_container,
eval_episodes,
eval_start_idx,
output_dir,
device,
shard_idx,
queue,
):
try:
cfg = OmegaConf.create(cfg_container)
result = run_eval_subset(
cfg,
eval_episodes,
eval_start_idx,
Path(output_dir),
device_override=device,
enable_profile=False,
)
queue.put({"ok": True, "shard_idx": shard_idx, "result": result})
except Exception:
queue.put(
{
"ok": False,
"shard_idx": shard_idx,
"error": traceback.format_exc(),
}
)
def run_multi_gpu_eval(cfg, eval_episodes, eval_start_idx, output_dir: Path):
    """Evaluate the given cases in parallel, one worker process per device.

    The cases are split into contiguous shards with ``shard_eval_cases``
    (at most one shard per device), each shard is evaluated by
    ``_multi_gpu_eval_worker`` in its own process, and the per-shard results
    are merged back in shard order.

    Args:
        cfg: Run configuration; its ``multi_gpu`` section selects the
            devices and the multiprocessing start method.
        eval_episodes: Episode identifiers to evaluate.
        eval_start_idx: Start steps aligned with ``eval_episodes``.
        output_dir: Directory handed to every worker.

    Returns:
        A dict with the same keys as the single-process result:
        ``metrics`` (only ``success_rate``, ``episode_successes`` and
        ``seeds`` are merged; ``seeds`` is ``None`` unless every shard
        reported seeds), ``evaluation_time`` (wall-clock, including process
        start-up), ``inference_precision`` / ``compile_target`` /
        ``compile_mode`` (taken from shard 0), and ``profile_dir`` /
        ``profile_summary_path`` (always ``None``).

    Raises:
        ValueError: If fewer than two devices are available.
        RuntimeError: If any worker reports a failure or exits without
            reporting a result.
    """
    # Function-scope import: only this block needs the timeout exception.
    from queue import Empty as QueueEmpty

    poll_interval_s = 1.0

    multi_gpu_cfg = get_multi_gpu_cfg(cfg)
    devices = normalize_multi_gpu_devices(multi_gpu_cfg["devices"])
    if len(devices) < 2:
        raise ValueError("multi_gpu.enabled=true requires at least 2 CUDA devices")

    shards = shard_eval_cases(
        eval_episodes, eval_start_idx, min(len(devices), len(eval_episodes))
    )
    devices = devices[: len(shards)]

    ctx = mp.get_context(multi_gpu_cfg["start_method"])
    result_queue = ctx.Queue()
    # Workers receive a plain container and rebuild the config themselves.
    cfg_container = OmegaConf.to_container(cfg, resolve=False)

    processes = []
    start_time = time.time()
    for shard_idx, ((shard_episodes, shard_start_idx), device) in enumerate(
        zip(shards, devices, strict=True)
    ):
        process = ctx.Process(
            target=_multi_gpu_eval_worker,
            args=(
                cfg_container,
                list(shard_episodes),
                list(shard_start_idx),
                str(output_dir),
                device,
                shard_idx,
                result_queue,
            ),
        )
        process.start()
        processes.append(process)

    def _poll_message():
        """Return the next worker message, or None if none arrived in time."""
        try:
            return result_queue.get(timeout=poll_interval_s)
        except QueueEmpty:
            return None

    # Shards that have not reported yet. A blocking get() without timeout
    # would hang forever if a worker died before posting (segfault, OOM kill,
    # SystemExit), so poll and check liveness between attempts.
    pending = dict(enumerate(processes))
    shard_results = {}
    errors = []
    while pending:
        message = _poll_message()
        if message is None:
            dead = [idx for idx, proc in pending.items() if not proc.is_alive()]
            if not dead:
                continue
            # A worker that exited cleanly may still have its message in
            # flight; give the queue one more chance before blaming it.
            message = _poll_message()
            if message is None:
                for idx in dead:
                    proc = pending.pop(idx)
                    errors.append(
                        f"multi-GPU worker for shard {idx} exited with code "
                        f"{proc.exitcode} without reporting a result"
                    )
                continue

        pending.pop(message["shard_idx"], None)
        if message["ok"]:
            shard_results[message["shard_idx"]] = message["result"]
        else:
            errors.append(message["error"])

    for process in processes:
        process.join()
    if errors:
        raise RuntimeError("\n\n".join(errors))

    ordered_results = [shard_results[idx] for idx in range(len(processes))]
    episode_successes = np.concatenate(
        [
            np.asarray(result["metrics"]["episode_successes"], dtype=np.bool_)
            for result in ordered_results
        ]
    )

    seeds = None
    shard_seeds = [result["metrics"].get("seeds") for result in ordered_results]
    if all(seed is not None for seed in shard_seeds):
        seeds = np.concatenate(shard_seeds)

    metrics = {
        "success_rate": float(np.sum(episode_successes)) / len(episode_successes) * 100.0,
        "episode_successes": episode_successes,
        "seeds": seeds,
    }
    # Precision/compile settings come from the shared config, so shard 0 is
    # used as the representative for all shards.
    reference = ordered_results[0]
    return {
        "metrics": metrics,
        "evaluation_time": time.time() - start_time,
        "inference_precision": reference["inference_precision"],
        "compile_target": reference["compile_target"],
        "compile_mode": reference["compile_mode"],
        "profile_dir": None,
        "profile_summary_path": None,
    }
@hydra.main(version_base=None, config_path="./config/eval", config_name="pusht")
def run(cfg: DictConfig):
"""Run evaluation of dinowm vs random policy."""
assert (
cfg.plan_config.horizon * cfg.plan_config.action_block <= cfg.eval.eval_budget
), "Planning horizon must be smaller than or equal to eval_budget"
dataset = get_dataset(cfg, cfg.eval.dataset_name)
eval_episodes, eval_start_idx = sample_eval_cases(cfg, dataset)
output_dir = Path.cwd().resolve()
profile_cfg = get_profile_cfg(cfg)
if get_multi_gpu_cfg(cfg)["enabled"]:
if profile_cfg["enabled"]:
raise ValueError("Profiling is not supported together with multi_gpu.enabled=true")
eval_result = run_multi_gpu_eval(cfg, eval_episodes, eval_start_idx, output_dir)
else:
eval_result = run_eval_subset(
cfg,
eval_episodes.tolist(),
eval_start_idx.tolist(),
output_dir,
)
metrics = eval_result["metrics"]
evaluation_time = eval_result["evaluation_time"]
inference_precision = eval_result["inference_precision"]
compile_target = eval_result["compile_target"]
compile_mode = eval_result["compile_mode"]
profile_dir = eval_result["profile_dir"]
profile_summary_path = eval_result["profile_summary_path"]
print(metrics)
results_path = output_dir / cfg.output.filename
@@ -339,11 +542,11 @@ def run(cfg: DictConfig):
f.write("==== RESULTS ====\n")
f.write(f"metrics: {metrics}\n")
f.write(f"evaluation_time: {end_time - start_time} seconds\n")
f.write(f"evaluation_time: {evaluation_time} seconds\n")
f.write(f"inference_precision: {inference_precision}\n")
f.write(f"inference_compile_target: {compile_target}\n")
if compile_target != "disabled":
f.write(f"inference_compile_mode: {compile_cfg['mode']}\n")
f.write(f"inference_compile_mode: {compile_mode}\n")
if profile_cfg["enabled"]:
f.write(f"profile_dir: {profile_dir}\n")
if profile_summary_path is not None:

View File

@@ -2788,3 +2788,582 @@ evaluation_time: 90.14458179473877 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
multi_gpu:
enabled: true
devices:
- 0
- 1
==== RESULTS ====
metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False,
True, True, True, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 71.5921995639801 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
multi_gpu:
enabled: true
devices:
- 0
- 1
==== RESULTS ====
metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False,
True, True, True, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 38.652626514434814 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
multi_gpu:
enabled: true
devices:
- 0
- 1
==== RESULTS ====
metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False,
True, True, True, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 38.510936975479126 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
multi_gpu:
enabled: true
devices:
- 0
- 1
- 2
- 3
==== RESULTS ====
metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False,
True, True, True, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, False, True, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 80.60203051567078 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
==== RESULTS ====
metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, False, False, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 31.07235813140869 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
==== RESULTS ====
metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, False, False, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 30.950740575790405 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
multi_gpu:
enabled: true
devices:
- 0
- 1
- 2
==== RESULTS ====
metrics: {'success_rate': 92.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 191.0513756275177 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
multi_gpu:
enabled: true
devices:
- 0
- 1
- 2
==== RESULTS ====
metrics: {'success_rate': 92.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 133.4684717655182 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead
==== CONFIG ====
cache_dir: null
solver:
_target_: stable_worldmodel.solver.CEMSolver
model: ???
batch_size: 8
num_samples: 300
var_scale: 1.0
n_steps: 30
topk: 30
device: cuda
seed: ${seed}
world:
env_name: swm/TwoRoom-v1
num_envs: ${eval.num_eval}
max_episode_steps: ???
history_size: 1
frame_skip: 1
seed: 42
policy: two-room/tworoom/lejepa
inference_precision: fp16
dataset:
stats: ${eval.dataset_name}
keys_to_cache:
- action
- proprio
plan_config:
horizon: 5
receding_horizon: 5
action_block: 5
eval:
num_eval: 50
goal_offset_steps: 25
eval_budget: 50
img_size: 224
dataset_name: tworoom
callables:
- method: _set_state
args:
state:
value: proprio
- method: _set_goal_state
args:
goal_state:
value: goal_proprio
output:
filename: tworoom_results.txt
multi_gpu:
enabled: true
devices:
- 0
- 1
==== RESULTS ====
metrics: {'success_rate': 88.0, 'episode_successes': array([ True, False, True, False, True, True, True, True, False,
True, True, True, True, True, True, True, True, False,
True, True, True, True, True, True, True, True, True,
True, True, True, True, False, True, True, True, True,
True, True, True, False, True, True, True, True, True,
True, True, True, True, True]), 'seeds': None}
evaluation_time: 38.12551426887512 seconds
inference_precision: fp16
inference_compile_target: predictor
inference_compile_mode: reduce-overhead