Optimize inference path: add predictor-only torch.compile with reduce-overhead

2026-04-09 10:00:13 +00:00
parent f2750daace
commit 38be7d3bef
1 changed files with 70 additions and 0 deletions
--- a/eval.py
+++ b/eval.py
@@ -67,6 +67,63 @@ def get_profile_cfg(cfg):
    return profile_cfg
 def get_compile_cfg(cfg):
    """Return the effective inference-compilation settings as a plain dict.

    Starts from the built-in defaults and, when ``cfg`` has a non-``None``
    ``compile`` entry, overlays that entry (converted to a plain container
    with interpolations resolved) on top of them.

    Args:
        cfg: Run configuration; only ``cfg.get("compile")`` is read.

    Returns:
        A new dict with at least the keys ``enabled``, ``target``, ``mode``,
        ``fullgraph``, ``dynamic`` and ``cuda_only``.
    """
    settings = dict(
        enabled=True,
        target="predictor",
        mode="reduce-overhead",
        fullgraph=False,
        dynamic=False,
        cuda_only=True,
    )
    overrides = cfg.get("compile")
    if overrides is None:
        # No compile section configured: the defaults apply unchanged.
        return settings
    settings.update(OmegaConf.to_container(overrides, resolve=True))
    return settings
 def maybe_compile_inference_target(model, cfg, device):
    """Optionally wrap one attribute of ``model`` with ``torch.compile``.

    The attribute is selected by ``compile.target`` ("predictor" or
    "predict") and the compiled object is assigned back onto ``model`` in
    place. Nothing is compiled when compilation is disabled in the config,
    when ``torch.compile`` does not exist, when ``compile.cuda_only`` is set
    and ``device`` is not a CUDA device, when the target name is unsupported,
    or when the model has no usable attribute of that name. Every skip except
    the "disabled" one prints a notice.

    Args:
        model: Object whose ``predictor`` or ``predict`` attribute may be
            replaced by its compiled counterpart.
        cfg: Run configuration, forwarded to ``get_compile_cfg``.
        device: Device identifier; only its string form is inspected.

    Returns:
        A ``(model, compile_cfg, compile_target)`` tuple. ``compile_target``
        is the name of the compiled attribute, or ``"disabled"`` when nothing
        was compiled.
    """
    supported_targets = ("predictor", "predict")
    compile_cfg = get_compile_cfg(cfg)
    compile_target = "disabled"

    if not compile_cfg["enabled"]:
        return model, compile_cfg, compile_target
    if not hasattr(torch, "compile"):
        print("torch.compile is unavailable, skipping inference compilation.")
        return model, compile_cfg, compile_target
    if compile_cfg["cuda_only"] and not str(device).startswith("cuda"):
        print("Skipping torch.compile because compile.cuda_only=true and device is not CUDA.")
        return model, compile_cfg, compile_target

    target = str(compile_cfg["target"]).lower()
    if target not in supported_targets:
        print(
            f"Unsupported compile.target={target}. "
            f"Expected one of: {', '.join(supported_targets)}."
        )
        return model, compile_cfg, compile_target

    # torch.compile(None, ...) returns a decorator instead of raising, so an
    # attribute that exists but is None must be rejected here; otherwise the
    # model attribute would silently be replaced by that decorator.
    compile_subject = getattr(model, target, None)
    if compile_subject is None:
        print(f"Requested compile target '{target}' is unavailable on the model.")
        return model, compile_cfg, compile_target

    compiled = torch.compile(
        compile_subject,
        mode=compile_cfg["mode"],
        fullgraph=compile_cfg["fullgraph"],
        dynamic=compile_cfg["dynamic"],
    )
    setattr(model, target, compiled)
    compile_target = target
    return model, compile_cfg, compile_target
 def get_inference_context(cfg, device):
    precision = str(cfg.get("inference_precision", "fp32")).lower()
    device_type = "cuda" if device.startswith("cuda") else "cpu"
@@ -182,9 +239,17 @@ def run(cfg: DictConfig):
        model = model.to(device)
        model = model.eval()
        model.requires_grad_(False)
        model, compile_cfg, compile_target = maybe_compile_inference_target(
            model, cfg, device
        )
        print(f"model parameter dtype: {next(model.parameters()).dtype}")
        inference_ctx, inference_precision = get_inference_context(cfg, device)
        print(f"inference execution precision: {inference_precision}")
        if compile_target != "disabled":
            print(
                f"inference compile target: {compile_target} "
                f"(mode={compile_cfg['mode']})"
            )
        model.interpolate_pos_encoding = True
        config = swm.PlanConfig(**cfg.plan_config)
        solver = hydra.utils.instantiate(cfg.solver, model=model)
@@ -196,6 +261,8 @@ def run(cfg: DictConfig):
        policy = swm.policy.RandomPolicy()
        inference_ctx = nullcontext()
        inference_precision = "fp32"
        compile_cfg = get_compile_cfg(cfg)
        compile_target = "disabled"
    # Hydra switches the working directory to the per-run outputs folder.
    # Keep all generated artifacts with that run instead of scattering them
@@ -274,6 +341,9 @@ def run(cfg: DictConfig):
        f.write(f"metrics: {metrics}\n")
        f.write(f"evaluation_time: {end_time - start_time} seconds\n")
        f.write(f"inference_precision: {inference_precision}\n")
        f.write(f"inference_compile_target: {compile_target}\n")
        if compile_target != "disabled":
            f.write(f"inference_compile_mode: {compile_cfg['mode']}\n")
        if profile_cfg["enabled"]:
            f.write(f"profile_dir: {profile_dir}\n")
            if profile_summary_path is not None: