速度变化不明显，PSNR 显著提升

This commit is contained in:
qhy
2026-02-11 16:38:21 +08:00
parent f386a5810b
commit 3101252c25
4 changed files with 58 additions and 37 deletions

View File

@@ -988,7 +988,7 @@ class LatentDiffusion(DDPM):
def instantiate_cond_stage(self, config: OmegaConf) -> None:
"""
Build the conditioning stage model.
Build the conditioning stage model. Frozen models are converted to FP16.
Args:
config: OmegaConf config describing the conditioning model to instantiate.
@@ -1000,6 +1000,7 @@ class LatentDiffusion(DDPM):
self.cond_stage_model.train = disabled_train
for param in self.cond_stage_model.parameters():
param.requires_grad = False
self.cond_stage_model.half()
else:
model = instantiate_from_config(config)
self.cond_stage_model = model
@@ -1014,17 +1015,18 @@ class LatentDiffusion(DDPM):
Returns:
Conditioning embedding as a tensor (shape depends on cond model).
"""
if self.cond_stage_forward is None:
if hasattr(self.cond_stage_model, 'encode') and callable(
self.cond_stage_model.encode):
c = self.cond_stage_model.encode(c)
if isinstance(c, DiagonalGaussianDistribution):
c = c.mode()
with torch.cuda.amp.autocast(dtype=torch.float16):
if self.cond_stage_forward is None:
if hasattr(self.cond_stage_model, 'encode') and callable(
self.cond_stage_model.encode):
c = self.cond_stage_model.encode(c)
if isinstance(c, DiagonalGaussianDistribution):
c = c.mode()
else:
c = self.cond_stage_model(c)
else:
c = self.cond_stage_model(c)
else:
assert hasattr(self.cond_stage_model, self.cond_stage_forward)
c = getattr(self.cond_stage_model, self.cond_stage_forward)(c)
assert hasattr(self.cond_stage_model, self.cond_stage_forward)
c = getattr(self.cond_stage_model, self.cond_stage_forward)(c)
return c
def get_first_stage_encoding(
@@ -1957,6 +1959,7 @@ class LatentVisualDiffusion(LatentDiffusion):
self.image_proj_model.train = disabled_train
for param in self.image_proj_model.parameters():
param.requires_grad = False
self.image_proj_model.half()
def _init_embedder(self, config: OmegaConf, freeze: bool = True) -> None:
"""
@@ -1972,6 +1975,7 @@ class LatentVisualDiffusion(LatentDiffusion):
self.embedder.train = disabled_train
for param in self.embedder.parameters():
param.requires_grad = False
self.embedder.half()
def init_normalizers(self, normalize_config: OmegaConf,
dataset_stats: Mapping[str, Any]) -> None:
@@ -2175,8 +2179,9 @@ class LatentVisualDiffusion(LatentDiffusion):
(random_num < 3 * self.uncond_prob).float(), "n -> n 1 1 1")
cond_img = input_mask * img
cond_img_emb = self.embedder(cond_img)
cond_img_emb = self.image_proj_model(cond_img_emb)
with torch.cuda.amp.autocast(dtype=torch.float16):
cond_img_emb = self.embedder(cond_img)
cond_img_emb = self.image_proj_model(cond_img_emb)
if self.model.conditioning_key == 'hybrid':
if self.interp_mode:
@@ -2191,11 +2196,12 @@ class LatentVisualDiffusion(LatentDiffusion):
repeat=z.shape[2])
cond["c_concat"] = [img_cat_cond]
cond_action = self.action_projector(action)
cond_action_emb = self.agent_action_pos_emb + cond_action
# Get conditioning states
cond_state = self.state_projector(obs_state)
cond_state_emb = self.agent_state_pos_emb + cond_state
with torch.cuda.amp.autocast(dtype=torch.float16):
cond_action = self.action_projector(action)
cond_action_emb = self.agent_action_pos_emb + cond_action
# Get conditioning states
cond_state = self.state_projector(obs_state)
cond_state_emb = self.agent_state_pos_emb + cond_state
if self.decision_making_only:
is_sim_mode = False