Optimize small-tensor allocations inside the DDIM loop; cache the attention mask on the GPU
@@ -275,7 +275,8 @@ class CrossAttention(nn.Module):
                 attn_mask_aa = self._get_attn_mask_aa(x.shape[0],
                                                       q.shape[1],
                                                       k_aa.shape[1],
-                                                      block_size=16).to(k_aa.device)
+                                                      block_size=16,
+                                                      device=k_aa.device)
             else:
                 if not spatial_self_attn:
                     assert 1 > 2, ">>> ERROR: you should never go into here ..."
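For context, a minimal standalone sketch of what this call-site change buys (the shapes and device below are illustrative placeholders, not values from the repo): passing `device=` at construction time allocates the tensor on the GPU directly, whereas the old `.to(k_aa.device)` first materialized it on the CPU and then paid for a host-to-device copy on every DDIM step.

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Old pattern: tensor is built on the CPU, then copied to the GPU each call.
mask = torch.zeros(4, 64, 64).to(device)

# New pattern: tensor is built on the target device; no staging copy at all.
mask = torch.zeros(4, 64, 64, device=device)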
@@ -386,14 +387,26 @@ class CrossAttention(nn.Module):
 
         return self.to_out(out)
 
-    def _get_attn_mask_aa(self, b, l1, l2, block_size=16):
+    def _get_attn_mask_aa(self, b, l1, l2, block_size=16, device=None):
+        cache_key = (b, l1, l2, block_size)
+        if hasattr(self, '_attn_mask_aa_cache_key') and self._attn_mask_aa_cache_key == cache_key:
+            cached = self._attn_mask_aa_cache
+            if device is not None and cached.device != torch.device(device):
+                cached = cached.to(device)
+                self._attn_mask_aa_cache = cached
+            return cached
+
+        target_device = device if device is not None else 'cpu'
         num_token = l2 // block_size
-        start_positions = ((torch.arange(b) % block_size) + 1) * num_token
-        col_indices = torch.arange(l2)
+        start_positions = ((torch.arange(b, device=target_device) % block_size) + 1) * num_token
+        col_indices = torch.arange(l2, device=target_device)
         mask_2d = col_indices.unsqueeze(0) >= start_positions.unsqueeze(1)
         mask = mask_2d.unsqueeze(1).expand(b, l1, l2)
-        attn_mask = torch.zeros_like(mask, dtype=torch.float)
+        attn_mask = torch.zeros(b, l1, l2, dtype=torch.float, device=target_device)
         attn_mask[mask] = float('-inf')
+
+        self._attn_mask_aa_cache_key = cache_key
+        self._attn_mask_aa_cache = attn_mask
         return attn_mask
 
 
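To see the caching behavior in isolation, here is the new method lifted into a minimal stand-in class (`MaskCacheDemo` is hypothetical; in the repo the method lives on `CrossAttention`). Repeated DDIM steps with the same (b, l1, l2, block_size) hit the cache and reuse the same device-resident tensor instead of rebuilding it:

import torch

class MaskCacheDemo:
    # Stand-in holding only the cached mask builder from the diff above.
    def _get_attn_mask_aa(self, b, l1, l2, block_size=16, device=None):
        cache_key = (b, l1, l2, block_size)
        if getattr(self, '_attn_mask_aa_cache_key', None) == cache_key:
            cached = self._attn_mask_aa_cache
            if device is not None and cached.device != torch.device(device):
                cached = cached.to(device)          # move once, then reuse
                self._attn_mask_aa_cache = cached
            return cached

        # Cache miss: build every index tensor on the target device up front.
        target_device = device if device is not None else 'cpu'
        num_token = l2 // block_size
        start_positions = ((torch.arange(b, device=target_device) % block_size) + 1) * num_token
        col_indices = torch.arange(l2, device=target_device)
        mask_2d = col_indices.unsqueeze(0) >= start_positions.unsqueeze(1)
        mask = mask_2d.unsqueeze(1).expand(b, l1, l2)
        attn_mask = torch.zeros(b, l1, l2, dtype=torch.float, device=target_device)
        attn_mask[mask] = float('-inf')

        self._attn_mask_aa_cache_key = cache_key
        self._attn_mask_aa_cache = attn_mask
        return attn_mask

demo = MaskCacheDemo()
device = 'cuda' if torch.cuda.is_available() else 'cpu'
m1 = demo._get_attn_mask_aa(4, 64, 64, block_size=16, device=device)
m2 = demo._get_attn_mask_aa(4, 64, 64, block_size=16, device=device)
print(m1 is m2)  # True: the second call is a cache hit, no reallocation

Because the cache is keyed on the full shape tuple, any shape change transparently rebuilds the mask, and writing the moved tensor back into `_attn_mask_aa_cache` means the sampling loop pays for at most one host-to-device copy in total.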