Upload utils.py with huggingface_hub
utils.py
ADDED
@@ -0,0 +1,37 @@
import torch

def exists(val):
    return val is not None

# for controlling freezing during training of flamingo

def set_module_requires_grad_(module, requires_grad):
    for param in module.parameters():
        param.requires_grad = requires_grad

def freeze_all_layers_(module):
    set_module_requires_grad_(module, False)

def unfreeze_all_layers_(module):
    set_module_requires_grad_(module, True)

def freeze_model_and_make_eval_(model):
    model.eval()
    freeze_all_layers_(model)

# Builds an additive attention-window bias of shape
# (bsz, 1, tgt_len, tgt_len + past_key_values_length): keys at least
# `att_wd_size` positions behind a query are set to the dtype minimum
# (masked out); everything else, including future positions, stays 0.
def _make_att_wd_mask(
    input_ids_shape: torch.Size,
    dtype: torch.dtype, device: torch.device,
    past_key_values_length: int = 0,
    att_wd_size: int = 0,
):
    bsz, tgt_len = input_ids_shape
    mask = torch.full((tgt_len, tgt_len), torch.finfo(dtype).min, device=device)
    mask_cond = torch.arange(mask.size(-1), device=device)
    mask.masked_fill_(
        mask_cond > (mask_cond - att_wd_size).view(mask.size(-1), 1), 0)
    mask = mask.to(dtype)

    if past_key_values_length > 0:
        # Past (cached) positions are always visible: prepend zero bias columns.
        mask = torch.cat([torch.zeros(tgt_len, past_key_values_length, dtype=dtype, device=device), mask], dim=-1)
    return mask[None, None, :, :].expand(bsz, 1, tgt_len, tgt_len + past_key_values_length)
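
For reference, a minimal usage sketch of the helpers above (assuming utils.py is importable; the tiny Linear model and the window size of 3 are arbitrary stand-ins, not part of the upload):

import torch
from utils import freeze_model_and_make_eval_, _make_att_wd_mask

# Freeze a stand-in model: gradients off, eval mode on.
model = torch.nn.Linear(4, 4)
freeze_model_and_make_eval_(model)
assert not model.training
assert all(not p.requires_grad for p in model.parameters())

# Window-bias mask for one sequence of length 5 with a window of 3.
mask = _make_att_wd_mask(torch.Size([1, 5]), torch.float32, torch.device("cpu"), att_wd_size=3)
print(mask.shape)  # torch.Size([1, 1, 5, 5])

Note that row i of the mask is 0 (visible) for keys j > i - att_wd_size and dtype-min (masked) for j <= i - att_wd_size, so on its own it only blocks the far past; presumably it is added to a separate causal mask downstream to yield sliding-window attention.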