Update modernberg_model.py
Browse files — modernberg_model.py: +1 −13
modernberg_model.py
CHANGED
@@ -411,8 +411,6 @@ class GriffinRecurrentblock(nn.Module):
|
|
411 |
input_states: torch.Tensor,
|
412 |
position_ids: torch.Tensor,
|
413 |
attention_mask: torch.Tensor,
|
414 |
-
cache_position: torch.Tensor,
|
415 |
-
use_cache: bool = True,
|
416 |
**kwargs
|
417 |
) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
|
418 |
_, seq_len, _ = input_states.shape
|
@@ -423,17 +421,7 @@ class GriffinRecurrentblock(nn.Module):
|
|
423 |
x_branch = self.linear_x(input_states)
|
424 |
x_branch = x_branch.transpose(1, 2)
|
425 |
|
426 |
-
|
427 |
-
if cache_position.shape[0] != 1: # prefill
|
428 |
-
self.conv1d_state = nn.functional.pad(x_branch, (self.conv1d_width - x_branch.shape[-1] - 1, 0))
|
429 |
-
x_branch = self.conv_1d(x_branch)[..., :seq_len]
|
430 |
-
else: # decoding
|
431 |
-
conv_state = torch.cat((self.conv1d_state, x_branch), -1)
|
432 |
-
x_branch = torch.sum(conv_state * self.conv_1d.weight[:, 0, :], dim=-1) + self.conv_1d.bias
|
433 |
-
x_branch = x_branch.unsqueeze(-1)
|
434 |
-
self.conv1d_state = conv_state[:, :, 1:]
|
435 |
-
else:
|
436 |
-
x_branch = self.conv_1d(x_branch)[..., :seq_len]
|
437 |
|
438 |
x_branch = self.rg_lru(x_branch.transpose(1, 2), position_ids)
|
439 |
|
|
|
411 |
input_states: torch.Tensor,
|
412 |
position_ids: torch.Tensor,
|
413 |
attention_mask: torch.Tensor,
|
|
|
|
|
414 |
**kwargs
|
415 |
) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
|
416 |
_, seq_len, _ = input_states.shape
|
|
|
421 |
x_branch = self.linear_x(input_states)
|
422 |
x_branch = x_branch.transpose(1, 2)
|
423 |
|
424 |
+
x_branch = self.conv_1d(x_branch)[..., :seq_len]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
|
426 |
x_branch = self.rg_lru(x_branch.transpose(1, 2), position_ids)
|
427 |
|