sokada committed · verified
Commit a071714 · 1 Parent(s): efb4cf5

Update modeling_plamo.py


When the sequence length is equal to the attention window size, the forward pass should be able to run without building an attention mask, but the current require_attn_mask condition is too strict. This change makes the check correspond to the negation of the condition at
https://huggingface.co/pfnet/plamo-2-1b/blob/main/modeling_plamo.py#L1120
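
For illustration, a minimal sketch of the old vs. new check (the window size value is an arbitrary example, not the model's actual config value; the variable names seq_length_with_past and attention_window_size follow modeling_plamo.py):

# Minimal sketch of the old vs. new require_attn_mask check.
# 2048 is an arbitrary example window size, not the model's actual config value.
attention_window_size = 2048
seq_length_with_past = 2048  # prompt exactly as long as the attention window

# Old check: a mask was required even when seq len == window size.
old_require_attn_mask = seq_length_with_past >= attention_window_size
print(old_require_attn_mask)  # True -> an attention mask is built unnecessarily

# New check: a mask is only required once the sequence exceeds
# attention_window_size + 1, matching the negation of the L1120 condition.
new_require_attn_mask = seq_length_with_past > attention_window_size + 1
print(new_require_attn_mask)  # False -> forward can run without building a mask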

Files changed (1): modeling_plamo.py (+2 -2)

modeling_plamo.py CHANGED
@@ -1434,7 +1434,7 @@ class Plamo2Model(Plamo2PreTrainedModel):
         require_attn_mask = False
         if not self.training or past_key_values is not None:
             require_attn_mask = True
-        if seq_length_with_past >= self.config.attention_window_size:
+        if seq_length_with_past > self.config.attention_window_size + 1:
             require_attn_mask = True
         if require_attn_mask and attention_mask is None:
             attention_mask = torch.ones(
@@ -1704,4 +1704,4 @@ class Bias(nn.Module):
         self,
         x: torch.Tensor,
     ) -> torch.Tensor:
-        return x + self._bias
+        return x + self._bias