Update modeling_plamo.py
When the seq len equals the attention window size, the forward pass should work without building an attention mask, but the current require_attn_mask condition is too strict. This change makes the check match the negation (not) of the condition at
https://huggingface.co/pfnet/plamo-2-1b/blob/main/modeling_plamo.py#L1120
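For reference, a minimal sketch of the mask-requirement decision after this change; the needs_attention_mask helper and its signature are illustrative and not part of modeling_plamo.py, only the comparisons follow the diff below:

# Hypothetical helper; only the comparisons mirror the diff below.
def needs_attention_mask(
    training: bool,
    has_past_key_values: bool,
    seq_length_with_past: int,
    attention_window_size: int,
) -> bool:
    # Outside training, or when a KV cache is present, a mask is always required.
    if not training or has_past_key_values:
        return True
    # Relaxed condition: a mask is required only once seq_length_with_past
    # exceeds attention_window_size + 1, so a training batch whose seq len
    # equals the window size can now run forward without a mask.
    if seq_length_with_past > attention_window_size + 1:
        return True
    return False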
- modeling_plamo.py +2 -2
modeling_plamo.py CHANGED
@@ -1434,7 +1434,7 @@ class Plamo2Model(Plamo2PreTrainedModel):
         require_attn_mask = False
         if not self.training or past_key_values is not None:
             require_attn_mask = True
-        if seq_length_with_past
+        if seq_length_with_past > self.config.attention_window_size + 1:
             require_attn_mask = True
         if require_attn_mask and attention_mask is None:
             attention_mask = torch.ones(
@@ -1704,4 +1704,4 @@ class Bias(nn.Module):
         self,
         x: torch.Tensor,
     ) -> torch.Tensor:
-        return x + self._bias
+        return x + self._bias
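As a quick sanity check of the relaxed condition, using the needs_attention_mask sketch above (the window size value here is illustrative, not necessarily the model's configured attention_window_size):

window = 2048  # illustrative value
# seq len equal to the window size: no mask needed during training without a cache
assert not needs_attention_mask(True, False, window, window)
# sequence longer than window + 1: a mask is required again
assert needs_attention_mask(True, False, window + 2, window)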