Update modeling_plamo.py (#9)
Update modeling_plamo.py (e8dcdfe415d37765d1cf4396cea60cbeb6a8f0b9)
Co-authored-by: Shogo Murai <[email protected]>
modeling_plamo.py (changed, +12 -2)
```diff
@@ -19,6 +19,7 @@ import torch
 from torch import nn
 from torch.nn import functional as F
 from transformers import PretrainedConfig, PreTrainedModel
+from transformers.cache_utils import DynamicCache
 from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
 
 
@@ -327,7 +328,8 @@ class Plamo2Cache(torch.nn.Module):
                     if sequence_length is not None
                     else layer_cache.key.shape[2]
                 )
-
+        if sequence_length is None:
+            return 0
         return sequence_length
 
     def get_max_length(self) -> int | None:
@@ -1387,7 +1389,7 @@ class Plamo2Model(Plamo2PreTrainedModel):
         input_ids: Optional[torch.LongTensor] = None,
         attention_mask: Optional[torch.Tensor] = None,
         position_ids: Optional[torch.Tensor] = None,
-        past_key_values: Optional[Plamo2Cache] = None,
+        past_key_values: Optional[Plamo2Cache | DynamicCache] = None,
         inputs_embeds: Optional[torch.Tensor] = None,
         image_features: Optional[torch.Tensor] = None,
         use_cache: Optional[bool] = None,
@@ -1419,6 +1421,14 @@ class Plamo2Model(Plamo2PreTrainedModel):
         seq_length_with_past = seq_length
         past_key_values_length = 0
         if past_key_values is not None:
+            # In some `transformers` versions, `past_key_values` may be a `DynamicCache` object.
+            if not isinstance(past_key_values, Plamo2Cache):
+                past_key_values_prev = past_key_values
+                past_key_values = Plamo2Cache(self.config)
+
+                # If `past_key_values` is a `DynamicCache` object, it must be empty
+                assert len(past_key_values_prev) == 0
+            assert isinstance(past_key_values, Plamo2Cache)
             past_key_values_length = past_key_values.get_seq_length()
             seq_length_with_past = seq_length_with_past + past_key_values_length
         assert cache_position is None, "cache_position is not supported yet"
```
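Taken together, the patch lets callers (including newer `transformers` releases, which pre-build an empty `DynamicCache` before the first forward pass) hand the model a `DynamicCache`: `forward()` swaps it for the model's own `Plamo2Cache`, and an empty cache now reports a past length of 0 instead of tripping an assertion. Below is a minimal sketch of what this enables from the caller's side; the checkpoint name `pfnet/plamo-2-1b` and the `trust_remote_code` loading path are illustrative assumptions, not part of this commit.

```python
# Minimal sketch, not from the commit: assumes this modeling file is loaded via
# trust_remote_code for a PLaMo-2 checkpoint (pfnet/plamo-2-1b used as a stand-in).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.cache_utils import DynamicCache

model_name = "pfnet/plamo-2-1b"  # hypothetical target checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

inputs = tokenizer("Hello", return_tensors="pt")

# Some transformers versions pass a pre-built, empty DynamicCache into forward().
# With this patch the model accepts it and replaces it with a Plamo2Cache; the
# added assert requires the incoming DynamicCache to be empty (len == 0).
empty_cache = DynamicCache()
assert len(empty_cache) == 0

with torch.no_grad():
    out = model(**inputs, past_key_values=empty_cache, use_cache=True)

# The cache returned to the caller is the model's own cache object, now holding
# the prompt's states; its reported length should match the prompt token count.
print(type(out.past_key_values).__name__)
print(out.past_key_values.get_seq_length())
```

Whether an empty `DynamicCache` actually reaches `forward()` depends on the installed `transformers` version; with older versions the behaviour is unchanged, since `past_key_values` is either `None` or already a `Plamo2Cache`.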