legacy cache support
Browse files- modeling_arctic.py +7 -3
modeling_arctic.py
CHANGED
```diff
@@ -1763,9 +1763,13 @@ class ArcticForCausalLM(ArcticPreTrainedModel):
     ):
         # Omit tokens covered by past_key_values
         if past_key_values is not None:
-            [removed line 1766 — content not preserved in this extraction]
-            [removed line 1767 — content not preserved in this extraction]
-            [removed line 1768 — content not preserved in this extraction]
+            if isinstance(past_key_values, Cache):
+                cache_length = past_key_values.get_seq_length()
+                past_length = past_key_values.seen_tokens
+                max_cache_length = past_key_values.get_max_length() if hasattr(past_key_values, "get_max_length") else past_key_values.get_max_cache_shape()
+            else:
+                cache_length = past_length = past_key_values[0][0].shape[2]
+                max_cache_length = None

             # Keep only the unprocessed tokens:
             # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
```

NOTE(review): this span was a flattened side-by-side diff render; it has been
reconstructed above as a unified diff. The three removed lines (old 1766–1768)
had no content in the extracted page — only bare `-` markers survived — so they
are shown as placeholders rather than guessed at. Presumably they were the
unconditional `Cache`-method calls that the new `isinstance(past_key_values,
Cache)` branch now guards (hence the commit title "legacy cache support") —
verify against the original commit before relying on this.