Update modeling_phi3.py
modeling_phi3.py  +1 −5
@@ -1294,10 +1294,6 @@ class Phi3ForCausalLM(Phi3PreTrainedModel):
                 cache_length = past_length = past_key_values[0][0].shape[2]
                 max_cache_length = None
 
-            else:
-                cache_length = past_length = past_key_values[0][0].shape[2]
-                max_cache_length = None
-
             # Keep only the unprocessed tokens:
             # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
             # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
@@ -1564,4 +1560,4 @@ class Phi3ForTokenClassification(Phi3PreTrainedModel):
             logits=logits,
             hidden_states=model_outputs.hidden_states,
             attentions=model_outputs.attentions,
-        )
+        )
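For context, the first hunk deletes a duplicated `else:` block. An `if` statement admits only one `else` clause, so the duplicate would make the module fail to parse at import time. Below is a minimal sketch of the repaired branch logic, written as a hypothetical standalone helper (the function name and signature are illustrative, not part of this commit; the `Cache` accessors follow the transformers API of the era this file targets and may differ in newer releases):

```python
# Hypothetical helper (not in the commit) mirroring the repaired branch.
from typing import Optional, Tuple

from transformers.cache_utils import Cache


def cache_lengths(past_key_values) -> Tuple[int, int, Optional[int]]:
    """Return (cache_length, past_length, max_cache_length) for either a
    Cache object or a legacy tuple-of-tuples key/value cache."""
    if isinstance(past_key_values, Cache):
        # Cache API as of this file's transformers target; `seen_tokens`
        # was deprecated in later versions.
        cache_length = past_key_values.get_seq_length()
        past_length = past_key_values.seen_tokens
        max_cache_length = past_key_values.get_max_length()
    else:
        # Legacy tuple cache: key tensors are shaped
        # (batch, num_heads, seq_len, head_dim), so dimension 2 is the
        # cached sequence length. The removed duplicate `else:` repeated
        # exactly this branch, which Python rejects as a SyntaxError.
        cache_length = past_length = past_key_values[0][0].shape[2]
        max_cache_length = None
    return cache_length, past_length, max_cache_length
```

Before this fix, loading the checkpoint with `trust_remote_code=True` would fail while importing `modeling_phi3.py`. The second hunk touches only the closing parenthesis of the `Phi3ForTokenClassification` return value and appears to be a whitespace-level change, consistent with the +1 −5 file stats.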