Upload folder using huggingface_hub
Browse files
- modeling_deepseek.py +5 -3
modeling_deepseek.py
CHANGED
@@ -445,10 +445,12 @@ class DeepSeekForCausalLM(DeepSeekPreTrainedModel):
|
|
445 |
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, cache_position=None, **kwargs
|
446 |
):
|
447 |
# Standard implementation for generation
|
|
|
|
|
448 |
if past_key_values is not None:
|
449 |
-
if inputs_embeds is not None:
|
450 |
input_ids = input_ids[:, -cache_position.shape[0] :]
|
451 |
-
elif input_ids.shape[1] != cache_position.shape[0]:
|
452 |
input_ids = input_ids[:, cache_position]
|
453 |
|
454 |
if attention_mask is not None and position_ids is None:
|
@@ -457,7 +459,7 @@ class DeepSeekForCausalLM(DeepSeekPreTrainedModel):
|
|
457 |
if past_key_values:
|
458 |
position_ids = position_ids[:, -input_ids.shape[1] :]
|
459 |
|
460 |
-
if inputs_embeds is not None and cache_position[0] == 0:
|
461 |
model_inputs = {"inputs_embeds": inputs_embeds}
|
462 |
else:
|
463 |
model_inputs = {"input_ids": input_ids}
|
|
|
445 |
self, input_ids, past_key_values=None, attention_mask=None, inputs_embeds=None, cache_position=None, **kwargs
|
446 |
):
|
447 |
# Standard implementation for generation
|
448 |
+
position_ids = None
|
449 |
+
|
450 |
if past_key_values is not None:
|
451 |
+
if inputs_embeds is not None and cache_position is not None:
|
452 |
input_ids = input_ids[:, -cache_position.shape[0] :]
|
453 |
+
elif cache_position is not None and input_ids.shape[1] != cache_position.shape[0]:
|
454 |
input_ids = input_ids[:, cache_position]
|
455 |
|
456 |
if attention_mask is not None and position_ids is None:
|
|
|
459 |
if past_key_values:
|
460 |
position_ids = position_ids[:, -input_ids.shape[1] :]
|
461 |
|
462 |
+
if inputs_embeds is not None and cache_position is not None and cache_position[0] == 0:
|
463 |
model_inputs = {"inputs_embeds": inputs_embeds}
|
464 |
else:
|
465 |
model_inputs = {"input_ids": input_ids}
|