Upload folder using huggingface_hub

Files changed (4) hide show

.ruff_cache/.gitignore ADDED Viewed


1	+ # Automatically created by ruff.
2	+ *

.ruff_cache/0.12.8/5591301162804142724 ADDED Viewed

Binary file (151 Bytes). View file

.ruff_cache/CACHEDIR.TAG ADDED Viewed

	@@ -0,0 +1 @@


1	+ Signature: 8a477f597d28d172789f06886806bc55

custom_generate/generate.py CHANGED Viewed

@@ -232,7 +232,9 @@ def _contrastive_search(
         ):
             # prepare inputs
             model_kwargs["use_cache"] = True
-            model_inputs = model.prepare_inputs_for_generation(input_ids, **model_kwargs)
             # encode the given prefix and prepare model inputs; encoder-decoder models process the prefix and save
             # the `encoder_outputs`
@@ -369,6 +371,11 @@ def _contrastive_search(
                     outputs["past_key_values"] = None
                     # Remove last token from past K-V since we don't want to append it at this point
                     model_kwargs["past_key_values"].crop(-1)
                 all_outputs.append(outputs)
             outputs = stack_model_outputs(all_outputs, model.config.get_text_config())
@@ -605,5 +612,7 @@ def generate(model, *args, **kwargs):
         penalty_alpha (`float`): The alpha value for the degeneration penalty.
         top_k (`int`): The number of candidates to consider at each step.
     """
-    generation_outputs = GenerationMixin.generate(model, *args, custom_generate=_contrastive_search, **kwargs)
     return generation_outputs

         ):
             # prepare inputs
             model_kwargs["use_cache"] = True
+            model_inputs = model.prepare_inputs_for_generation(
+                input_ids, **model_kwargs
+            )
             # encode the given prefix and prepare model inputs; encoder-decoder models process the prefix and save
             # the `encoder_outputs`
                     outputs["past_key_values"] = None
                     # Remove last token from past K-V since we don't want to append it at this point
                     model_kwargs["past_key_values"].crop(-1)
+                else:
+                    raise ValueError(
+                        f"Unsupported cache type: {type(outputs['past_key_values'])}. Contrastive search requires "
+                        "dynamic cache, so set `cache_implementation='dynamic'` in the generation config."
+                    )
                 all_outputs.append(outputs)
             outputs = stack_model_outputs(all_outputs, model.config.get_text_config())
         penalty_alpha (`float`): The alpha value for the degeneration penalty.
         top_k (`int`): The number of candidates to consider at each step.
     """
+    generation_outputs = GenerationMixin.generate(
+        model, *args, custom_generate=_contrastive_search, **kwargs
+    )
     return generation_outputs