Downtown-Case's picture
Upload folder using huggingface_hub
d448ac8 verified
import os
import re

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
# Minimal chat-generation demo: load a tokenizer and a 4-bit quantized causal
# LM, render a single-turn conversation through the chat template, generate a
# short continuation and print the decoded text.

# Hub repo id or local checkpoint directory. Left empty as a placeholder; it
# must be filled in before the script is run.
model_name_or_path = ""

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)

# Load the weights quantized to 4 bits. Passing `load_in_4bit=True` directly to
# `from_pretrained` is the deprecated spelling; the supported form is a
# `BitsAndBytesConfig` handed over as `quantization_config`.
# `device_map="auto"` already takes care of device placement, so no manual
# `.to(...)` call on the model is needed afterwards.
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
)

messages = [
    {"role": "user", "content": "How to make pasta?"},
]

# `return_dict=True` makes the call return both `input_ids` and
# `attention_mask`, so `generate` receives an explicit mask instead of having
# to infer one. Extra keyword arguments such as `thinking_budget` are forwarded
# to the chat template itself.
tokenized_chat = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
    # control the thinking budget
    # NOTE(review): presumably 0 disables the thinking phase for this model's
    # template -- confirm against the model card.
    thinking_budget=0,
)

# Move the encoded prompt to the model's device and unpack it so that both the
# token ids and the attention mask are passed to `generate`.
outputs = model.generate(**tokenized_chat.to(model.device), max_new_tokens=60)

# For a causal LM the returned sequence is the prompt followed by the newly
# generated tokens, so the printed text includes the rendered prompt as well.
output_text = tokenizer.decode(outputs[0])
print(output_text)