from transformers import AutoModelForCausalLM, AutoTokenizer
model_name_or_path = ""  # local path or Hugging Face Hub ID of the model
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path, device_map="auto", load_in_4bit=True
)  # 4-bit quantization requires bitsandbytes; you may prefer bfloat16 instead (see the sketch below)
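# A sketch of the bfloat16 alternative mentioned in the comment above; it assumes
# torch is installed and skips 4-bit quantization entirely (left commented out so
# the main example runs unchanged):
#
#     import torch
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name_or_path, torch_dtype=torch.bfloat16, device_map="auto"
#     )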
messages = [
    {"role": "user", "content": "How to make pasta?"},
]
tokenized_chat = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    thinking_budget=0,  # control the thinking budget (model-specific kwarg forwarded to the chat template)
)
outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=60)
output_text = tokenizer.decode(outputs[0])
print(output_text)
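
# A minimal follow-up sketch: re-run the same prompt with a non-zero budget so the
# model can "think" before answering. Assumptions: the thinking_budget kwarg behaves
# as in the call above, and 512 is an arbitrary illustrative value, not a recommended
# setting.
tokenized_chat = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    thinking_budget=512,
)
outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512)
print(tokenizer.decode(outputs[0]))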