Spaces:

inoculatemedia
/

NanoVLLM

Build error

NanoVLLM / app.py

Update app.py

8c400d4 verified 17 days ago

960 Bytes

	import os
	from nanovllm import LLM, SamplingParams
	from transformers import AutoTokenizer
	import torch

	def main(model_path: str = "~/huggingface/Qwen3-0.6B/") -> None:
	    """Run a small nano-vllm chat demo and print each completion.

	    Loads a tokenizer and an ``LLM`` engine from the same directory, wraps
	    two fixed user questions in the tokenizer's chat template, generates
	    all of them in one ``generate`` call with shared sampling settings, and
	    prints each rendered prompt followed by its completion.

	    Args:
	        model_path: Directory holding the model files. A leading ``~`` is
	            expanded to the user's home directory. Defaults to the path
	            this script originally hard-coded.

	    Raises:
	        FileNotFoundError: If ``model_path`` does not resolve to an
	            existing directory.
	    """
	    path = os.path.expanduser(model_path)
	    # Fail early with a message that names the directory instead of letting
	    # the loaders below report the problem in their own terms.
	    if not os.path.isdir(path):
	        raise FileNotFoundError(f"Model directory not found: {path}")

	    tokenizer = AutoTokenizer.from_pretrained(path)
	    llm = LLM(path, enforce_eager=True, tensor_parallel_size=1)

	    sampling_params = SamplingParams(temperature=0.6, max_tokens=256)
	    questions = [
	        "introduce yourself",
	        "list all prime numbers within 100",
	    ]
	    # tokenize=False keeps the rendered chat prompt as a string, which is
	    # what gets handed to generate() below.
	    # NOTE(review): enable_thinking is an extra keyword passed through to
	    # the chat template; presumably Qwen3-specific -- confirm if the model
	    # is swapped.
	    chat_prompts = [
	        tokenizer.apply_chat_template(
	            [{"role": "user", "content": question}],
	            tokenize=False,
	            add_generation_prompt=True,
	            enable_thinking=True,
	        )
	        for question in questions
	    ]
	    outputs = llm.generate(chat_prompts, sampling_params)

	    # The rendered (templated) prompt is printed, not the raw question.
	    for prompt, output in zip(chat_prompts, outputs):
	        print("\n")
	        print(f"Prompt: {prompt!r}")
	        print(f"Completion: {output['text']!r}")


	# Run the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()