# NOTE(review): the original paste carried Hugging Face Spaces UI text
# ("Spaces: / Runtime error") above the code — the runtime error is most
# likely caused by the missing FastAPI route decorators fixed below.
"""FastAPI inference service wrapping a DeepSeek-R1 distill model.

Loads the tokenizer and causal-LM weights once at import time and exposes
them to the route handlers defined below.
"""
from fastapi import FastAPI
from pydantic import BaseModel
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

app = FastAPI()

# Hub repo of the model to serve. Note: loading a 14B model downloads
# many GB of weights and requires substantial GPU/CPU memory.
MODEL_NAME = "BlackGoku7/deepseek-ai-DeepSeek-R1-Distill-Qwen-14B"

# Load tokenizer and model once at startup; trust_remote_code is required
# for repos that ship custom modeling code — only safe for trusted repos.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",  # shard/pin layers across available devices
    torch_dtype=torch.bfloat16,  # Or torch.float16 if your Space supports it
    trust_remote_code=True,
)
model.eval()  # disable dropout etc. for deterministic-module inference
class Prompt(BaseModel):
    """Request body for /generate.

    Fields:
        text: the prompt string to continue.
        max_new_tokens: generation budget (defaults to 200 tokens).
    """

    text: str
    max_new_tokens: int = 200
@app.get("/")  # BUG FIX: decorator was missing, so the route was never registered
def root():
    """Health/usage hint endpoint: tells callers how to use /generate."""
    return {"message": "POST to /generate with {'text': 'your prompt'}"}
@app.post("/generate")  # BUG FIX: decorator was missing, so the route was never registered
def generate(prompt: Prompt):
    """Generate a continuation of ``prompt.text`` with sampling.

    Args:
        prompt: validated request body (text + max_new_tokens).

    Returns:
        dict with key ``response`` holding the full decoded sequence
        (note: this includes the original prompt text, since the whole
        output tensor is decoded).
    """
    inputs = tokenizer(prompt.text, return_tensors="pt").to(model.device)
    # inference_mode avoids autograd bookkeeping — model.eval() alone does
    # not disable gradient tracking, which wastes memory during generation.
    with torch.inference_mode():
        output = model.generate(
            **inputs,
            max_new_tokens=prompt.max_new_tokens,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
        )
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"response": decoded}