Files changed (1)
  1. app.py +46 -0
app.py ADDED
@@ -0,0 +1,46 @@
+ from fastapi import FastAPI
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+
+ app = FastAPI()
+
+ # Allow frontend requests
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Load model
+ model_name = "openai/gpt-oss-20b"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     torch_dtype=torch.float16,
+     device_map="auto"
+ )
+
+ class ChatRequest(BaseModel):
+     message: str
+
+ @app.post("/chat")
+ async def chat(req: ChatRequest):
+     messages = [{"role": "user", "content": req.message}]
+     inputs = tokenizer.apply_chat_template(
+         messages,
+         add_generation_prompt=True,
+         tokenize=True,
+         return_dict=True,
+         return_tensors="pt",
+     ).to(model.device)
+
+     outputs = model.generate(**inputs, max_new_tokens=100)
+     reply = tokenizer.decode(
+         outputs[0][inputs["input_ids"].shape[-1]:],
+         skip_special_tokens=True
+     )
+     return {"reply": reply}
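
A minimal sketch of exercising the new endpoint, assuming the server is started with `uvicorn app:app --port 8000` (the command, host, and port are assumptions, not part of this diff):

import requests  # third-party HTTP client, assumed to be installed

resp = requests.post(
    "http://localhost:8000/chat",             # assumed host and port
    json={"message": "Hello, who are you?"},  # matches the ChatRequest schema
    timeout=300,  # generation is slow; the first request also pays warm-up cost
)
resp.raise_for_status()
print(resp.json()["reply"])

One design note: model.generate runs synchronously inside the async handler, so concurrent requests will block each other. Offloading generation to a worker thread (for example via fastapi.concurrency.run_in_threadpool) is a common follow-up if the endpoint needs to serve more than one client at a time.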