SkyNetWalker committed
Commit 3bf4da9 · verified · 1 Parent(s): 0775334

Update app.py

Files changed (1)
  1. app.py +15 -16
app.py CHANGED
@@ -2,17 +2,14 @@
 #huggingface-llama-recipes : https://github.com/huggingface/huggingface-llama-recipes/tree/main
 
 import gradio as gr
-from openai import OpenAI
+from huggingface_hub import InferenceClient
 import os
 
 ACCESS_TOKEN = os.getenv("myHFtoken")
 
 print("Access token loaded.")
 
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1/",
-    api_key=ACCESS_TOKEN,
-)
+client = InferenceClient(api_key=ACCESS_TOKEN)
 
 print("Client initialized.")
 
@@ -43,8 +40,8 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Selected model: {model_name}")
 
+    # Prepare messages for the Hugging Face API
     messages = [{"role": "system", "content": system_message}]
-
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
@@ -54,19 +51,21 @@ def respond(
             print(f"Added assistant message to context: {val[1]}")
 
     messages.append({"role": "user", "content": message})
-
     response = ""
-    print("Sending request to OpenAI API.")
-
-    for message in client.chat.completions.create(
+    print("Sending request to Hugging Face API.")
+
+    # Stream response from Hugging Face API
+    completion = client.chat.completions.create(
         model=model_name,
+        messages=messages,
         max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
-        messages=messages,
-    ):
-        token = message.choices[0].delta.content
+        stream=True,
+    )
+
+    for message in completion:
+        token = message.delta.get("content", "")
         print(f"Received token: {token}")
         response += token
         yield response
@@ -74,16 +73,16 @@ def respond(
     print("Completed response generation.")
 
 models = [
+    "meta-llama/Llama-3.2-3B-Instruct",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
-    "ngxson/MiniThinky-v2-1B-Llama-3.2",
-    "meta-llama/Llama-3.2-3B-Instruct",
     "PowerInfer/SmallThinker-3B-Preview",
     "NovaSky-AI/Sky-T1-32B-Preview",
     "Qwen/QwQ-32B-Preview",
     "Qwen/Qwen2.5-Coder-32B-Instruct",
     "microsoft/Phi-3-mini-128k-instruct",
+    "microsoft/phi-4"
 ]
 
 with gr.Blocks() as demo:
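
For context, the change above swaps the OpenAI-compatible endpoint client for huggingface_hub.InferenceClient. Below is a minimal sketch of that streaming pattern, assuming a recent huggingface_hub release in which InferenceClient accepts api_key and exposes the OpenAI-style chat.completions.create method; the model name, prompt, and sampling values are illustrative only and are not part of the commit.

# Minimal sketch (not the committed app.py): streaming chat completion
# with huggingface_hub.InferenceClient, assuming a recent library version.
import os

from huggingface_hub import InferenceClient

client = InferenceClient(api_key=os.getenv("myHFtoken"))  # same env var as app.py

stream = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",  # illustrative pick from the model list
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
    max_tokens=64,
    temperature=0.7,
    top_p=0.95,
    stream=True,
)

response = ""
for chunk in stream:
    # Streamed chunks usually expose the incremental text on choices[0].delta.content;
    # it can be None on the final chunk, hence the fallback to "".
    token = chunk.choices[0].delta.content or ""
    response += token
    print(token, end="", flush=True)
print()

Note that the committed loop reads each token as message.delta.get("content", ""); whether that attribute path exists depends on the huggingface_hub version, and the choices[0].delta.content form shown here is the shape the streamed objects usually have.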