Update app.py
app.py CHANGED
@@ -2,17 +2,14 @@
 #huggingface-llama-recipes : https://github.com/huggingface/huggingface-llama-recipes/tree/main
 
 import gradio as gr
-from openai import OpenAI
+from huggingface_hub import InferenceClient
 import os
 
 ACCESS_TOKEN = os.getenv("myHFtoken")
 
 print("Access token loaded.")
 
-client = OpenAI(
-    base_url="https://api-inference.huggingface.co/v1/",
-    api_key=ACCESS_TOKEN,
-)
+client = InferenceClient(api_key=ACCESS_TOKEN)
 
 print("Client initialized.")
 
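This hunk swaps the OpenAI-compatible client pointed at api-inference.huggingface.co for huggingface_hub's InferenceClient, which routes requests by model name. A minimal standalone sketch of the new setup, assuming huggingface_hub >= 0.24 (which provides the OpenAI-style client.chat.completions.create facade used later in the diff); the model name and prompt are placeholders, and myHFtoken is the Space secret referenced above:

import os
from huggingface_hub import InferenceClient

token = os.getenv("myHFtoken")
if token is None:
    # Fail fast if the Space secret is missing instead of sending anonymous requests.
    raise RuntimeError("Set the myHFtoken secret to a Hugging Face access token.")

client = InferenceClient(api_key=token)

# Non-streaming smoke test against one of the models listed further down.
out = client.chat.completions.create(
    model="meta-llama/Llama-3.2-3B-Instruct",
    messages=[{"role": "user", "content": "Say hello in five words."}],
    max_tokens=32,
)
print(out.choices[0].message.content)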
@@ -43,8 +40,8 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Selected model: {model_name}")
 
+    # Prepare messages for the Hugging Face API
     messages = [{"role": "system", "content": system_message}]
-
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
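For reference, the loop above converts Gradio's tuple-style chat history into OpenAI-style role/content dicts. A self-contained sketch with illustrative values (the history entries are placeholders; either side of a tuple may be empty, hence the truthiness checks):

system_message = "You are a helpful assistant."
history = [("Hello", "Hi! How can I help?"), ("What is 2 + 2?", "4")]

messages = [{"role": "system", "content": system_message}]
for user_msg, assistant_msg in history:
    if user_msg:
        messages.append({"role": "user", "content": user_msg})
    if assistant_msg:
        messages.append({"role": "assistant", "content": assistant_msg})
messages.append({"role": "user", "content": "And times ten?"})
print(messages)  # system, then alternating user/assistant turns, then the new prompt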
@@ -54,19 +51,21 @@ def respond(
             print(f"Added assistant message to context: {val[1]}")
 
     messages.append({"role": "user", "content": message})
-
     response = ""
-    print("Sending request to OpenAI API.")
-
-    for message in client.chat.completions.create(
+    print("Sending request to Hugging Face API.")
+
+    # Stream response from Hugging Face API
+    completion = client.chat.completions.create(
         model=model_name,
+        messages=messages,
         max_tokens=max_tokens,
-        stream=True,
         temperature=temperature,
         top_p=top_p,
-        messages=messages,
-    ):
-        token = message.choices[0].delta.content
+        stream=True,
+    )
+
+    for message in completion:
+        token = message.delta.get("content", "")
         print(f"Received token: {token}")
         response += token
         yield response
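One caveat with the added streaming loop: with huggingface_hub, each streamed chunk is a ChatCompletionStreamOutput whose text lives at choices[0].delta.content (a dataclass attribute, not a dict), so message.delta.get("content", "") would likely raise AttributeError at runtime; the loop variable also shadows respond()'s message argument. A corrected sketch under those assumptions (stream_reply is a hypothetical helper mirroring the loop, not part of the app):

def stream_reply(client, model_name, messages, max_tokens, temperature, top_p):
    # Hypothetical helper mirroring the streaming section of respond().
    completion = client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )
    response = ""
    for chunk in completion:  # 'chunk' avoids shadowing the message argument
        token = chunk.choices[0].delta.content or ""  # delta.content can be None
        response += token
        yield response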
@@ -74,16 +73,16 @@ def respond(
     print("Completed response generation.")
 
 models = [
+    "meta-llama/Llama-3.2-3B-Instruct",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
-    "ngxson/MiniThinky-v2-1B-Llama-3.2",
-    "meta-llama/Llama-3.2-3B-Instruct",
     "PowerInfer/SmallThinker-3B-Preview",
     "NovaSky-AI/Sky-T1-32B-Preview",
     "Qwen/QwQ-32B-Preview",
     "Qwen/Qwen2.5-Coder-32B-Instruct",
     "microsoft/Phi-3-mini-128k-instruct",
+    "microsoft/phi-4"
 ]
 
 with gr.Blocks() as demo:
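The Blocks body itself is outside this diff. For context, a minimal sketch of how a respond() generator with these extra controls is typically wired up in Gradio; the argument order, labels, and default values here are assumptions, not taken from the app:

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(1, 4096, value=512, step=1, label="Max new tokens"),
        gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p"),
        gr.Dropdown(choices=models, value=models[0], label="Model"),
    ],
)

demo.launch()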