Spaces:

SkyNetWalker
/

HF-LLMs

Running

App Files Community

SkyNetWalker committed 17 days ago

Commit

e528476

verified ·

1 Parent(s): 3bf4da9

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -15

app.py CHANGED Viewed

@@ -40,8 +40,8 @@ def respond(
     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Selected model: {model_name}")
-    # Prepare messages for the Hugging Face API
     messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
@@ -51,21 +51,21 @@ def respond(
             print(f"Added assistant message to context: {val[1]}")
     messages.append({"role": "user", "content": message})
     response = ""
-    print("Sending request to Hugging Face API.")
-    # Stream response from Hugging Face API
-    completion = client.chat.completions.create(
         model=model_name,
         messages=messages,
         max_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
-        stream=True,
-    )
-    for message in completion:
-        token = message.delta.get("content", "")
         print(f"Received token: {token}")
         response += token
         yield response
@@ -73,16 +73,16 @@ def respond(
     print("Completed response generation.")
 models = [
-    "meta-llama/Llama-3.2-3B-Instruct",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
     "PowerInfer/SmallThinker-3B-Preview",
     "NovaSky-AI/Sky-T1-32B-Preview",
     "Qwen/QwQ-32B-Preview",
     "Qwen/Qwen2.5-Coder-32B-Instruct",
     "microsoft/Phi-3-mini-128k-instruct",
-    "microsoft/phi-4"
 ]
 with gr.Blocks() as demo:
@@ -95,7 +95,6 @@ with gr.Blocks() as demo:
             label="Select Model:"
         )
-    # Create the chat components separately
     chatbot = gr.Chatbot(height=500)
     msg = gr.Textbox(
         show_label=False,
@@ -104,7 +103,6 @@ with gr.Blocks() as demo:
     )
     clear = gr.Button("Clear")
-    # Additional inputs
     with gr.Accordion("Configuration", open=False):
         preset_prompt = gr.Dropdown(
             choices=list(SYSTEM_PROMPTS.keys()),
@@ -138,7 +136,6 @@ with gr.Blocks() as demo:
             label="Top-P:"
         )
-    # Set up the chat functionality
     def user(user_message, history):
         return "", history + [[user_message, None]]
@@ -182,4 +179,4 @@ print("Gradio interface initialized.")
 if __name__ == "__main__":
     print("Launching the demo application.")
-    demo.launch()

     print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
     print(f"Selected model: {model_name}")
     messages = [{"role": "system", "content": system_message}]
     for val in history:
         if val[0]:
             messages.append({"role": "user", "content": val[0]})
             print(f"Added assistant message to context: {val[1]}")
     messages.append({"role": "user", "content": message})
     response = ""
+    print("Sending request to Hugging Face API.")
+    for chunk in client.chat.completions.create(
         model=model_name,
         messages=messages,
         max_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
+        stream=True
+    ):
+        # Correctly access the delta content from Hugging Face's response format
+        token = chunk.choices[0].delta.content or ""
         print(f"Received token: {token}")
         response += token
         yield response
     print("Completed response generation.")
 models = [
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
     "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
+    "ngxson/MiniThinky-v2-1B-Llama-3.2",
+    "meta-llama/Llama-3.2-3B-Instruct",
     "PowerInfer/SmallThinker-3B-Preview",
     "NovaSky-AI/Sky-T1-32B-Preview",
     "Qwen/QwQ-32B-Preview",
     "Qwen/Qwen2.5-Coder-32B-Instruct",
     "microsoft/Phi-3-mini-128k-instruct",
 ]
 with gr.Blocks() as demo:
             label="Select Model:"
         )
     chatbot = gr.Chatbot(height=500)
     msg = gr.Textbox(
         show_label=False,
     )
     clear = gr.Button("Clear")
     with gr.Accordion("Configuration", open=False):
         preset_prompt = gr.Dropdown(
             choices=list(SYSTEM_PROMPTS.keys()),
             label="Top-P:"
         )
     def user(user_message, history):
         return "", history + [[user_message, None]]
 if __name__ == "__main__":
     print("Launching the demo application.")
+    demo.launch()