"""Gradio chat UI that forwards each turn to a Hugging Face Inference API model."""

import os

import gradio as gr
import requests

# Read the token from the environment. It could also be written here
# directly, but a real token should never be committed to source control.
API_TOKEN = os.environ.get("HF_HUB_API_TOKEN")
API_URL = "https://api-inference.huggingface.co/models/Qwen/Qwen3-8B-Base"

# Upper bound (seconds) for one inference request; without it a stalled
# endpoint would block the UI handler forever.
REQUEST_TIMEOUT = 60

headers = {"Content-Type": "application/json"}
if API_TOKEN:
    # Only send credentials when a token is configured; otherwise the header
    # would literally read "Bearer None".
    headers["Authorization"] = f"Bearer {API_TOKEN}"


def _iter_turns(history):
    """Yield ``(role, text)`` pairs from a Gradio chat history.

    Accepts both history layouts: a list of ``[user, assistant]`` pairs and a
    list of ``{"role": ..., "content": ...}`` dicts. Empty or non-text entries
    are skipped.
    """
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = entry.get("content")
            if role in ("user", "assistant") and isinstance(text, str) and text:
                yield role, text
        else:
            for role, text in zip(("user", "assistant"), list(entry)[:2]):
                if isinstance(text, str) and text:
                    yield role, text


def _build_prompt(message, history, system_message):
    """Flatten the system message, prior turns and new message into one prompt."""
    lines = []
    if system_message:
        lines.append(f"System: {system_message}")
    for role, text in _iter_turns(history):
        label = "User" if role == "user" else "Assistant"
        lines.append(f"{label}: {text}")
    lines.append(f"User: {message}")
    # Trailing cue so the model continues with the assistant's reply.
    lines.append("Assistant:")
    return "\n".join(lines)


def _extract_generated_text(result):
    """Return the ``generated_text`` string from an API response, or ``None``."""
    if isinstance(result, list) and result:
        result = result[0]
    if isinstance(result, dict):
        text = result.get("generated_text")
        if isinstance(text, str):
            return text
    return None


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant reply for one chat turn.

    Args:
        message: The user's newest message.
        history: Previous turns as supplied by ``gr.ChatInterface``.
        system_message: Instruction text placed at the top of the prompt.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated reply, or a bracketed diagnostic string
        (``[HTTP ...]``, ``[ERROR] ...``, ``[Exception] ...``) on failure.
        Errors are returned rather than raised so they show up in the chat.
    """
    payload = {
        # Text-generation models take a single prompt string as ``inputs``.
        "inputs": _build_prompt(message, history, system_message),
        "parameters": {
            "temperature": temperature,
            "max_new_tokens": int(max_tokens),
            "top_p": top_p,
            # Ask for the completion only, not the echoed prompt.
            "return_full_text": False,
        },
    }

    try:
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        return f"[Exception] {exc}"

    if response.status_code != 200:
        return f"[HTTP {response.status_code}] {response.text}"

    try:
        result = response.json()
    except ValueError as exc:  # body was not valid JSON
        return f"[Exception] {exc}"

    if isinstance(result, dict) and result.get("error"):
        return f"[ERROR] {result['error']}"

    text = _extract_generated_text(result)
    if text is None:
        return f"[ERROR] Unexpected response format: {result!r}"

    # A base model may keep writing further turns; keep only the first reply.
    return text.split("\nUser:", 1)[0].strip()


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()