laloadrianmorales committed on
Commit
1e6d961
Β·
verified Β·
1 Parent(s): 790159d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +406 -48
app.py CHANGED
@@ -1,64 +1,422 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
 
 
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
 
 
9
 
10
- def respond(
11
- message,
12
- history: list[tuple[str, str]],
13
- system_message,
14
- max_tokens,
15
- temperature,
16
- top_p,
17
- ):
18
- messages = [{"role": "system", "content": system_message}]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- for val in history:
21
- if val[0]:
22
- messages.append({"role": "user", "content": val[0]})
23
- if val[1]:
24
- messages.append({"role": "assistant", "content": val[1]})
 
 
 
 
 
 
 
 
 
25
 
 
 
 
 
 
 
 
26
  messages.append({"role": "user", "content": message})
 
 
 
 
 
 
 
 
 
 
 
 
27
 
 
 
 
 
 
 
28
  response = ""
 
 
 
 
 
29
 
30
- for message in client.chat_completion(
31
- messages,
32
- max_tokens=max_tokens,
33
- stream=True,
34
- temperature=temperature,
35
- top_p=top_p,
36
- ):
37
- token = message.choices[0].delta.content
38
 
39
- response += token
40
- yield response
 
 
 
41
 
 
 
 
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+ from typing import List, Tuple
4
+ import json
5
+ import time
6
 
7
# Configure the model and provider
# NOTE(review): MODEL_ID is only referenced in display text elsewhere in this
# file — confirm the real inference call (not shown here) actually uses it.
MODEL_ID = "openai/gpt-oss-120b"
DEFAULT_PROVIDER = "groq" # Can be changed to fireworks, hyperbolic, etc.

# System prompts for different modes.
# The keys double as the choices of the "System Mode" dropdown in the UI.
SYSTEM_PROMPTS = {
    "default": "You are a helpful AI assistant.",
    "creative": "You are a creative and imaginative AI that thinks outside the box.",
    "technical": "You are a technical expert AI that provides detailed, accurate technical information.",
    "concise": "You are a concise AI that provides brief, to-the-point responses.",
    "teacher": "You are a patient teacher who explains concepts clearly with examples.",
    "coder": "You are an expert programmer who writes clean, efficient, well-commented code.",
}

# CSS for dark theme and custom styling.
# Injected via gr.Blocks(css=custom_css); selectors target the elem_id /
# elem_classes values set on the widgets below.
custom_css = """
#chatbot {
    height: 600px !important;
    background: #0a0a0a;
}
#chatbot .message {
    font-size: 14px;
    line-height: 1.6;
}
.dark {
    background: #0a0a0a;
}
.user-message {
    background: rgba(0, 255, 136, 0.1) !important;
    border-left: 3px solid #00ff88;
}
.assistant-message {
    background: rgba(0, 255, 255, 0.05) !important;
    border-left: 3px solid #00ffff;
}
.footer {
    text-align: center;
    padding: 20px;
    color: #666;
}
"""
48
 
49
def format_message_history(history: List[Tuple[str, str]], system_prompt: str) -> List[dict]:
    """Convert (user, assistant) chat pairs into an OpenAI-style message list.

    A system message is prepended only when *system_prompt* is non-empty;
    blank turns on either side of a pair are skipped.
    """
    formatted: List[dict] = [{"role": "system", "content": system_prompt}] if system_prompt else []

    for user_turn, bot_turn in history:
        if user_turn:
            formatted.append({"role": "user", "content": user_turn})
        if bot_turn:
            formatted.append({"role": "assistant", "content": bot_turn})

    return formatted
63
 
64
def stream_response(message: str, history: List[Tuple[str, str]],
                    system_prompt: str, temperature: float, max_tokens: int,
                    top_p: float, provider: str):
    """Yield a progressively longer demo reply, three words at a time.

    NOTE(review): this is a placeholder — no real model call is made.  In
    production the canned text below should be replaced by a call to the
    selected provider's API using the prepared ``messages`` payload.
    """
    # Build the request payload exactly as a real API call would expect it.
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})

    # Canned reply used purely to demonstrate the streaming UI.
    demo_response = f"""I'm GPT-OSS-120B running on {provider}! 

I received your message: "{message}"

With these settings:
- Temperature: {temperature}
- Max tokens: {max_tokens}
- Top-p: {top_p}
- System prompt: {system_prompt[:50]}...

This is where the actual model response would appear. In production, this would connect to the {provider} API to generate real responses from the 120B parameter model.

The model would analyze your input and provide a detailed, thoughtful response based on its massive 120 billion parameters of knowledge."""

    # Emit the reply in 3-word chunks with a short pause so the chat widget
    # visibly "streams" the text.
    tokens = demo_response.split()
    partial = ""
    for start in range(0, len(tokens), 3):
        partial += " ".join(tokens[start:start + 3]) + " "
        time.sleep(0.05)  # Simulate streaming delay
        yield partial.strip()
97
 
98
def clear_chat():
    """Reset the conversation: blank chatbot widget plus an empty history list."""
    return (None, [])
 
 
 
 
 
101
 
102
def undo_last(history):
    """Drop the most recent exchange; a falsy history is returned unchanged."""
    return history[:-1] if history else history
107
 
108
def retry_last(message, history):
    """Pop the last user turn back into the input box so it can be resent.

    When history is empty, or its last entry has a blank user slot, both
    inputs are returned untouched.
    """
    if not history:
        return message, history
    previous_user_msg = history[-1][0]
    if not previous_user_msg:
        return message, history
    return previous_user_msg, history[:-1]
114
 
115
def load_example(example):
    """Identity passthrough: place the clicked example text into the input box."""
    return example
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
# Create the Gradio interface.
# Layout: a 3:1 row (chat column | settings column), then examples, a stats
# row, and the event wiring at the end.
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="GPT-OSS-120B Chat") as demo:

    # Header
    gr.Markdown(
        """
        # 🧠 GPT-OSS-120B Mega Chat
        ### 120 Billion Parameters of Pure Intelligence 🚀

        Chat with OpenAI's massive GPT-OSS-120B model - one of the largest open-weight models available!
        """
    )

    # Main chat interface
    with gr.Row():
        # Chat column
        with gr.Column(scale=3):
            # type="tuples": history is a list of (user, assistant) pairs,
            # matching format_message_history / the handlers below.
            chatbot = gr.Chatbot(
                label="Chat",
                elem_id="chatbot",
                bubble_full_width=False,
                show_copy_button=True,
                height=500,
                type="tuples"
            )

            # Input area
            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask anything... (Shift+Enter for new line, Enter to send)",
                    lines=3,
                    max_lines=10,
                    scale=5,
                    elem_classes="user-input"
                )

                with gr.Column(scale=1, min_width=80):
                    send_btn = gr.Button("Send 📤", variant="primary", size="lg")
                    # NOTE(review): stop_btn is created hidden and never wired
                    # or shown — implement generation cancelling or remove it.
                    stop_btn = gr.Button("Stop ⏹️", variant="stop", size="lg", visible=False)

            # Action buttons
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear", size="sm")
                undo_btn = gr.Button("↩️ Undo", size="sm")
                retry_btn = gr.Button("🔄 Retry", size="sm")

        # Settings column
        with gr.Column(scale=1):
            # Provider selection
            with gr.Accordion("🔌 Inference Provider", open=True):
                provider = gr.Dropdown(
                    label="Provider",
                    choices=["groq", "fireworks", "hyperbolic", "together", "anyscale"],
                    value=DEFAULT_PROVIDER,
                    info="Choose your inference provider"
                )

                login_btn = gr.Button("🔐 Sign in with HuggingFace", size="sm")

            # Model settings
            with gr.Accordion("⚙️ Model Settings", open=True):
                # Picking a mode overwrites system_prompt via system_mode.change below.
                system_mode = gr.Dropdown(
                    label="System Mode",
                    choices=list(SYSTEM_PROMPTS.keys()),
                    value="default",
                    info="Preset system prompts"
                )

                system_prompt = gr.Textbox(
                    label="Custom System Prompt",
                    value=SYSTEM_PROMPTS["default"],
                    lines=3,
                    info="Override with custom instructions"
                )

                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.05,
                    info="Higher = more creative, Lower = more focused"
                )

                max_tokens = gr.Slider(
                    label="Max Tokens",
                    minimum=64,
                    maximum=8192,
                    value=2048,
                    step=64,
                    info="Maximum response length"
                )

                top_p = gr.Slider(
                    label="Top-p (Nucleus Sampling)",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    info="Controls response diversity"
                )

                with gr.Row():
                    # NOTE(review): seed is displayed but never passed to the
                    # generator — wire it into bot_respond/stream_response.
                    seed = gr.Number(
                        label="Seed",
                        value=-1,
                        info="Set for reproducible outputs (-1 for random)"
                    )

            # Advanced settings
            # NOTE(review): none of these three controls is read by any event
            # handler below — they are currently decorative.
            with gr.Accordion("🔬 Advanced", open=False):
                stream_output = gr.Checkbox(
                    label="Stream Output",
                    value=True,
                    info="Show response as it's generated"
                )

                show_reasoning = gr.Checkbox(
                    label="Show Reasoning Process",
                    value=False,
                    info="Display chain-of-thought if available"
                )

                reasoning_lang = gr.Dropdown(
                    label="Reasoning Language",
                    choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"],
                    value="English",
                    info="Language for reasoning process"
                )

            # Model info
            with gr.Accordion("📊 Model Info", open=False):
                gr.Markdown(
                    """
                    **Model**: openai/gpt-oss-120b
                    - **Parameters**: 120 Billion
                    - **Architecture**: Transformer + MoE
                    - **Context**: 128K tokens
                    - **Training**: Multi-lingual, code, reasoning
                    - **License**: Open weight

                    **Capabilities**:
                    - Complex reasoning
                    - Code generation
                    - Creative writing
                    - Technical analysis
                    - Multi-lingual support
                    - Function calling
                    """
                )

    # Examples section
    with gr.Accordion("💡 Example Prompts", open=True):
        examples = gr.Examples(
            examples=[
                "Explain quantum computing to a 10-year-old",
                "Write a Python function to detect palindromes with O(1) space complexity",
                "What are the implications of AGI for society?",
                "Create a detailed business plan for a sustainable energy startup",
                "Translate 'Hello, how are you?' to 10 different languages",
                "Debug this code: `def fib(n): return fib(n-1) + fib(n-2)`",
                "Write a haiku about machine learning",
                "Compare and contrast transformers vs RNNs for NLP tasks",
            ],
            inputs=msg,
            label="Click to load an example"
        )

    # Stats and info
    # NOTE(review): these three readouts are never updated by any handler.
    with gr.Row():
        with gr.Column():
            token_count = gr.Textbox(
                label="Token Count",
                value="0 tokens",
                interactive=False,
                scale=1
            )
        with gr.Column():
            response_time = gr.Textbox(
                label="Response Time",
                value="0.0s",
                interactive=False,
                scale=1
            )
        with gr.Column():
            model_status = gr.Textbox(
                label="Status",
                value="🟢 Ready",
                interactive=False,
                scale=1
            )

    # Event handlers

    def update_system_prompt(mode):
        # Map the selected preset name to its prompt text; fall back to default.
        return SYSTEM_PROMPTS.get(mode, SYSTEM_PROMPTS["default"])

    def user_submit(message, history):
        # Append the user's turn with a None assistant slot (filled by
        # bot_respond) and clear the input box; ignore whitespace-only input.
        if not message.strip():
            return "", history
        return "", history + [(message, None)]

    def bot_respond(history, system_prompt, temperature, max_tokens, top_p, provider):
        # Generator: fills the pending (assistant is None) last turn.
        # NOTE(review): `return history` inside a generator ends iteration
        # without yielding — on the early-exit path the chatbot receives no
        # update; consider `yield history; return` instead.
        if not history or history[-1][1] is not None:
            return history

        message = history[-1][0]

        # Generate response (streaming): re-yield the growing reply so the
        # chat widget updates incrementally.
        bot_message = ""
        for chunk in stream_response(
            message,
            history[:-1],
            system_prompt,
            temperature,
            max_tokens,
            top_p,
            provider
        ):
            bot_message = chunk
            history[-1] = (message, bot_message)
            yield history

    # Connect event handlers
    system_mode.change(
        update_system_prompt,
        inputs=[system_mode],
        outputs=[system_prompt]
    )

    # Message submission: stage the user turn (unqueued, instant), then
    # stream the bot reply.
    msg.submit(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )

    send_btn.click(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )

    # Action buttons
    # NOTE(review): the lambda duplicates clear_chat() (defined above but
    # unused) except it clears the textbox instead of returning [].
    clear_btn.click(
        lambda: (None, ""),
        outputs=[chatbot, msg],
        queue=False
    )

    undo_btn.click(
        undo_last,
        inputs=[chatbot],
        outputs=[chatbot],
        queue=False
    )

    retry_btn.click(
        retry_last,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot
    )

    # Login button — placeholder only; shows an info toast.
    login_btn.click(
        lambda: gr.Info("Please implement HuggingFace OAuth login"),
        queue=False
    )

    # Footer
    gr.Markdown(
        """
        <div class='footer'>
        <p>Built with 🔥 for the GPT-OSS-120B community | Model: openai/gpt-oss-120b</p>
        <p>Remember: This is a 120 billion parameter model - expect incredible responses!</p>
        </div>
        """,
        elem_classes="footer"
    )
413
 
414
+ # Launch configuration
415
  if __name__ == "__main__":
416
+ demo.queue(max_size=50, default_concurrency_limit=10)
417
+ demo.launch(
418
+ server_name="0.0.0.0",
419
+ share=False,
420
+ show_error=True,
421
+ server_port=7860,
422
+ favicon_path=None