legaltextai committed on
Commit
5b130c8
·
verified ·
1 Parent(s): c9732d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -25
app.py CHANGED
@@ -2,41 +2,36 @@ import gradio as gr
2
  import spaces
3
  from transformers import pipeline
4
 
5
- # Initialize model once at startup
6
  model = pipeline(
7
  "text-generation",
8
  model="unsloth/DeepSeek-R1-Distill-Llama-8B",
9
- torch_dtype="auto",
10
- device_map="auto"
 
11
  )
12
 
13
- @spaces.GPU(duration=120)
14
  def chat_response(message, history):
15
- # Format conversation history for model input
16
- messages = []
17
- for human, assistant in history:
18
- messages.extend([
19
- {"role": "user", "content": human},
20
- {"role": "assistant", "content": assistant}
21
- ])
22
- messages.append({"role": "user", "content": message})
23
 
24
- # Generate response
25
- response = model(
26
- messages,
27
- max_new_tokens=256,
28
- temperature=0.7,
29
- do_sample=True
30
- )
31
-
32
- return response[0]['generated_text'][-1]["content"]
33
 
34
- # Create chat interface
35
  demo = gr.ChatInterface(
36
  chat_response,
37
- chatbot=gr.Chatbot(height=500),
38
  textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
39
- title="DeepSeek-Llama-8B Chat Demo",
40
- examples=[["Explain quantum computing simply"], ["Write a Python function for Fibonacci sequence"]]
 
 
41
  )
 
42
  demo.launch()
 
import gradio as gr
import spaces
import torch  # required: torch.float16 is referenced below (was missing in the commit)
from transformers import pipeline

# Load the model once at startup so every request reuses the same pipeline.
# NOTE(review): load_in_8bit requires the `bitsandbytes` package to be
# installed in the Space — confirm it is listed in requirements.txt.
model = pipeline(
    "text-generation",
    model="unsloth/DeepSeek-R1-Distill-Llama-8B",
    device_map="auto",
    torch_dtype=torch.float16,  # explicit dtype to halve VRAM vs float32
    model_kwargs={"load_in_8bit": True},  # 8-bit quantization reduces VRAM usage
)


@spaces.GPU(duration=300)  # allow up to 5 minutes of GPU time per request
def chat_response(message, history):
    """Generate one assistant reply for the Gradio chat interface.

    Args:
        message: The user's newest message (plain string).
        history: Prior turns. With ``gr.Chatbot(type="messages")`` this is a
            list of ``{"role": ..., "content": ...}`` dicts, so it can be
            passed to the chat-template pipeline as-is.

    Returns:
        The assistant's reply text, or an error string on GPU failure.
    """
    # Append the new user turn to the existing openai-style history.
    messages = list(history) + [{"role": "user", "content": message}]

    try:
        # Call the module-level pipeline directly; the committed version's
        # lazy `chat_response.pipe = pipeline(...)` was a broken placeholder
        # (literal Ellipsis) that also duplicated the startup load.
        response = model(
            messages,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
        )
        # The chat pipeline returns the full conversation; the last entry
        # is the newly generated assistant turn.
        return response[0]['generated_text'][-1]["content"]
    except RuntimeError as e:
        # Surface GPU timeouts / OOMs to the user instead of crashing the UI.
        return f"GPU timeout: {str(e)}"


demo = gr.ChatInterface(
    chat_response,
    chatbot=gr.Chatbot(height=500, type="messages"),  # openai-style history dicts
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="DeepSeek-Llama-8B Chat",
    examples=[["What is AI?"]],
    # NOTE(review): retry_btn/undo_btn were dropped — those kwargs were
    # deprecated in Gradio 4.x and removed in Gradio 5, where they raise
    # TypeError; type="messages" already implies Gradio >= 4.44.
)

demo.launch()