legaltextai committed on
Commit
5b130c8
·
verified ·
1 Parent(s): c9732d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -25
app.py CHANGED
@@ -2,41 +2,36 @@ import gradio as gr
2
  import spaces
3
  from transformers import pipeline
4
 
5
- # Initialize model once at startup
6
  model = pipeline(
7
  "text-generation",
8
  model="unsloth/DeepSeek-R1-Distill-Llama-8B",
9
- torch_dtype="auto",
10
- device_map="auto"
 
11
  )
12
 
13
- @spaces.GPU(duration=120)
14
  def chat_response(message, history):
15
- # Format conversation history for model input
16
- messages = []
17
- for human, assistant in history:
18
- messages.extend([
19
- {"role": "user", "content": human},
20
- {"role": "assistant", "content": assistant}
21
- ])
22
- messages.append({"role": "user", "content": message})
23
 
24
- # Generate response
25
- response = model(
26
- messages,
27
- max_new_tokens=256,
28
- temperature=0.7,
29
- do_sample=True
30
- )
31
-
32
- return response[0]['generated_text'][-1]["content"]
33
 
34
- # Create chat interface
35
  demo = gr.ChatInterface(
36
  chat_response,
37
- chatbot=gr.Chatbot(height=500),
38
  textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
39
- title="DeepSeek-Llama-8B Chat Demo",
40
- examples=[["Explain quantum computing simply"], ["Write a Python function for Fibonacci sequence"]]
 
 
41
  )
 
42
  demo.launch()
 
import gradio as gr
import spaces
import torch  # required: torch.float16 is referenced below (was missing in the commit)
from transformers import pipeline

# Load the model once at startup so every request reuses the same pipeline.
# NOTE(review): load_in_8bit requires the `bitsandbytes` package to be
# installed in the Space — confirm it is listed in requirements.txt.
model = pipeline(
    "text-generation",
    model="unsloth/DeepSeek-R1-Distill-Llama-8B",
    device_map="auto",
    torch_dtype=torch.float16,  # explicit dtype to halve VRAM vs float32
    model_kwargs={"load_in_8bit": True},  # 8-bit quantization reduces VRAM usage
)


@spaces.GPU(duration=300)  # allow up to 5 minutes of GPU time per request
def chat_response(message, history):
    """Generate one assistant reply for the Gradio chat interface.

    Args:
        message: The user's newest message (plain string).
        history: Prior turns. With ``gr.Chatbot(type="messages")`` this is a
            list of ``{"role": ..., "content": ...}`` dicts, so it can be
            passed to the chat-template pipeline as-is.

    Returns:
        The assistant's reply text, or an error string on GPU failure.
    """
    # Append the new user turn to the existing openai-style history.
    messages = list(history) + [{"role": "user", "content": message}]

    try:
        # Call the module-level pipeline directly; the committed version's
        # lazy `chat_response.pipe = pipeline(...)` was a broken placeholder
        # (literal Ellipsis) that also duplicated the startup load.
        response = model(
            messages,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
        )
        # The chat pipeline returns the full conversation; the last entry
        # is the newly generated assistant turn.
        return response[0]['generated_text'][-1]["content"]
    except RuntimeError as e:
        # Surface GPU timeouts / OOMs to the user instead of crashing the UI.
        return f"GPU timeout: {str(e)}"


demo = gr.ChatInterface(
    chat_response,
    chatbot=gr.Chatbot(height=500, type="messages"),  # openai-style history dicts
    textbox=gr.Textbox(placeholder="Ask me anything...", container=False, scale=7),
    title="DeepSeek-Llama-8B Chat",
    examples=[["What is AI?"]],
    # NOTE(review): retry_btn/undo_btn were dropped — those kwargs were
    # deprecated in Gradio 4.x and removed in Gradio 5, where they raise
    # TypeError; type="messages" already implies Gradio >= 4.44.
)

demo.launch()