Intellectualtech committed on
Commit b00d97e · verified · 1 Parent(s): ca93545

Update app.py

Files changed (1): app.py +14 -6
app.py CHANGED
@@ -1,9 +1,12 @@
+
+
 import gradio as gr
 from huggingface_hub import InferenceClient
 from typing import List, Tuple
 import logging
 from collections import deque
 import re
+import os
 
 # Configure logging
 logging.basicConfig(
@@ -12,9 +15,12 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
-# Initialize the InferenceClient
+# Initialize the InferenceClient with API token
 try:
-    client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+    client = InferenceClient(
+        model="HuggingFaceH4/zephyr-7b-beta",
+        token=os.getenv("HUGGINGFACEHUB_API_TOKEN")
+    )
     logger.info("Successfully initialized InferenceClient")
 except Exception as e:
     logger.error(f"Failed to initialize InferenceClient: {str(e)}")
@@ -97,21 +103,23 @@ def respond(
 
     response = ""
     try:
-        for message in client.chat_completion(
+        stream = client.chat_completion(
             messages,
             max_tokens=max_tokens,
             stream=True,
             temperature=temperature,
             top_p=top_p,
-        ):
+        )
+        for message in stream:
             token = message.choices[0].delta.content or ""
             response += token
             yield response
         # Store the query and final response in memory
         add_to_memory(message, response)
     except Exception as e:
-        logger.error(f"Error during chat completion: {str(e)}")
-        raise RuntimeError("Failed to generate response from the model")
+        error_msg = f"Error during chat completion: {str(e)}"
+        logger.error(error_msg)
+        yield error_msg  # Yield the error message to display in Gradio
 
 def main():
     """