Commit 297c9ff
Parent(s): 0650a69

Update app.py

Files changed:
- app.py +40 -60
- requirements.txt +1 -0
app.py
CHANGED
@@ -2,7 +2,7 @@ import os
 import requests
 import json
 import time
-
+from openai import OpenAI
 import gradio as gr
 from transformers import AutoTokenizer
 
@@ -20,9 +20,9 @@ MediaTek Research Breeze-7B (hereinafter referred to as Breeze-7B) is a language
 [Breeze-7B-Base](https://huggingface.co/MediaTek-Research/Breeze-7B-Base-v1_0) is the base model for the Breeze-7B series.
 It is suitable for use if you have substantial fine-tuning data to tune it for your specific use case.
 [Breeze-7B-Instruct](https://huggingface.co/MediaTek-Research/Breeze-7B-Instruct-v1_0) derives from the base model Breeze-7B-Base, making the resulting model amenable to be used as-is for commonly seen tasks.
-
-
-
+
+This App is cloned from [Demo-MR-Breeze-7B](https://huggingface.co/spaces/MediaTek-Research/Demo-MR-Breeze-7B)
+
 """
 
 LICENSE = """
@@ -33,7 +33,7 @@ DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant built by MediaTek Resear
 API_URL = os.environ.get("API_URL")
 TOKEN = os.environ.get("TOKEN")
 TOKENIZER_REPO = "MediaTek-Research/Breeze-7B-Instruct-v1_0"
-
+MODEL_NAME = os.environ.get("MODEL_NAME")
 
 HEADERS = {
     "Authorization": f"Bearer {TOKEN}",
@@ -44,8 +44,32 @@ HEADERS = {
 MAX_SEC = 30
 MAX_INPUT_LENGTH = 5000
 
-
+client = OpenAI(
+    base_url=API_URL,
+    api_key=TOKEN
+)
+
+def chat_with_openai(client, model_name, system_message, user_message, temperature=0.5, max_tokens=1024, top_p=0.5):
+    chat_completion = client.chat.completions.create(
+        model=model_name,
+        messages=[
+            {
+                "role": "system",
+                "content": system_message
+            },
+            {
+                "role": "user",
+                "content": user_message
+            }
+        ],
+        temperature=temperature,
+        max_tokens=max_tokens,
+        top_p=top_p,
+        stream=True
+    )
 
+    for message in chat_completion:
+        yield message.choices[0].delta.content
 
 def refusal_condition(query):
     # 不要再問這些問題啦!
@@ -123,40 +147,6 @@ with gr.Blocks() as demo:
     def user(user_message, history):
         return "", history + [[user_message, None]]
 
-
-    def connect_server(data):
-        for _ in range(3):
-            s = requests.Session()
-            r = s.post(API_URL, headers=HEADERS, json=data, stream=True, timeout=30)
-            time.sleep(1)
-            if r.status_code == 200:
-                return r
-        return None
-
-
-    def stream_response_from_server(r):
-        # start_time = time.time()
-        keep_streaming = True
-        for line in r.iter_lines():
-            # if time.time() - start_time > MAX_SEC:
-            #     keep_streaming = False
-            #     break
-
-            if line and keep_streaming:
-                if r.status_code != 200:
-                    continue
-                json_response = json.loads(line)
-
-                if "fragment" not in json_response["result"]:
-                    keep_streaming = False
-                    break
-
-                delta = json_response["result"]["fragment"]["data"]["text"]
-                yield delta
-
-                # start_time = time.time()
-
-
     def bot(history, max_new_tokens, temperature, top_p, system_prompt):
         chat_data = []
         system_prompt = system_prompt.strip()
@@ -166,32 +156,22 @@ with gr.Blocks() as demo:
             chat_data.append({"role": "user", "content": user_msg if user_msg is not None else ''})
            chat_data.append({"role": "assistant", "content": assistant_msg if assistant_msg is not None else ''})
 
-        message = tokenizer.apply_chat_template(chat_data, tokenize=False)
-        message = message[3:] # remove SOT token
-
-        if len(message) > MAX_INPUT_LENGTH:
-            raise Exception()
-
         response = '[ERROR]'
         if refusal_condition(history[-1][0]):
             history = [['[安全拒答啟動]', '[安全拒答啟動] 請清除再開啟對話']]
             response = '[REFUSAL]'
             yield history
         else:
-            data = {
-                ...
-                }
-            }
-
-            r = connect_server(data)
+            r = chat_with_openai(
+                client,
+                MODEL_NAME,
+                system_prompt,
+                history[-1][0],
+                temperature,
+                max_new_tokens,
+                top_p)
             if r is not None:
-                for delta in stream_response_from_server(r):
+                for delta in r:
                     if history[-1][1] is None:
                         history[-1][1] = ''
                     history[-1][1] += delta
@@ -210,7 +190,7 @@ with gr.Blocks() as demo:
                 del history[-1]
             yield history
 
-        print('== Record ==\nQuery: {query}\nResponse: {response}'.format(query=repr(
+        print('== Record ==\nQuery: {query}\nResponse: {response}'.format(query=repr(history[-1][0]), response=repr(history[-1][1])))
 
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         fn=bot,
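For anyone exercising the new streaming path outside the Space, here is a minimal, self-contained sketch of how chat_with_openai is consumed. It assumes API_URL points at an OpenAI-compatible endpoint and that TOKEN and MODEL_NAME are set in the environment (the variable names come from the diff; the endpoint itself is an assumption). One caveat the committed code does not guard against: with stream=True, the final chunk usually arrives with delta.content set to None, so the loop below skips empty fragments.

    import os
    from openai import OpenAI

    API_URL = os.environ.get("API_URL")        # assumed OpenAI-compatible base URL
    TOKEN = os.environ.get("TOKEN")
    MODEL_NAME = os.environ.get("MODEL_NAME")

    client = OpenAI(base_url=API_URL, api_key=TOKEN)

    # Same shape as chat_with_openai in app.py, redeclared so the sketch runs standalone.
    def chat_with_openai(client, model_name, system_message, user_message,
                         temperature=0.5, max_tokens=1024, top_p=0.5):
        stream = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )
        for chunk in stream:
            yield chunk.choices[0].delta.content

    reply = ""
    for delta in chat_with_openai(client, MODEL_NAME, "You are a helpful AI assistant.", "Hello!"):
        if delta is not None:  # the final streamed chunk typically carries content=None
            reply += delta
    print(reply)

The same None guard may be worth adding inside bot(), where history[-1][1] += delta would raise a TypeError on a None fragment.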
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
+openai
 transformers==4.38.2
 sentencepiece==0.2.0
 tensorflow
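One consistency note: the existing dependencies are pinned, while the new openai entry is not; pinning it once a known-good release is confirmed would make the Space more reproducible. The version below is purely hypothetical:

    openai==1.30.0  # hypothetical pin; use the version actually tested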