Commit 297c9ff
Parent(s): 0650a69

Update app.py

Files changed:
- app.py +40 -60
- requirements.txt +1 -0
app.py
CHANGED
@@ -2,7 +2,7 @@ import os
 import requests
 import json
 import time
-
+from openai import OpenAI
 import gradio as gr
 from transformers import AutoTokenizer
 
@@ -20,9 +20,9 @@ MediaTek Research Breeze-7B (hereinafter referred to as Breeze-7B) is a language
 [Breeze-7B-Base](https://huggingface.co/MediaTek-Research/Breeze-7B-Base-v1_0) is the base model for the Breeze-7B series.
 It is suitable for use if you have substantial fine-tuning data to tune it for your specific use case.
 [Breeze-7B-Instruct](https://huggingface.co/MediaTek-Research/Breeze-7B-Instruct-v1_0) derives from the base model Breeze-7B-Base, making the resulting model amenable to be used as-is for commonly seen tasks.
-
-
-
+
+This App is cloned from [Demo-MR-Breeze-7B](https://huggingface.co/spaces/MediaTek-Research/Demo-MR-Breeze-7B)
+
 """
 
 LICENSE = """
@@ -33,7 +33,7 @@ DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant built by MediaTek Resear
 API_URL = os.environ.get("API_URL")
 TOKEN = os.environ.get("TOKEN")
 TOKENIZER_REPO = "MediaTek-Research/Breeze-7B-Instruct-v1_0"
-
+MODEL_NAME = os.environ.get("MODEL_NAME")
 
 HEADERS = {
     "Authorization": f"Bearer {TOKEN}",
@@ -44,8 +44,32 @@ HEADERS = {
 MAX_SEC = 30
 MAX_INPUT_LENGTH = 5000
 
-
+client = OpenAI(
+    base_url=API_URL,
+    api_key=TOKEN
+)
+
+def chat_with_openai(client, model_name, system_message, user_message, temperature=0.5, max_tokens=1024, top_p=0.5):
+    chat_completion = client.chat.completions.create(
+        model=model_name,
+        messages=[
+            {
+                "role": "system",
+                "content": system_message
+            },
+            {
+                "role": "user",
+                "content": user_message
+            }
+        ],
+        temperature=temperature,
+        max_tokens=max_tokens,
+        top_p=top_p,
+        stream=True
+    )
 
+    for message in chat_completion:
+        yield message.choices[0].delta.content
 
 def refusal_condition(query):
     # 不要再問這些問題啦!
@@ -123,40 +147,6 @@ with gr.Blocks() as demo:
     def user(user_message, history):
         return "", history + [[user_message, None]]
 
-
-    def connect_server(data):
-        for _ in range(3):
-            s = requests.Session()
-            r = s.post(API_URL, headers=HEADERS, json=data, stream=True, timeout=30)
-            time.sleep(1)
-            if r.status_code == 200:
-                return r
-        return None
-
-
-    def stream_response_from_server(r):
-        # start_time = time.time()
-        keep_streaming = True
-        for line in r.iter_lines():
-            # if time.time() - start_time > MAX_SEC:
-            #     keep_streaming = False
-            #     break
-
-            if line and keep_streaming:
-                if r.status_code != 200:
-                    continue
-                json_response = json.loads(line)
-
-                if "fragment" not in json_response["result"]:
-                    keep_streaming = False
-                    break
-
-                delta = json_response["result"]["fragment"]["data"]["text"]
-                yield delta
-
-                # start_time = time.time()
-
-
     def bot(history, max_new_tokens, temperature, top_p, system_prompt):
         chat_data = []
         system_prompt = system_prompt.strip()
@@ -166,32 +156,22 @@ with gr.Blocks() as demo:
             chat_data.append({"role": "user", "content": user_msg if user_msg is not None else ''})
            chat_data.append({"role": "assistant", "content": assistant_msg if assistant_msg is not None else ''})
 
-        message = tokenizer.apply_chat_template(chat_data, tokenize=False)
-        message = message[3:] # remove SOT token
-
-        if len(message) > MAX_INPUT_LENGTH:
-            raise Exception()
-
         response = '[ERROR]'
         if refusal_condition(history[-1][0]):
             history = [['[安全拒答啟動]', '[安全拒答啟動] 請清除再開啟對話']]
             response = '[REFUSAL]'
             yield history
         else:
-            data = {
-                ...
-                }
-            }
-
-            r = connect_server(data)
+            r = chat_with_openai(
+                client,
+                MODEL_NAME,
+                system_prompt,
+                history[-1][0],
+                temperature,
+                max_new_tokens,
+                top_p)
             if r is not None:
-                for delta in stream_response_from_server(r):
+                for delta in r:
                     if history[-1][1] is None:
                         history[-1][1] = ''
                     history[-1][1] += delta
@@ -210,7 +190,7 @@ with gr.Blocks() as demo:
                 del history[-1]
             yield history
 
-        print('== Record ==\nQuery: {query}\nResponse: {response}'.format(query=repr(
+        print('== Record ==\nQuery: {query}\nResponse: {response}'.format(query=repr(history[-1][0]), response=repr(history[-1][1])))
 
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
         fn=bot,
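For anyone exercising the new streaming path outside the Space, here is a minimal, self-contained sketch of how chat_with_openai is consumed. It assumes API_URL points at an OpenAI-compatible endpoint and that TOKEN and MODEL_NAME are set in the environment (the variable names come from the diff; the endpoint itself is an assumption). One caveat the committed code does not guard against: with stream=True, the final chunk usually arrives with delta.content set to None, so the loop below skips empty fragments.

    import os
    from openai import OpenAI

    API_URL = os.environ.get("API_URL")        # assumed OpenAI-compatible base URL
    TOKEN = os.environ.get("TOKEN")
    MODEL_NAME = os.environ.get("MODEL_NAME")

    client = OpenAI(base_url=API_URL, api_key=TOKEN)

    # Same shape as chat_with_openai in app.py, redeclared so the sketch runs standalone.
    def chat_with_openai(client, model_name, system_message, user_message,
                         temperature=0.5, max_tokens=1024, top_p=0.5):
        stream = client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_message},
                {"role": "user", "content": user_message},
            ],
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )
        for chunk in stream:
            yield chunk.choices[0].delta.content

    reply = ""
    for delta in chat_with_openai(client, MODEL_NAME, "You are a helpful AI assistant.", "Hello!"):
        if delta is not None:  # the final streamed chunk typically carries content=None
            reply += delta
    print(reply)

The same None guard may be worth adding inside bot(), where history[-1][1] += delta would raise a TypeError on a None fragment.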
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
+openai
 transformers==4.38.2
 sentencepiece==0.2.0
 tensorflow
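One consistency note: the existing dependencies are pinned, while the new openai entry is not; pinning it once a known-good release is confirmed would make the Space more reproducible. The version below is purely hypothetical:

    openai==1.30.0  # hypothetical pin; use the version actually tested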