amurienne committed · Commit 993e8ac · verified · 1 Parent(s): a6fe8fb

now using hf inference api for chat model
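
For context: the previous revision loaded a local GGUF build of the chat model through transformers; this commit delegates chat generation to the Hugging Face Inference API via `huggingface_hub`. A minimal sketch of the new call path, using only what the diff shows (it assumes the `HF_TOKEN_API` secret is set on the Space and that the token has access to the gated Llama model):

```python
import os
from huggingface_hub import InferenceClient

# no local weights anymore: generation happens on HF's servers
client = InferenceClient(
    "meta-llama/Llama-3.2-3B-Instruct",
    token=os.environ["HF_TOKEN_API"],
)

# chat_completion takes an OpenAI-style message list and returns choices
reply = client.chat_completion(
    [{"role": "user", "content": "Bonjour !"}],
    max_tokens=512,
    temperature=0.5,
).choices[0].message.content

print(reply)
```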

Files changed (1):
  1. app.py +55 -33
app.py CHANGED
@@ -1,29 +1,59 @@
+ # The MIT License
+
+ # Copyright (c) 2025 Albert Murienne
+
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
+ # of this software and associated documentation files (the "Software"), to deal
+ # in the Software without restriction, including without limitation the rights
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ # copies of the Software, and to permit persons to whom the Software is
+ # furnished to do so, subject to the following conditions:
+
+ # The above copyright notice and this permission notice shall be included in
+ # all copies or substantial portions of the Software.
+
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ # THE SOFTWARE.
+
+ import os
+
  import gradio as gr
 
  from transformers import (
      AutoTokenizer,
      AutoModelForSeq2SeqLM,
-     AutoModelForCausalLM,
-     BitsAndBytesConfig,
      pipeline
  )
 
- import torch
+ from huggingface_hub import InferenceClient
 
  # CHAT MODEL
 
- chat_model_id = "MaziyarPanahi/Llama-3.2-3B-Instruct-GGUF"
- chat_gguf = "Llama-3.2-3B-Instruct.Q4_K_M.gguf"
+ class chat_engine_hf_api:
+
+     def __init__(self):
+         self.client = InferenceClient(
+             "meta-llama/Llama-3.2-3B-Instruct",
+             token=os.environ['HF_TOKEN_API']
+         )
 
- tokenizer = AutoTokenizer.from_pretrained(chat_model_id, gguf_file=chat_gguf)
- model = AutoModelForCausalLM.from_pretrained(chat_model_id, gguf_file=chat_gguf)
+     def answer(self, message, history):
+         return self.client.chat_completion(
+             history + [{"role": "user", "content": f"tu es un assistant francophone. Répond en une seule phrase sans formattage.\n{message}"}],
+             max_tokens=512,
+             temperature=0.5).choices[0].message.content
 
- chat_pipeline = pipeline('text-generation', model=model, tokenizer=tokenizer, do_sample=True, temperature=0.5, truncation=True, max_length=512, return_full_text=False)
+ chat_engine = chat_engine_hf_api()
 
  # TRANSLATION MODELS
 
- fw_modelcard = "amurienne/gallek-m2m100"
- bw_modelcard = "amurienne/kellag-m2m100"
+ fw_modelcard = "../gallek/gallek-m2m100-b51"
+ bw_modelcard = "../gallek/kellag-m2m100-b51"
 
  fw_model = AutoModelForSeq2SeqLM.from_pretrained(fw_modelcard)
  fw_tokenizer = AutoTokenizer.from_pretrained(fw_modelcard)
@@ -42,21 +72,6 @@ def translate(text, forward: bool):
      else:
          return bw_translation_pipeline("treiñ eus ar galleg d'ar brezhoneg: " + text)[0]['translation_text']
 
- # answer function
- def answer(text):
-     return chat_pipeline(text, chat_template=None)[0]['generated_text']
-
- def format_prompt_with_history(message, native_chat_history):
-     # format the conversation history
-     prompt = ""
-     for interaction in native_chat_history:
-         prompt += f"<|start_header_id|>{interaction['role']}<|end_header_id|>\n{interaction['content']}<|eot_id|>\n"
-
-     # add the current user message
-     prompt += f"<|start_header_id|>user<|end_header_id|>\ntu es un assistant francophone. Répond en une seule phrase sans formattage.\n{message}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n"
-
-     return prompt
-
  # maximum number of interactions to keep in history
  max_history_length = 3
 
@@ -64,10 +79,13 @@ max_history_length = 3
  native_chat_history = []
 
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
-
+
      gr.Markdown("# BreizhBot\n## Breton Chatbot (Translation based)\nPart of the [GweLLM](https://github.com/blackccpie/GweLLM) project")
 
-     chatbot = gr.Chatbot(label="Chat", type="messages")
+     chatbot = gr.Chatbot(
+         label="Chat",
+         placeholder="Degemer mat, petra a c'hellan ober evidoc'h ?",
+         type="messages")
      msg = gr.Textbox(label='User Input')
 
      def clear(chat_history):
@@ -79,6 +97,13 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
      chatbot.clear(clear, inputs=[chatbot])
 
+     def user_input(message, chat_history):
+         """
+         Handles instant display of the user query (without waiting for model answer)
+         """
+         chat_history.append({"role": "user", "content": message})
+         return chat_history
+
      def respond(message, chat_history):
          """
          Handles bot response generation
@@ -89,14 +114,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
          fr_message = translate(message, forward=False)
          print(f"user fr -> {fr_message}")
 
-         prompt = format_prompt_with_history(fr_message, native_chat_history)
-
-         bot_fr_message = answer(prompt)
+         bot_fr_message = chat_engine.answer(fr_message, native_chat_history)
          print(f"bot fr -> {bot_fr_message}")
          bot_br_message = translate(bot_fr_message, forward=True)
          print(f"bot br -> {bot_br_message}")
-
-         chat_history.append({"role": "user", "content": message})
+
          chat_history.append({"role": "assistant", "content": bot_br_message})
 
          native_chat_history.append({"role": "user", "content": fr_message})
@@ -109,7 +131,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
 
          return "", chat_history
 
-     msg.submit(respond, [msg, chatbot], [msg, chatbot])
+     msg.submit(user_input, [msg, chatbot], chatbot).then(respond, [msg, chatbot], [msg, chatbot])
 
  if __name__ == "__main__":
      demo.launch()
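
For completeness, the translation side is untouched apart from the model paths: two local m2m100 checkpoints handle fr→br and br→fr. Only the call site of `fw_translation_pipeline` appears in this diff, so the sketch below of how it is presumably built is an assumption (the `pipeline(...)` arguments, language codes, and French task prefix are guesses; the hub id is the one this commit replaces with a local path):

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# hub id from the previous revision (this commit switches to "../gallek/gallek-m2m100-b51")
fw_modelcard = "amurienne/gallek-m2m100"

fw_model = AutoModelForSeq2SeqLM.from_pretrained(fw_modelcard)
fw_tokenizer = AutoTokenizer.from_pretrained(fw_modelcard)

# assumed construction: the diff only shows this pipeline being called
fw_translation_pipeline = pipeline(
    "translation",
    model=fw_model,
    tokenizer=fw_tokenizer,
    src_lang="fr",   # m2m100 needs explicit source/target codes (assumption)
    tgt_lang="br",
    max_length=512,
)

# mirror of the backward call shown in the diff, with an assumed French prefix
print(fw_translation_pipeline("traduis du français au breton: Bonjour !")[0]['translation_text'])
```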
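
The event-wiring change at the bottom is also worth calling out: `msg.submit(user_input, ...).then(respond, ...)` splits handling in two so the user's message appears in the chat immediately, before the slow translate/chat/translate round trip fills in the bot answer. A minimal self-contained sketch of the same Gradio pattern, with a stub in place of the app's model calls:

```python
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    msg = gr.Textbox()

    def user_input(message, chat_history):
        # step 1: echo the user message into the chat right away
        chat_history.append({"role": "user", "content": message})
        return chat_history

    def respond(message, chat_history):
        # step 2: compute the (stubbed) bot answer afterwards
        chat_history.append({"role": "assistant", "content": f"echo: {message}"})
        return "", chat_history

    # .then() chains the handlers, so the UI refreshes between the two steps
    msg.submit(user_input, [msg, chatbot], chatbot).then(respond, [msg, chatbot], [msg, chatbot])

demo.launch()
```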