Wassupbro123 committed
Commit 62712cc · verified · 1 Parent(s): 11dea74

Update app.py

Files changed (1)
  1. app.py +16 -18
app.py CHANGED
@@ -1,17 +1,12 @@
-import os
 import gradio as gr
 from huggingface_hub import InferenceClient
-from transformers import AutoTokenizer, AutoModelForImageTextToText
 
-# ── Set the Hugging Face token as an environment variable (use your own token)
-os.environ["HUGGINGFACE_TOKEN"] = "YOUR_HF_TOKEN"
-
-# ── ② Create the InferenceClient
-client = InferenceClient(api_key=os.environ["HUGGINGFACE_TOKEN"])
-
-# (* If local inference is not used, the model loading below is unnecessary)
-tokenizer = AutoTokenizer.from_pretrained("mlabonne/gemma-3-27b-it-abliterated")
-model = AutoModelForImageTextToText.from_pretrained("mlabonne/gemma-3-27b-it-abliterated")
+# Change the model to gemma-3-27b-it-abliterated
+# provider="hf-inference" explicitly selects the Hugging Face Inference API
+client = InferenceClient(
+    model="mlabonne/gemma-3-27b-it-abliterated",
+    provider="hf-inference"
+)
 
 def respond(
     message,
@@ -21,21 +16,18 @@ def respond(
     temperature,
     top_p,
 ):
-    # Put the system prompt first
+    # Build messages in order: system_message → history → latest user message
     messages = [{"role": "system", "content": system_message}]
-    # Append the past exchanges
     for u, a in history:
         if u:
             messages.append({"role": "user", "content": u})
         if a:
             messages.append({"role": "assistant", "content": a})
-    # Latest user input
     messages.append({"role": "user", "content": message})
 
+    # Call chat_completion (stream=True returns tokens one at a time)
     response = ""
-    # ── ③ Call chat_completion with the model specified
     for chunk in client.chat_completion(
-        model="mlabonne/gemma-3-27b-it-abliterated",
         messages=messages,
         max_tokens=max_tokens,
         temperature=temperature,
@@ -46,14 +38,20 @@ def respond(
         response += delta
         yield response
 
-# ── ④ Define the Gradio ChatInterface
+# Use Gradio's chat UI as-is
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
     ],
 )
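
For quick sanity-checking outside the Space, the sketch below exercises the same client configuration and streaming loop as the updated app.py. It is an illustration, not part of the commit: it assumes a huggingface_hub release recent enough to accept the provider argument, that HF_TOKEN is set in the environment (InferenceClient reads a saved or HF_TOKEN token automatically, which is why the old api_key plumbing was dropped), and that the model is actually served by the hf-inference provider.

# Standalone smoke test for the new client setup (a sketch, not from the commit).
# Assumes: recent huggingface_hub with the `provider` argument, HF_TOKEN set in
# the environment, and the model available via the hf-inference provider.
from huggingface_hub import InferenceClient

client = InferenceClient(
    model="mlabonne/gemma-3-27b-it-abliterated",
    provider="hf-inference",
)

messages = [
    {"role": "system", "content": "You are a friendly Chatbot."},
    {"role": "user", "content": "Say hello in one sentence."},
]

# Same streaming pattern as respond(): each chunk carries an incremental delta.
for chunk in client.chat_completion(
    messages=messages,
    max_tokens=64,
    temperature=0.7,
    top_p=0.95,
    stream=True,
):
    delta = chunk.choices[0].delta.content
    if delta:  # the final chunk may carry no content
        print(delta, end="", flush=True)
print()

If the provider does not host the model, chat_completion raises an error at this call; inside the Space the same call path runs in respond(), with only the message list differing.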