dev7halo commited on
Commit
1f8e2b4
·
verified ·
1 Parent(s): cccf1bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -254
app.py CHANGED
@@ -1,304 +1,91 @@
1
  import gradio as gr
2
- import os
3
- import re
4
-
5
- def validate_token(token):
6
- """ํ† ํฐ ํ˜•์‹ ์œ ํšจ์„ฑ ๊ฒ€์‚ฌ"""
7
- if not token:
8
- return False
9
- # ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์€ ๋ณดํ†ต hf_๋กœ ์‹œ์ž‘ํ•˜๊ณ  ์•ŒํŒŒ๋ฒณ๊ณผ ์ˆซ์ž๋กœ ๊ตฌ์„ฑ
10
- return bool(re.match(r'^hf_[a-zA-Z0-9]{34,}$', token))
11
-
12
- def safe_tokenizer_load(model_name, hf_token=None):
13
- """์•ˆ์ „ํ•œ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ"""
14
- from transformers import AutoTokenizer
15
-
16
- # ๋‹ค์–‘ํ•œ ๋กœ๋”ฉ ์ „๋žต ์‹œ๋„
17
- strategies = [
18
- {"trust_remote_code": True, "use_fast": False},
19
- {"trust_remote_code": True, "use_fast": True},
20
- {"trust_remote_code": False, "use_fast": False},
21
- {"trust_remote_code": False, "use_fast": True},
22
- ]
23
-
24
- for strategy in strategies:
25
- try:
26
- tokenizer_kwargs = strategy.copy()
27
- if hf_token and hf_token.strip():
28
- tokenizer_kwargs["token"] = hf_token.strip()
29
-
30
- tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
31
- return tokenizer, None
32
- except Exception as e:
33
- last_error = e
34
- continue
35
-
36
- return None, last_error
37
 
38
  def count_tokens(model_name, text, hf_token=None):
39
- """ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•˜๋Š” ํ•จ์ˆ˜"""
40
  try:
41
  if not model_name or not text:
42
  return "๋ชจ๋ธ๋ช…๊ณผ ํ…์ŠคํŠธ๋ฅผ ๋ชจ๋‘ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
43
 
44
- # ํ† ํฐ ๊ฒ€์ฆ
45
- if hf_token and hf_token.strip():
46
- token = hf_token.strip()
47
- if not validate_token(token):
48
- return "โŒ ํ† ํฐ ํ˜•์‹์ด ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์€ 'hf_'๋กœ ์‹œ์ž‘ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค."
49
-
50
- # ์•ˆ์ „ํ•œ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ
51
- tokenizer, error = safe_tokenizer_load(model_name, hf_token)
52
- if tokenizer is None:
53
- raise error
54
-
55
- # ํŒจ๋”ฉ ํ† ํฐ์ด ์—†๋Š” ๊ฒฝ์šฐ ์ถ”๊ฐ€
56
- if tokenizer.pad_token is None:
57
- if tokenizer.eos_token:
58
- tokenizer.pad_token = tokenizer.eos_token
59
- elif tokenizer.unk_token:
60
- tokenizer.pad_token = tokenizer.unk_token
61
-
62
- # ํ† ํฐํ™” - ์—ฌ๋Ÿฌ ๋ฐฉ๋ฒ• ์‹œ๋„
63
- try:
64
- tokens = tokenizer.encode(text, add_special_tokens=True)
65
- except:
66
- # ๋” ์•ˆ์ „ํ•œ ๋ฐฉ๋ฒ•์œผ๋กœ ์‹œ๋„
67
- tokens = tokenizer.encode(text, add_special_tokens=False)
68
 
 
 
69
  token_count = len(tokens)
70
 
71
- # ํ† ํฐ ๋””์ฝ”๋”ฉ (์•ˆ์ „ํ•˜๊ฒŒ ์ฒ˜๋ฆฌ)
72
- decoded_tokens = []
73
- for i, token in enumerate(tokens[:50]): # ์ฒ˜์Œ 50๊ฐœ๋งŒ
74
- try:
75
- decoded = tokenizer.decode([token])
76
- if decoded.strip() == '':
77
- decoded_tokens.append(f"<empty_{token}>")
78
- elif decoded.strip() == tokenizer.pad_token:
79
- decoded_tokens.append(f"<pad_{token}>")
80
- elif decoded.strip() == tokenizer.eos_token:
81
- decoded_tokens.append(f"<eos_{token}>")
82
- elif decoded.strip() == tokenizer.bos_token:
83
- decoded_tokens.append(f"<bos_{token}>")
84
- else:
85
- decoded_tokens.append(repr(decoded))
86
- except:
87
- decoded_tokens.append(f"<token_{token}>")
88
-
89
- result = f"โœ… ํ† ํฐ ์ˆ˜: {token_count}\n\n"
90
  result += f"๋ชจ๋ธ: {model_name}\n"
91
- result += f"ํ…์ŠคํŠธ ๊ธธ์ด: {len(text)} ๊ธ€์ž\n"
92
- result += f"ํ† ํฌ๋‚˜์ด์ € ํƒ€์ž…: {type(tokenizer).__name__}\n\n"
93
- result += f"ํ† ํฐ๋“ค (์ฒ˜์Œ 50๊ฐœ):\n{decoded_tokens}"
94
-
95
- if len(tokens) > 50:
96
- result += f"\n\n... (์ด {len(tokens)}๊ฐœ ํ† ํฐ ์ค‘ 50๊ฐœ๋งŒ ํ‘œ์‹œ)"
97
 
98
  return result
99
 
100
  except Exception as e:
101
- error_str = str(e)
102
- error_msg = f"โŒ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {error_str}\n\n"
103
-
104
- if "401" in error_str and "Unauthorized" in error_str:
105
- error_msg += "๐Ÿ” ์ธ์ฆ ์˜ค๋ฅ˜:\n"
106
- error_msg += "1. ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์ด ์˜ฌ๋ฐ”๋ฅธ์ง€ ํ™•์ธํ•˜์„ธ์š”\n"
107
- error_msg += "2. ํ† ํฐ์ด 'hf_'๋กœ ์‹œ์ž‘ํ•˜๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”\n"
108
- error_msg += "3. ํ•ด๋‹น ๋ชจ๋ธ์— ๋Œ€ํ•œ ์ ‘๊ทผ ๊ถŒํ•œ์ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”\n"
109
- error_msg += f"4. ๋ชจ๋ธ ํŽ˜์ด์ง€ ๋ฐฉ๋ฌธ: https://huggingface.co/{model_name}\n"
110
- elif "gated repo" in error_str:
111
- error_msg += "๐Ÿ” ์ ‘๊ทผ ์ œํ•œ๋œ ๋ชจ๋ธ:\n"
112
- error_msg += f"1. https://huggingface.co/{model_name} ์—์„œ ์ ‘๊ทผ ๊ถŒํ•œ์„ ์š”์ฒญํ•˜์„ธ์š”\n"
113
- error_msg += "2. ์Šน์ธ ํ›„ ์œ ํšจํ•œ ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์„ ์ž…๋ ฅํ•˜์„ธ์š”\n"
114
- elif "does not exist" in error_str or "not found" in error_str:
115
- error_msg += "๐Ÿ“ ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค:\n"
116
- error_msg += "1. ๋ชจ๋ธ๋ช…์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
117
- error_msg += "2. ๊ณต๊ฐœ ๋ชจ๋ธ ์˜ˆ์‹œ: 'klue/bert-base', 'beomi/KcELECTRA-base', 'gpt2'\n"
118
- elif "data did not match any variant" in error_str:
119
- error_msg += "โš ๏ธ ๋ชจ๋ธ ํŒŒ์ผ ๊ตฌ์กฐ ๋ฌธ์ œ:\n"
120
- error_msg += "1. ์ด ๋ชจ๋ธ์€ ํ˜„์žฌ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค\n"
121
- error_msg += "2. ๋‹ค๋ฅธ ๋ชจ๋ธ์„ ์‹œ๋„ํ•ด๋ณด์„ธ์š”\n"
122
- error_msg += "3. ์ถ”์ฒœ ๋ชจ๋ธ: 'gpt2', 'microsoft/DialoGPT-medium', 'klue/bert-base'\n"
123
- elif "Tokenizer class" in error_str:
124
- error_msg += "๐Ÿ”ง ํ† ํฌ๋‚˜์ด์ € ํด๋ž˜์Šค ๋ฌธ์ œ:\n"
125
- error_msg += "1. ์ด ๋ชจ๋ธ์€ ์ตœ์‹  transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค\n"
126
- error_msg += "2. ๋‹ค๋ฅธ ๋ชจ๋ธ์„ ์‹œ๋„ํ•ด๋ณด์„ธ์š”\n"
127
- else:
128
- error_msg += "๐Ÿ”ง ๊ฐ€๋Šฅํ•œ ํ•ด๊ฒฐ ๋ฐฉ๋ฒ•:\n"
129
- error_msg += "1. ๋ชจ๋ธ๋ช…์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
130
- error_msg += "2. ๋„คํŠธ์›Œํฌ ์—ฐ๊ฒฐ์„ ํ™•์ธํ•ด์ฃผ์„ธ์š”\n"
131
- error_msg += "3. ๋‹ค๋ฅธ ๋ชจ๋ธ์„ ์‹œ๋„ํ•ด๋ณด์„ธ์š”\n"
132
-
133
- return error_msg
134
 
135
- def check_model_access(model_name, hf_token=None):
136
- """๋ชจ๋ธ ์ ‘๊ทผ ๊ฐ€๋Šฅ ์—ฌ๋ถ€ ํ™•์ธ"""
137
  try:
138
  if not model_name:
139
  return "๋ชจ๋ธ๋ช…์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”."
140
 
141
- if hf_token and hf_token.strip():
142
- token = hf_token.strip()
143
- if not validate_token(token):
144
- return "โŒ ํ† ํฐ ํ˜•์‹์ด ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Šต๋‹ˆ๋‹ค. ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ์€ 'hf_'๋กœ ์‹œ์ž‘ํ•ด์•ผ ํ•ฉ๋‹ˆ๋‹ค."
145
-
146
- # ์•ˆ์ „ํ•œ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ
147
- tokenizer, error = safe_tokenizer_load(model_name, hf_token)
148
- if tokenizer is None:
149
- raise error
150
-
151
- # ํ† ํฌ๋‚˜์ด์ € ์ •๋ณด ํ‘œ์‹œ
152
- vocab_size = getattr(tokenizer, 'vocab_size', "์•Œ ์ˆ˜ ์—†์Œ")
153
- model_max_length = getattr(tokenizer, 'model_max_length', "์•Œ ์ˆ˜ ์—†์Œ")
154
-
155
- result = f"โœ… {model_name} ๋ชจ๋ธ ์ ‘๊ทผ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค!\n\n"
156
- result += f"ํ† ํฌ๋‚˜์ด์ € ์ •๋ณด:\n"
157
- result += f"- ์–ดํœ˜ ํฌ๊ธฐ: {vocab_size}\n"
158
- result += f"- ์ตœ๋Œ€ ๊ธธ์ด: {model_max_length}\n"
159
- result += f"- ํ† ํฌ๋‚˜์ด์ € ํƒ€์ž…: {type(tokenizer).__name__}\n"
160
-
161
- # ํŠน์ˆ˜ ํ† ํฐ ์ •๋ณด
162
- special_tokens = []
163
- if hasattr(tokenizer, 'pad_token') and tokenizer.pad_token:
164
- special_tokens.append(f"PAD: {tokenizer.pad_token}")
165
- if hasattr(tokenizer, 'eos_token') and tokenizer.eos_token:
166
- special_tokens.append(f"EOS: {tokenizer.eos_token}")
167
- if hasattr(tokenizer, 'bos_token') and tokenizer.bos_token:
168
- special_tokens.append(f"BOS: {tokenizer.bos_token}")
169
- if hasattr(tokenizer, 'unk_token') and tokenizer.unk_token:
170
- special_tokens.append(f"UNK: {tokenizer.unk_token}")
171
-
172
- if special_tokens:
173
- result += f"- ํŠน์ˆ˜ ํ† ํฐ: {', '.join(special_tokens)}"
174
 
175
- return result
176
 
177
  except Exception as e:
178
- error_str = str(e)
179
-
180
- if "401" in error_str and "Unauthorized" in error_str:
181
- return f"๐Ÿ” {model_name}: ์ธ์ฆ ์˜ค๋ฅ˜์ž…๋‹ˆ๋‹ค. ํ† ํฐ์„ ํ™•์ธํ•˜๊ฑฐ๋‚˜ ๋ชจ๋ธ ์ ‘๊ทผ ๊ถŒํ•œ์„ ์š”์ฒญํ•˜์„ธ์š”."
182
- elif "gated repo" in error_str:
183
- return f"๐Ÿ” {model_name}์€ ์ ‘๊ทผ ๊ถŒํ•œ์ด ํ•„์š”ํ•œ ๋ชจ๋ธ์ž…๋‹ˆ๋‹ค."
184
- elif "does not exist" in error_str:
185
- return f"โŒ {model_name} ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
186
- elif "data did not match any variant" in error_str:
187
- return f"โš ๏ธ {model_name} ๋ชจ๋ธ์€ ํ˜„์žฌ ์ง€์›๋˜์ง€ ์•Š๋Š” ํ˜•์‹์ž…๋‹ˆ๋‹ค."
188
- else:
189
- return f"โŒ ์˜ค๋ฅ˜: {error_str}"
190
 
191
- # Gradio ์ธํ„ฐํŽ˜์ด์Šค ์ƒ์„ฑ
192
  def create_interface():
193
- with gr.Blocks(title="ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ", theme=gr.themes.Soft()) as demo:
194
- gr.Markdown("# ๐Ÿ”ข ํ—ˆ๊น…ํŽ˜์ด์Šค ๋ชจ๋ธ ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ")
195
- gr.Markdown("ํ—ˆ๊น…ํŽ˜์ด์Šค์— ์˜ฌ๋ผ์˜จ ๋ชจ๋ธ์˜ ํ† ํฌ๋‚˜์ด์ €๋ฅผ ์‚ฌ์šฉํ•ด ํ…์ŠคํŠธ์˜ ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•ฉ๋‹ˆ๋‹ค.")
196
 
197
  with gr.Row():
198
  with gr.Column():
199
  model_input = gr.Textbox(
200
  label="๋ชจ๋ธ๋ช…",
201
- placeholder="์˜ˆ: klue/bert-base, beomi/KcELECTRA-base, gpt2",
202
- value="klue/bert-base"
203
  )
204
 
205
  token_input = gr.Textbox(
206
- label="ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ (์„ ํƒ์‚ฌํ•ญ)",
207
- placeholder="gated ๋ชจ๋ธ ์‚ฌ์šฉ์‹œ ํ•„์š” (hf_xxx...)",
208
  type="password"
209
  )
210
 
211
  text_input = gr.Textbox(
212
  label="ํ…์ŠคํŠธ",
213
- placeholder="ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•  ํ…์ŠคํŠธ๋ฅผ ์ž…๋ ฅํ•˜์„ธ์š”...",
214
  lines=5,
215
- value="์•ˆ๋…•ํ•˜์„ธ์š”! ์ด๊ฒƒ์€ ํ…Œ์ŠคํŠธ ํ…์ŠคํŠธ์ž…๋‹ˆ๋‹ค."
216
  )
217
 
218
  with gr.Row():
219
- check_btn = gr.Button("๋ชจ๋ธ ์ ‘๊ทผ ํ™•์ธ", variant="secondary")
220
- calculate_btn = gr.Button("ํ† ํฐ ์ˆ˜ ๊ณ„์‚ฐ", variant="primary")
221
 
222
  with gr.Column():
223
- output = gr.Textbox(
224
- label="๊ฒฐ๊ณผ",
225
- lines=15,
226
- show_copy_button=True
227
- )
228
-
229
- # ๋ชจ๋ธ ์นดํ…Œ๊ณ ๋ฆฌ๋ณ„ ์˜ˆ์‹œ
230
- with gr.Tabs():
231
- with gr.TabItem("โœ… ์•ˆ์ •์ ์ธ ๋ชจ๋ธ"):
232
- gr.Markdown("### ํ™•์‹คํžˆ ์ž‘๋™ํ•˜๋Š” ๋ชจ๋ธ๋“ค:")
233
- with gr.Row():
234
- stable_models = [
235
- "klue/bert-base",
236
- "beomi/KcELECTRA-base",
237
- "gpt2",
238
- "microsoft/DialoGPT-medium",
239
- "distilbert-base-uncased",
240
- "t5-small"
241
- ]
242
-
243
- for model in stable_models:
244
- btn = gr.Button(model, size="sm")
245
- btn.click(lambda x=model: x, outputs=model_input)
246
-
247
- with gr.TabItem("โš ๏ธ ๋ฌธ์ œ๊ฐ€ ์žˆ์„ ์ˆ˜ ์žˆ๋Š” ๋ชจ๋ธ"):
248
- gr.Markdown("### ์ง€์›๋˜์ง€ ์•Š๊ฑฐ๋‚˜ ๋ฌธ์ œ๊ฐ€ ์žˆ์„ ์ˆ˜ ์žˆ๋Š” ๋ชจ๋ธ๋“ค:")
249
- gr.Markdown("์ด ๋ชจ๋ธ๋“ค์€ ํ˜„์žฌ ๋ฒ„์ „์—์„œ ์ž‘๋™ํ•˜์ง€ ์•Š์„ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.")
250
- with gr.Row():
251
- problematic_models = [
252
- "google/gemma-3-12b-it",
253
- "meta-llama/Llama-2-7b-hf",
254
- "mistralai/Mistral-7B-v0.1"
255
- ]
256
-
257
- for model in problematic_models:
258
- btn = gr.Button(model, size="sm")
259
- btn.click(lambda x=model: x, outputs=model_input)
260
 
261
- # ์‚ฌ์šฉ๋ฒ• ๊ฐ€์ด๋“œ
262
- with gr.Accordion("๐Ÿ“– ์‚ฌ์šฉ๋ฒ• ๊ฐ€์ด๋“œ", open=False):
263
- gr.Markdown("""
264
- ### ๊ธฐ๋ณธ ์‚ฌ์šฉ๋ฒ•:
265
- 1. **๋ชจ๋ธ๋ช… ์ž…๋ ฅ**: ํ—ˆ๊น…ํŽ˜์ด์Šค ๋ชจ๋ธ๋ช… (์˜ˆ: klue/bert-base)
266
- 2. **ํ…์ŠคํŠธ ์ž…๋ ฅ**: ํ† ํฐ ์ˆ˜๋ฅผ ๊ณ„์‚ฐํ•  ํ…์ŠคํŠธ
267
- 3. **๊ณ„์‚ฐ ๋ฒ„ํŠผ ํด๋ฆญ**: ๊ฒฐ๊ณผ ํ™•์ธ
268
-
269
- ### ํ† ํฐ์ด ํ•„์š”ํ•œ ๊ฒฝ์šฐ:
270
- - Gated ๋ชจ๋ธ (Meta Llama, Google Gemma ๋“ฑ)
271
- - ๋น„๊ณต๊ฐœ ๋ชจ๋ธ
272
-
273
- ### ํ† ํฐ ์ƒ์„ฑ:
274
- 1. [ํ—ˆ๊น…ํŽ˜์ด์Šค ํ† ํฐ ํŽ˜์ด์ง€](https://huggingface.co/settings/tokens) ๋ฐฉ๋ฌธ
275
- 2. "New token" ์ƒ์„ฑ (Read ๊ถŒํ•œ)
276
- 3. ํ† ํฐ์„ ์œ„์˜ ํ•„๋“œ์— ์ž…๋ ฅ
277
-
278
- ### ๋ฌธ์ œ ํ•ด๊ฒฐ:
279
- - ๋ชจ๋ธ์ด ์ง€์›๋˜์ง€ ์•Š๋Š” ๊ฒฝ์šฐ ๋‹ค๋ฅธ ๋ชจ๋ธ ์‹œ๋„
280
- - ๋„คํŠธ์›Œํฌ ๋ฌธ์ œ ์‹œ ์ž ์‹œ ํ›„ ์žฌ์‹œ๋„
281
- - ์•ˆ์ •์ ์ธ ๋ชจ๋ธ ํƒญ์˜ ๋ชจ๋ธ๋“ค ์‚ฌ์šฉ ๊ถŒ์žฅ
282
- """)
283
 
284
  # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
285
- check_btn.click(
286
- check_model_access,
287
- inputs=[model_input, token_input],
288
- outputs=output
289
- )
290
-
291
- calculate_btn.click(
292
- count_tokens,
293
- inputs=[model_input, text_input, token_input],
294
- outputs=output
295
- )
296
-
297
- text_input.submit(
298
- count_tokens,
299
- inputs=[model_input, text_input, token_input],
300
- outputs=output
301
- )
302
 
303
  return demo
304
 
 
1
  import gradio as gr
2
+ from transformers import AutoTokenizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
def count_tokens(model_name, text, hf_token=None):
    """Count how many tokens *text* produces under *model_name*'s tokenizer.

    Args:
        model_name: Hugging Face model id (e.g. "gpt2", "klue/bert-base").
        text: The text to tokenize.
        hf_token: Optional Hugging Face access token for gated/private models;
            blank or whitespace-only values are treated as absent.

    Returns:
        A human-readable result string (Korean UI text), or an error
        message string if anything goes wrong.
    """
    try:
        if not model_name or not text:
            return "모델명과 텍스트를 모두 입력해주세요."

        # Normalise the optional token: empty/whitespace counts as "no token".
        access_token = hf_token.strip() if hf_token and hf_token.strip() else None

        # Load the tokenizer for the requested model.
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)

        # Encode and count.
        token_ids = tokenizer.encode(text)

        report_lines = [
            f"✅ 토큰 수: {len(token_ids)}",
            f"모델: {model_name}",
            f"텍스트 길이: {len(text)} 글자",
        ]
        return "\n".join(report_lines)

    except Exception as e:
        # Broad catch by design: any loading/encoding failure is surfaced
        # to the UI as an error string rather than a traceback.
        return f"❌ 오류: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
def check_model(model_name, hf_token=None):
    """Check whether *model_name* is accessible by trying to load its tokenizer.

    Args:
        model_name: Hugging Face model id to probe.
        hf_token: Optional Hugging Face access token; blank values are ignored.

    Returns:
        A success message (Korean UI text) if the tokenizer loads,
        otherwise an error message string.
    """
    try:
        if not model_name:
            return "모델명을 입력해주세요."

        # Empty/whitespace token is treated as "no token".
        access_token = hf_token.strip() if hf_token and hf_token.strip() else None

        # Loading the tokenizer is the accessibility probe; the instance
        # itself is not needed afterwards.
        AutoTokenizer.from_pretrained(model_name, token=access_token)

        return f"✅ {model_name} 모델 접근 가능!"

    except Exception as e:
        # Surface any failure (auth, gated repo, missing model) as a string.
        return f"❌ 오류: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ # Gradio ์ธํ„ฐํŽ˜์ด์Šค
47
def create_interface():
    """Build and return the Gradio Blocks UI for the token counter.

    Layout: a two-column row (inputs on the left, result on the right),
    a row of quick-fill preset-model buttons, and event wiring so both
    buttons and Enter-in-textbox trigger the backend functions.
    """
    with gr.Blocks(title="토큰 계산기") as demo:
        gr.Markdown("# 🔢 토큰 계산기")

        with gr.Row():
            with gr.Column():
                model_box = gr.Textbox(
                    label="모델명",
                    placeholder="예: gpt2, klue/bert-base",
                    value="gpt2",
                )

                hf_token_box = gr.Textbox(
                    label="HF 토큰 (선택사항)",
                    type="password",
                )

                text_box = gr.Textbox(
                    label="텍스트",
                    lines=5,
                    value="안녕하세요! 테스트 텍스트입니다.",
                )

                with gr.Row():
                    check_button = gr.Button("모델 확인")
                    count_button = gr.Button("토큰 계산", variant="primary")

            with gr.Column():
                result_box = gr.Textbox(label="결과", lines=10)

        # Quick-fill buttons for a few known-good model ids.
        gr.Markdown("### 추천 모델")
        with gr.Row():
            for preset in ("gpt2", "klue/bert-base", "microsoft/DialoGPT-medium"):
                # Bind the current id as a default argument so every button
                # keeps its own value (avoids the late-binding closure pitfall).
                gr.Button(preset, size="sm").click(
                    lambda x=preset: x, outputs=model_box
                )

        # Event wiring: buttons plus Enter in the text box.
        check_button.click(check_model, [model_box, hf_token_box], result_box)
        count_button.click(count_tokens, [model_box, text_box, hf_token_box], result_box)
        text_box.submit(count_tokens, [model_box, text_box, hf_token_box], result_box)

    return demo
91