dev7halo committed on
Commit cccf1bf · verified · 1 Parent(s): 30e4068

Update app.py

Files changed (1)
  1. app.py +178 -61
app.py CHANGED
@@ -1,5 +1,39 @@
  import gradio as gr
  import os

  def count_tokens(model_name, text, hf_token=None):
      """Count the number of tokens in the text."""
@@ -7,50 +41,94 @@ def count_tokens(model_name, text, hf_token=None):
          if not model_name or not text:
              return "Please enter both a model name and text."

-         # Handle the transformers import inside the function
-         from transformers import AutoTokenizer
-
-         # Load the tokenizer (use the token if provided)
-         tokenizer_kwargs = {"trust_remote_code": True}
          if hf_token and hf_token.strip():
-             tokenizer_kwargs["token"] = hf_token.strip()

-         tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)

-         # Tokenize
-         tokens = tokenizer.encode(text)
-         token_count = len(tokens)

-         # Decode the tokens (optional - just to display them)
          try:
-             decoded_tokens = [tokenizer.decode([token]) for token in tokens]
          except:
-             decoded_tokens = ["token decoding failed"]

          result = f"✅ Token count: {token_count}\n\n"
-         result += f"Tokens: {decoded_tokens[:50]}"  # show only the first 50
-         if len(decoded_tokens) > 50:
-             result += f"\n... (showing only 50 of {len(decoded_tokens)} tokens)"

          return result

      except Exception as e:
-         error_msg = f"❌ Error: {str(e)}\n\n"

-         if "gated repo" in str(e):
-             error_msg += "🔐 This model requires access permission:\n"
              error_msg += f"1. Request access at https://huggingface.co/{model_name}\n"
-             error_msg += "2. Enter your Hugging Face token\n"
-             error_msg += "3. Create a token: https://huggingface.co/settings/tokens\n\n"
-         elif "does not exist" in str(e) or "not found" in str(e):
              error_msg += "📝 Model not found:\n"
              error_msg += "1. Check the model name\n"
-             error_msg += "2. Public model examples: 'klue/bert-base', 'beomi/KcELECTRA-base', 'gpt2'\n\n"
          else:
              error_msg += "🔧 Possible fixes:\n"
              error_msg += "1. Check the model name\n"
              error_msg += "2. Check your network connection\n"
-             error_msg += "3. Enter a Hugging Face token if needed\n"

          return error_msg
@@ -60,22 +138,55 @@ def check_model_access(model_name, hf_token=None):
          if not model_name:
              return "Please enter a model name."

-         from transformers import AutoTokenizer
-
-         tokenizer_kwargs = {"trust_remote_code": True}
          if hf_token and hf_token.strip():
-             tokenizer_kwargs["token"] = hf_token.strip()

-         tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
-         return f"✅ {model_name} is accessible!"

      except Exception as e:
-         if "gated repo" in str(e):
-             return f"🔐 {model_name} is a gated model. Please enter a token."
-         elif "does not exist" in str(e):
              return f"❌ Model {model_name} could not be found."
          else:
-             return f"❌ Error: {str(e)}"

  # Build the Gradio interface
  def create_interface():
@@ -100,7 +211,8 @@ def create_interface():
                  text_input = gr.Textbox(
                      label="Text",
                      placeholder="Enter the text to count tokens for...",
-                     lines=5
                  )

          with gr.Row():
@@ -110,57 +222,63 @@
              with gr.Column():
                  output = gr.Textbox(
                      label="Result",
-                     lines=10,
                      show_copy_button=True
                  )

      # Example models by category
      with gr.Tabs():
-         with gr.TabItem("Public models (no token required)"):
-             gr.Markdown("### Models you can use freely:")
                  with gr.Row():
-                     public_models = [
                          "klue/bert-base",
                          "beomi/KcELECTRA-base",
                          "gpt2",
-                         "microsoft/DialoGPT-medium"
                      ]

-                     for model in public_models:
                          btn = gr.Button(model, size="sm")
                          btn.click(lambda x=model: x, outputs=model_input)

-         with gr.TabItem("Gated models (token required)"):
-             gr.Markdown("### Models that require access permission:")
-             gr.Markdown("⚠️ These models require a Hugging Face token")
                  with gr.Row():
-                     gated_models = [
                          "meta-llama/Llama-2-7b-hf",
-                         "google/gemma-7b",
                          "mistralai/Mistral-7B-v0.1"
                      ]

-                     for model in gated_models:
                          btn = gr.Button(model, size="sm")
                          btn.click(lambda x=model: x, outputs=model_input)

-     # Token guide
-     with gr.Accordion("🔑 Hugging Face token guide", open=False):
          gr.Markdown("""
          ### When you need a token:
-         1. **Gated models**: Meta Llama, Google Gemma, etc.
-         2. **Private models**: personal or organization private models

-         ### How to create a token:
-         1. Go to the [Hugging Face token page](https://huggingface.co/settings/tokens)
-         2. Click "New token"
-         3. Create the token with "Read" permission
-         4. Paste the token into the "Hugging Face token" field above

-         ### Requesting model access:
-         1. Visit the page of the model you want to use
-         2. Click the "Request access" button
-         3. After approval, use the model together with your token
          """)

      # Event handlers
@@ -176,7 +294,6 @@ def create_interface():
              outputs=output
          )

-     # Also allow running with the Enter key
      text_input.submit(
          count_tokens,
          inputs=[model_input, text_input, token_input],
 
@@ -1,5 +1,39 @@
  import gradio as gr
  import os
+ import re
+
+ def validate_token(token):
+     """Check that the token format is valid."""
+     if not token:
+         return False
+     # Hugging Face tokens usually start with hf_ followed by letters and digits
+     return bool(re.match(r'^hf_[a-zA-Z0-9]{34,}$', token))
+
+ def safe_tokenizer_load(model_name, hf_token=None):
+     """Load a tokenizer safely, trying several strategies."""
+     from transformers import AutoTokenizer
+
+     # Try multiple loading strategies in turn
+     strategies = [
+         {"trust_remote_code": True, "use_fast": False},
+         {"trust_remote_code": True, "use_fast": True},
+         {"trust_remote_code": False, "use_fast": False},
+         {"trust_remote_code": False, "use_fast": True},
+     ]
+
+     last_error = None
+     for strategy in strategies:
+         try:
+             tokenizer_kwargs = strategy.copy()
+             if hf_token and hf_token.strip():
+                 tokenizer_kwargs["token"] = hf_token.strip()
+
+             tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
+             return tokenizer, None
+         except Exception as e:
+             last_error = e
+             continue
+
+     return None, last_error
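+ # Callers receive (tokenizer, None) on success, or (None, last_error) once all
+ # four strategies have failed; last_error is the exception from the final attempt.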

  def count_tokens(model_name, text, hf_token=None):
      """Count the number of tokens in the text."""
@@ -7,50 +41,94 @@ def count_tokens(model_name, text, hf_token=None):
          if not model_name or not text:
              return "Please enter both a model name and text."

+         # Validate the token
          if hf_token and hf_token.strip():
+             token = hf_token.strip()
+             if not validate_token(token):
+                 return "❌ Invalid token format. Hugging Face tokens must start with 'hf_'."

+         # Load the tokenizer safely
+         tokenizer, error = safe_tokenizer_load(model_name, hf_token)
+         if tokenizer is None:
+             raise error

+         # Add a padding token if the tokenizer has none
+         if tokenizer.pad_token is None:
+             if tokenizer.eos_token:
+                 tokenizer.pad_token = tokenizer.eos_token
+             elif tokenizer.unk_token:
+                 tokenizer.pad_token = tokenizer.unk_token
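+         # Note: GPT-2-style tokenizers ship without a pad token; borrowing EOS/UNK
+         # here keeps the special-token labeling below from comparing against None.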
 
+         # Tokenize - try more than one approach
          try:
+             tokens = tokenizer.encode(text, add_special_tokens=True)
          except:
+             # Fall back to a safer call
+             tokens = tokenizer.encode(text, add_special_tokens=False)
+
+         token_count = len(tokens)
+
+         # Decode the tokens (handled safely)
+         decoded_tokens = []
+         for token in tokens[:50]:  # first 50 only
+             try:
+                 decoded = tokenizer.decode([token])
+                 if decoded.strip() == '':
+                     decoded_tokens.append(f"<empty_{token}>")
+                 elif decoded.strip() == tokenizer.pad_token:
+                     decoded_tokens.append(f"<pad_{token}>")
+                 elif decoded.strip() == tokenizer.eos_token:
+                     decoded_tokens.append(f"<eos_{token}>")
+                 elif decoded.strip() == tokenizer.bos_token:
+                     decoded_tokens.append(f"<bos_{token}>")
+                 else:
+                     decoded_tokens.append(repr(decoded))
+             except Exception:
+                 decoded_tokens.append(f"<token_{token}>")

          result = f"✅ Token count: {token_count}\n\n"
+         result += f"Model: {model_name}\n"
+         result += f"Text length: {len(text)} characters\n"
+         result += f"Tokenizer type: {type(tokenizer).__name__}\n\n"
+         result += f"Tokens (first 50):\n{decoded_tokens}"
+
+         if len(tokens) > 50:
+             result += f"\n\n... (showing only 50 of {len(tokens)} tokens)"

          return result

      except Exception as e:
+         error_str = str(e)
+         error_msg = f"❌ Error: {error_str}\n\n"

+         if "401" in error_str and "Unauthorized" in error_str:
+             error_msg += "🔐 Authentication error:\n"
+             error_msg += "1. Check that your Hugging Face token is correct\n"
+             error_msg += "2. Check that the token starts with 'hf_'\n"
+             error_msg += "3. Check that you have access to this model\n"
+             error_msg += f"4. Visit the model page: https://huggingface.co/{model_name}\n"
+         elif "gated repo" in error_str:
+             error_msg += "🔐 Gated model:\n"
              error_msg += f"1. Request access at https://huggingface.co/{model_name}\n"
+             error_msg += "2. After approval, enter a valid Hugging Face token\n"
+         elif "does not exist" in error_str or "not found" in error_str:
              error_msg += "📝 Model not found:\n"
              error_msg += "1. Check the model name\n"
+             error_msg += "2. Public model examples: 'klue/bert-base', 'beomi/KcELECTRA-base', 'gpt2'\n"
+         elif "data did not match any variant" in error_str:
+             error_msg += "⚠️ Model file format problem:\n"
+             error_msg += "1. This model uses a format that is not currently supported\n"
+             error_msg += "2. Try a different model\n"
+             error_msg += "3. Recommended models: 'gpt2', 'microsoft/DialoGPT-medium', 'klue/bert-base'\n"
+         elif "Tokenizer class" in error_str:
+             error_msg += "🔧 Tokenizer class problem:\n"
+             error_msg += "1. This model may require a newer transformers library\n"
+             error_msg += "2. Try a different model\n"
          else:
              error_msg += "🔧 Possible fixes:\n"
              error_msg += "1. Check the model name\n"
              error_msg += "2. Check your network connection\n"
+             error_msg += "3. Try a different model\n"

          return error_msg

@@ -60,22 +138,55 @@ def check_model_access(model_name, hf_token=None):
          if not model_name:
              return "Please enter a model name."

          if hf_token and hf_token.strip():
+             token = hf_token.strip()
+             if not validate_token(token):
+                 return "❌ Invalid token format. Hugging Face tokens must start with 'hf_'."
+
+         # Load the tokenizer safely
+         tokenizer, error = safe_tokenizer_load(model_name, hf_token)
+         if tokenizer is None:
+             raise error
+
+         # Show tokenizer info
+         vocab_size = getattr(tokenizer, 'vocab_size', "unknown")
+         model_max_length = getattr(tokenizer, 'model_max_length', "unknown")
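+         # Note: tokenizers without a configured limit report a very large sentinel
+         # number for model_max_length rather than a real context length.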
+
+         result = f"✅ {model_name} is accessible!\n\n"
+         result += f"Tokenizer info:\n"
+         result += f"- Vocabulary size: {vocab_size}\n"
+         result += f"- Max length: {model_max_length}\n"
+         result += f"- Tokenizer type: {type(tokenizer).__name__}\n"

+         # Special token info
+         special_tokens = []
+         if hasattr(tokenizer, 'pad_token') and tokenizer.pad_token:
+             special_tokens.append(f"PAD: {tokenizer.pad_token}")
+         if hasattr(tokenizer, 'eos_token') and tokenizer.eos_token:
+             special_tokens.append(f"EOS: {tokenizer.eos_token}")
+         if hasattr(tokenizer, 'bos_token') and tokenizer.bos_token:
+             special_tokens.append(f"BOS: {tokenizer.bos_token}")
+         if hasattr(tokenizer, 'unk_token') and tokenizer.unk_token:
+             special_tokens.append(f"UNK: {tokenizer.unk_token}")
+
+         if special_tokens:
+             result += f"- Special tokens: {', '.join(special_tokens)}"
+
+         return result

      except Exception as e:
+         error_str = str(e)
+
+         if "401" in error_str and "Unauthorized" in error_str:
+             return f"🔐 {model_name}: authentication error. Check your token or request access to the model."
+         elif "gated repo" in error_str:
+             return f"🔐 {model_name} is a gated model."
+         elif "does not exist" in error_str:
              return f"❌ Model {model_name} could not be found."
+         elif "data did not match any variant" in error_str:
+             return f"⚠️ {model_name} uses a format that is not currently supported."
          else:
+             return f"❌ Error: {error_str}"

  # Build the Gradio interface
  def create_interface():
@@ -100,7 +211,8 @@ def create_interface():
                  text_input = gr.Textbox(
                      label="Text",
                      placeholder="Enter the text to count tokens for...",
+                     lines=5,
+                     value="Hello! This is a test text."
                  )

          with gr.Row():
@@ -110,57 +222,63 @@
              with gr.Column():
                  output = gr.Textbox(
                      label="Result",
+                     lines=15,
                      show_copy_button=True
                  )

      # Example models by category
      with gr.Tabs():
+         with gr.TabItem("✅ Stable models"):
+             gr.Markdown("### Models that are known to work:")
                  with gr.Row():
+                     stable_models = [
                          "klue/bert-base",
                          "beomi/KcELECTRA-base",
                          "gpt2",
+                         "microsoft/DialoGPT-medium",
+                         "distilbert-base-uncased",
+                         "t5-small"
                      ]

+                     for model in stable_models:
                          btn = gr.Button(model, size="sm")
                          btn.click(lambda x=model: x, outputs=model_input)

+         with gr.TabItem("⚠️ Potentially problematic models"):
+             gr.Markdown("### Models that may be unsupported or broken:")
+             gr.Markdown("These models may not work with the current version.")
                  with gr.Row():
+                     problematic_models = [
+                         "google/gemma-3-12b-it",
                          "meta-llama/Llama-2-7b-hf",
                          "mistralai/Mistral-7B-v0.1"
                      ]

+                     for model in problematic_models:
                          btn = gr.Button(model, size="sm")
                          btn.click(lambda x=model: x, outputs=model_input)

+     # Usage guide
+     with gr.Accordion("📖 Usage guide", open=False):
          gr.Markdown("""
+         ### Basic usage:
+         1. **Enter a model name**: a Hugging Face model name (e.g. klue/bert-base)
+         2. **Enter text**: the text whose tokens you want to count
+         3. **Click the count button**: check the result
+
          ### When you need a token:
+         - Gated models (Meta Llama, Google Gemma, etc.)
+         - Private models

+         ### Creating a token:
+         1. Visit the [Hugging Face token page](https://huggingface.co/settings/tokens)
+         2. Create a "New token" (Read permission)
+         3. Paste the token into the field above

+         ### Troubleshooting:
+         - If a model is not supported, try a different one
+         - For network problems, wait a moment and retry
+         - Prefer the models from the stable models tab
          """)

      # Event handlers
@@ -176,7 +294,6 @@ def create_interface():
              outputs=output
          )

      text_input.submit(
          count_tokens,
          inputs=[model_input, text_input, token_input],