Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,39 @@
|
|
1 |
import gradio as gr
|
2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def count_tokens(model_name, text, hf_token=None):
|
5 |
"""ν ν° μλ₯Ό κ³μ°νλ ν¨μ"""
|
@@ -7,50 +41,94 @@ def count_tokens(model_name, text, hf_token=None):
|
|
7 |
if not model_name or not text:
|
8 |
return "λͺ¨λΈλͺ
κ³Ό ν
μ€νΈλ₯Ό λͺ¨λ μ
λ ₯ν΄μ£ΌμΈμ."
|
9 |
|
10 |
-
#
|
11 |
-
from transformers import AutoTokenizer
|
12 |
-
|
13 |
-
# ν ν¬λμ΄μ λ‘λ (ν ν°μ΄ μμΌλ©΄ μ¬μ©)
|
14 |
-
tokenizer_kwargs = {"trust_remote_code": True}
|
15 |
if hf_token and hf_token.strip():
|
16 |
-
|
|
|
|
|
17 |
|
18 |
-
|
|
|
|
|
|
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
23 |
|
24 |
-
#
|
25 |
try:
|
26 |
-
|
27 |
except:
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
result = f"β
ν ν° μ: {token_count}\n\n"
|
31 |
-
result += f"
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
34 |
|
35 |
return result
|
36 |
|
37 |
except Exception as e:
|
38 |
-
|
|
|
39 |
|
40 |
-
if "
|
41 |
-
error_msg += "π
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
error_msg += f"1. https://huggingface.co/{model_name} μμ μ κ·Ό κΆνμ μμ²νμΈμ\n"
|
43 |
-
error_msg += "2. νκΉ
νμ΄μ€ ν ν°μ μ
λ ₯νμΈμ\n"
|
44 |
-
|
45 |
-
elif "does not exist" in str(e) or "not found" in str(e):
|
46 |
error_msg += "π λͺ¨λΈμ μ°Ύμ μ μμ΅λλ€:\n"
|
47 |
error_msg += "1. λͺ¨λΈλͺ
μ νμΈν΄μ£ΌμΈμ\n"
|
48 |
-
error_msg += "2. κ³΅κ° λͺ¨λΈ μμ: 'klue/bert-base', 'beomi/KcELECTRA-base', 'gpt2'\n
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
else:
|
50 |
error_msg += "π§ κ°λ₯ν ν΄κ²° λ°©λ²:\n"
|
51 |
error_msg += "1. λͺ¨λΈλͺ
μ νμΈν΄μ£ΌμΈμ\n"
|
52 |
error_msg += "2. λ€νΈμν¬ μ°κ²°μ νμΈν΄μ£ΌμΈμ\n"
|
53 |
-
error_msg += "3.
|
54 |
|
55 |
return error_msg
|
56 |
|
@@ -60,22 +138,55 @@ def check_model_access(model_name, hf_token=None):
|
|
60 |
if not model_name:
|
61 |
return "λͺ¨λΈλͺ
μ μ
λ ₯ν΄μ£ΌμΈμ."
|
62 |
|
63 |
-
from transformers import AutoTokenizer
|
64 |
-
|
65 |
-
tokenizer_kwargs = {"trust_remote_code": True}
|
66 |
if hf_token and hf_token.strip():
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
-
|
70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
except Exception as e:
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
|
|
|
|
|
|
76 |
return f"β {model_name} λͺ¨λΈμ μ°Ύμ μ μμ΅λλ€."
|
|
|
|
|
77 |
else:
|
78 |
-
return f"β μ€λ₯: {
|
79 |
|
80 |
# Gradio μΈν°νμ΄μ€ μμ±
|
81 |
def create_interface():
|
@@ -100,7 +211,8 @@ def create_interface():
|
|
100 |
text_input = gr.Textbox(
|
101 |
label="ν
μ€νΈ",
|
102 |
placeholder="ν ν° μλ₯Ό κ³μ°ν ν
μ€νΈλ₯Ό μ
λ ₯νμΈμ...",
|
103 |
-
lines=5
|
|
|
104 |
)
|
105 |
|
106 |
with gr.Row():
|
@@ -110,57 +222,63 @@ def create_interface():
|
|
110 |
with gr.Column():
|
111 |
output = gr.Textbox(
|
112 |
label="κ²°κ³Ό",
|
113 |
-
lines=
|
114 |
show_copy_button=True
|
115 |
)
|
116 |
|
117 |
# λͺ¨λΈ μΉ΄ν
κ³ λ¦¬λ³ μμ
|
118 |
with gr.Tabs():
|
119 |
-
with gr.TabItem("
|
120 |
-
gr.Markdown("###
|
121 |
with gr.Row():
|
122 |
-
|
123 |
"klue/bert-base",
|
124 |
"beomi/KcELECTRA-base",
|
125 |
"gpt2",
|
126 |
-
"microsoft/DialoGPT-medium"
|
|
|
|
|
127 |
]
|
128 |
|
129 |
-
for model in
|
130 |
btn = gr.Button(model, size="sm")
|
131 |
btn.click(lambda x=model: x, outputs=model_input)
|
132 |
|
133 |
-
with gr.TabItem("
|
134 |
-
gr.Markdown("###
|
135 |
-
gr.Markdown("
|
136 |
with gr.Row():
|
137 |
-
|
|
|
138 |
"meta-llama/Llama-2-7b-hf",
|
139 |
-
"google/gemma-7b",
|
140 |
"mistralai/Mistral-7B-v0.1"
|
141 |
]
|
142 |
|
143 |
-
for model in
|
144 |
btn = gr.Button(model, size="sm")
|
145 |
btn.click(lambda x=model: x, outputs=model_input)
|
146 |
|
147 |
-
#
|
148 |
-
with gr.Accordion("
|
149 |
gr.Markdown("""
|
|
|
|
|
|
|
|
|
|
|
150 |
### ν ν°μ΄ νμν κ²½μ°:
|
151 |
-
|
152 |
-
|
153 |
|
154 |
-
### ν ν°
|
155 |
-
1. [νκΉ
νμ΄μ€ ν ν° νμ΄μ§](https://huggingface.co/settings/tokens)
|
156 |
-
2. "New token"
|
157 |
-
3.
|
158 |
-
4. μμ±λ ν ν°μ μμ "νκΉ
νμ΄μ€ ν ν°" νλμ μ
λ ₯
|
159 |
|
160 |
-
###
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
""")
|
165 |
|
166 |
# μ΄λ²€νΈ νΈλ€λ¬
|
@@ -176,7 +294,6 @@ def create_interface():
|
|
176 |
outputs=output
|
177 |
)
|
178 |
|
179 |
-
# μν°ν€λ‘λ μ€ν κ°λ₯νκ²
|
180 |
text_input.submit(
|
181 |
count_tokens,
|
182 |
inputs=[model_input, text_input, token_input],
|
|
|
1 |
import gradio as gr
|
2 |
import os
|
3 |
+
import re
|
4 |
+
|
5 |
+
def validate_token(token):
    """Check whether *token* looks like a valid Hugging Face access token."""
    # Reject None / empty strings outright.
    if not token:
        return False
    # Hugging Face tokens conventionally start with "hf_" followed by
    # at least 34 alphanumeric characters.
    pattern = r'^hf_[a-zA-Z0-9]{34,}$'
    return re.match(pattern, token) is not None
|
11 |
+
|
12 |
+
def safe_tokenizer_load(model_name, hf_token=None):
    """Load a tokenizer, falling back through several loading strategies.

    Args:
        model_name: Hugging Face model id (e.g. "gpt2").
        hf_token: optional Hugging Face access token for gated/private models.

    Returns:
        ``(tokenizer, None)`` on success, or ``(None, last_exception)`` if
        every strategy failed; callers are expected to re-raise the error.
    """
    from transformers import AutoTokenizer

    # Try slow/fast tokenizers with and without remote code,
    # most permissive combination first.
    strategies = [
        {"trust_remote_code": True, "use_fast": False},
        {"trust_remote_code": True, "use_fast": True},
        {"trust_remote_code": False, "use_fast": False},
        {"trust_remote_code": False, "use_fast": True},
    ]

    # Loop-invariant: compute the stripped token once, not per strategy.
    token = hf_token.strip() if hf_token and hf_token.strip() else None

    # Initialize so the final return can never raise NameError
    # (the original only assigned last_error inside the except clause).
    last_error = None
    for strategy in strategies:
        tokenizer_kwargs = dict(strategy)
        if token:
            tokenizer_kwargs["token"] = token
        try:
            tokenizer = AutoTokenizer.from_pretrained(model_name, **tokenizer_kwargs)
            return tokenizer, None
        except Exception as e:  # best-effort: remember the failure, try next strategy
            last_error = e

    return None, last_error
|
37 |
|
38 |
def count_tokens(model_name, text, hf_token=None):
|
39 |
"""ν ν° μλ₯Ό κ³μ°νλ ν¨μ"""
|
|
|
41 |
if not model_name or not text:
|
42 |
return "λͺ¨λΈλͺ
κ³Ό ν
μ€νΈλ₯Ό λͺ¨λ μ
λ ₯ν΄μ£ΌμΈμ."
|
43 |
|
44 |
+
# ν ν° κ²μ¦
|
|
|
|
|
|
|
|
|
45 |
if hf_token and hf_token.strip():
|
46 |
+
token = hf_token.strip()
|
47 |
+
if not validate_token(token):
|
48 |
+
return "β ν ν° νμμ΄ μ¬λ°λ₯΄μ§ μμ΅λλ€. νκΉ
νμ΄μ€ ν ν°μ 'hf_'λ‘ μμν΄μΌ ν©λλ€."
|
49 |
|
50 |
+
# μμ ν ν ν¬λμ΄μ λ‘λ©
|
51 |
+
tokenizer, error = safe_tokenizer_load(model_name, hf_token)
|
52 |
+
if tokenizer is None:
|
53 |
+
raise error
|
54 |
|
55 |
+
# ν¨λ© ν ν°μ΄ μλ κ²½μ° μΆκ°
|
56 |
+
if tokenizer.pad_token is None:
|
57 |
+
if tokenizer.eos_token:
|
58 |
+
tokenizer.pad_token = tokenizer.eos_token
|
59 |
+
elif tokenizer.unk_token:
|
60 |
+
tokenizer.pad_token = tokenizer.unk_token
|
61 |
|
62 |
+
# ν ν°ν - μ¬λ¬ λ°©λ² μλ
|
63 |
try:
|
64 |
+
tokens = tokenizer.encode(text, add_special_tokens=True)
|
65 |
except:
|
66 |
+
# λ μμ ν λ°©λ²μΌλ‘ μλ
|
67 |
+
tokens = tokenizer.encode(text, add_special_tokens=False)
|
68 |
+
|
69 |
+
token_count = len(tokens)
|
70 |
+
|
71 |
+
# ν ν° λμ½λ© (μμ νκ² μ²λ¦¬)
|
72 |
+
decoded_tokens = []
|
73 |
+
for i, token in enumerate(tokens[:50]): # μ²μ 50κ°λ§
|
74 |
+
try:
|
75 |
+
decoded = tokenizer.decode([token])
|
76 |
+
if decoded.strip() == '':
|
77 |
+
decoded_tokens.append(f"<empty_{token}>")
|
78 |
+
elif decoded.strip() == tokenizer.pad_token:
|
79 |
+
decoded_tokens.append(f"<pad_{token}>")
|
80 |
+
elif decoded.strip() == tokenizer.eos_token:
|
81 |
+
decoded_tokens.append(f"<eos_{token}>")
|
82 |
+
elif decoded.strip() == tokenizer.bos_token:
|
83 |
+
decoded_tokens.append(f"<bos_{token}>")
|
84 |
+
else:
|
85 |
+
decoded_tokens.append(repr(decoded))
|
86 |
+
except:
|
87 |
+
decoded_tokens.append(f"<token_{token}>")
|
88 |
|
89 |
result = f"β
ν ν° μ: {token_count}\n\n"
|
90 |
+
result += f"λͺ¨λΈ: {model_name}\n"
|
91 |
+
result += f"ν
μ€νΈ κΈΈμ΄: {len(text)} κΈμ\n"
|
92 |
+
result += f"ν ν¬λμ΄μ νμ
: {type(tokenizer).__name__}\n\n"
|
93 |
+
result += f"ν ν°λ€ (μ²μ 50κ°):\n{decoded_tokens}"
|
94 |
+
|
95 |
+
if len(tokens) > 50:
|
96 |
+
result += f"\n\n... (μ΄ {len(tokens)}κ° ν ν° μ€ 50κ°λ§ νμ)"
|
97 |
|
98 |
return result
|
99 |
|
100 |
except Exception as e:
|
101 |
+
error_str = str(e)
|
102 |
+
error_msg = f"β μ€λ₯ λ°μ: {error_str}\n\n"
|
103 |
|
104 |
+
if "401" in error_str and "Unauthorized" in error_str:
|
105 |
+
error_msg += "π μΈμ¦ μ€λ₯:\n"
|
106 |
+
error_msg += "1. νκΉ
νμ΄μ€ ν ν°μ΄ μ¬λ°λ₯Έμ§ νμΈνμΈμ\n"
|
107 |
+
error_msg += "2. ν ν°μ΄ 'hf_'λ‘ μμνλμ§ νμΈνμΈμ\n"
|
108 |
+
error_msg += "3. ν΄λΉ λͺ¨λΈμ λν μ κ·Ό κΆνμ΄ μλμ§ νμΈνμΈμ\n"
|
109 |
+
error_msg += f"4. λͺ¨λΈ νμ΄μ§ λ°©λ¬Έ: https://huggingface.co/{model_name}\n"
|
110 |
+
elif "gated repo" in error_str:
|
111 |
+
error_msg += "π μ κ·Ό μ νλ λͺ¨λΈ:\n"
|
112 |
error_msg += f"1. https://huggingface.co/{model_name} μμ μ κ·Ό κΆνμ μμ²νμΈμ\n"
|
113 |
+
error_msg += "2. μΉμΈ ν μ ν¨ν νκΉ
νμ΄μ€ ν ν°μ μ
λ ₯νμΈμ\n"
|
114 |
+
elif "does not exist" in error_str or "not found" in error_str:
|
|
|
115 |
error_msg += "π λͺ¨λΈμ μ°Ύμ μ μμ΅λλ€:\n"
|
116 |
error_msg += "1. λͺ¨λΈλͺ
μ νμΈν΄μ£ΌμΈμ\n"
|
117 |
+
error_msg += "2. κ³΅κ° λͺ¨λΈ μμ: 'klue/bert-base', 'beomi/KcELECTRA-base', 'gpt2'\n"
|
118 |
+
elif "data did not match any variant" in error_str:
|
119 |
+
error_msg += "β οΈ λͺ¨λΈ νμΌ κ΅¬μ‘° λ¬Έμ :\n"
|
120 |
+
error_msg += "1. μ΄ λͺ¨λΈμ νμ¬ μ§μλμ§ μλ νμμ
λλ€\n"
|
121 |
+
error_msg += "2. λ€λ₯Έ λͺ¨λΈμ μλν΄λ³΄μΈμ\n"
|
122 |
+
error_msg += "3. μΆμ² λͺ¨λΈ: 'gpt2', 'microsoft/DialoGPT-medium', 'klue/bert-base'\n"
|
123 |
+
elif "Tokenizer class" in error_str:
|
124 |
+
error_msg += "π§ ν ν¬λμ΄μ ν΄λμ€ λ¬Έμ :\n"
|
125 |
+
error_msg += "1. μ΄ λͺ¨λΈμ μ΅μ transformers λΌμ΄λΈλ¬λ¦¬κ° νμν μ μμ΅λλ€\n"
|
126 |
+
error_msg += "2. λ€λ₯Έ λͺ¨λΈμ μλν΄λ³΄μΈμ\n"
|
127 |
else:
|
128 |
error_msg += "π§ κ°λ₯ν ν΄κ²° λ°©λ²:\n"
|
129 |
error_msg += "1. λͺ¨λΈλͺ
μ νμΈν΄μ£ΌμΈμ\n"
|
130 |
error_msg += "2. λ€νΈμν¬ μ°κ²°μ νμΈν΄μ£ΌμΈμ\n"
|
131 |
+
error_msg += "3. λ€λ₯Έ λͺ¨λΈμ μλν΄λ³΄μΈμ\n"
|
132 |
|
133 |
return error_msg
|
134 |
|
|
|
138 |
if not model_name:
|
139 |
return "λͺ¨λΈλͺ
μ μ
λ ₯ν΄μ£ΌμΈμ."
|
140 |
|
|
|
|
|
|
|
141 |
if hf_token and hf_token.strip():
|
142 |
+
token = hf_token.strip()
|
143 |
+
if not validate_token(token):
|
144 |
+
return "β ν ν° νμμ΄ μ¬λ°λ₯΄μ§ μμ΅λλ€. νκΉ
νμ΄μ€ ν ν°μ 'hf_'λ‘ μμν΄μΌ ν©λλ€."
|
145 |
+
|
146 |
+
# μμ ν ν ν¬λμ΄μ λ‘λ©
|
147 |
+
tokenizer, error = safe_tokenizer_load(model_name, hf_token)
|
148 |
+
if tokenizer is None:
|
149 |
+
raise error
|
150 |
+
|
151 |
+
# ν ν¬λμ΄μ μ 보 νμ
|
152 |
+
vocab_size = getattr(tokenizer, 'vocab_size', "μ μ μμ")
|
153 |
+
model_max_length = getattr(tokenizer, 'model_max_length', "μ μ μμ")
|
154 |
+
|
155 |
+
result = f"β
{model_name} λͺ¨λΈ μ κ·Ό κ°λ₯ν©λλ€!\n\n"
|
156 |
+
result += f"ν ν¬λμ΄μ μ 보:\n"
|
157 |
+
result += f"- μ΄ν ν¬κΈ°: {vocab_size}\n"
|
158 |
+
result += f"- μ΅λ κΈΈμ΄: {model_max_length}\n"
|
159 |
+
result += f"- ν ν¬λμ΄μ νμ
: {type(tokenizer).__name__}\n"
|
160 |
|
161 |
+
# νΉμ ν ν° μ 보
|
162 |
+
special_tokens = []
|
163 |
+
if hasattr(tokenizer, 'pad_token') and tokenizer.pad_token:
|
164 |
+
special_tokens.append(f"PAD: {tokenizer.pad_token}")
|
165 |
+
if hasattr(tokenizer, 'eos_token') and tokenizer.eos_token:
|
166 |
+
special_tokens.append(f"EOS: {tokenizer.eos_token}")
|
167 |
+
if hasattr(tokenizer, 'bos_token') and tokenizer.bos_token:
|
168 |
+
special_tokens.append(f"BOS: {tokenizer.bos_token}")
|
169 |
+
if hasattr(tokenizer, 'unk_token') and tokenizer.unk_token:
|
170 |
+
special_tokens.append(f"UNK: {tokenizer.unk_token}")
|
171 |
+
|
172 |
+
if special_tokens:
|
173 |
+
result += f"- νΉμ ν ν°: {', '.join(special_tokens)}"
|
174 |
+
|
175 |
+
return result
|
176 |
|
177 |
except Exception as e:
|
178 |
+
error_str = str(e)
|
179 |
+
|
180 |
+
if "401" in error_str and "Unauthorized" in error_str:
|
181 |
+
return f"π {model_name}: μΈμ¦ μ€λ₯μ
λλ€. ν ν°μ νμΈνκ±°λ λͺ¨λΈ μ κ·Ό κΆνμ μμ²νμΈμ."
|
182 |
+
elif "gated repo" in error_str:
|
183 |
+
return f"π {model_name}μ μ κ·Ό κΆνμ΄ νμν λͺ¨λΈμ
λλ€."
|
184 |
+
elif "does not exist" in error_str:
|
185 |
return f"β {model_name} λͺ¨λΈμ μ°Ύμ μ μμ΅λλ€."
|
186 |
+
elif "data did not match any variant" in error_str:
|
187 |
+
return f"β οΈ {model_name} λͺ¨λΈμ νμ¬ μ§μλμ§ μλ νμμ
λλ€."
|
188 |
else:
|
189 |
+
return f"β μ€λ₯: {error_str}"
|
190 |
|
191 |
# Gradio μΈν°νμ΄μ€ μμ±
|
192 |
def create_interface():
|
|
|
211 |
text_input = gr.Textbox(
|
212 |
label="ν
μ€νΈ",
|
213 |
placeholder="ν ν° μλ₯Ό κ³μ°ν ν
μ€νΈλ₯Ό μ
λ ₯νμΈμ...",
|
214 |
+
lines=5,
|
215 |
+
value="μλ
νμΈμ! μ΄κ²μ ν
μ€νΈ ν
μ€νΈμ
λλ€."
|
216 |
)
|
217 |
|
218 |
with gr.Row():
|
|
|
222 |
with gr.Column():
|
223 |
output = gr.Textbox(
|
224 |
label="κ²°κ³Ό",
|
225 |
+
lines=15,
|
226 |
show_copy_button=True
|
227 |
)
|
228 |
|
229 |
# λͺ¨λΈ μΉ΄ν
κ³ λ¦¬λ³ μμ
|
230 |
with gr.Tabs():
|
231 |
+
with gr.TabItem("β
μμ μ μΈ λͺ¨λΈ"):
|
232 |
+
gr.Markdown("### νμ€ν μλνλ λͺ¨λΈλ€:")
|
233 |
with gr.Row():
|
234 |
+
stable_models = [
|
235 |
"klue/bert-base",
|
236 |
"beomi/KcELECTRA-base",
|
237 |
"gpt2",
|
238 |
+
"microsoft/DialoGPT-medium",
|
239 |
+
"distilbert-base-uncased",
|
240 |
+
"t5-small"
|
241 |
]
|
242 |
|
243 |
+
for model in stable_models:
|
244 |
btn = gr.Button(model, size="sm")
|
245 |
btn.click(lambda x=model: x, outputs=model_input)
|
246 |
|
247 |
+
with gr.TabItem("β οΈ λ¬Έμ κ° μμ μ μλ λͺ¨λΈ"):
|
248 |
+
gr.Markdown("### μ§μλμ§ μκ±°λ λ¬Έμ κ° μμ μ μλ λͺ¨λΈλ€:")
|
249 |
+
gr.Markdown("μ΄ λͺ¨λΈλ€μ νμ¬ λ²μ μμ μλνμ§ μμ μ μμ΅λλ€.")
|
250 |
with gr.Row():
|
251 |
+
problematic_models = [
|
252 |
+
"google/gemma-3-12b-it",
|
253 |
"meta-llama/Llama-2-7b-hf",
|
|
|
254 |
"mistralai/Mistral-7B-v0.1"
|
255 |
]
|
256 |
|
257 |
+
for model in problematic_models:
|
258 |
btn = gr.Button(model, size="sm")
|
259 |
btn.click(lambda x=model: x, outputs=model_input)
|
260 |
|
261 |
+
# μ¬μ©λ² κ°μ΄λ
|
262 |
+
with gr.Accordion("π μ¬μ©λ² κ°μ΄λ", open=False):
|
263 |
gr.Markdown("""
|
264 |
+
### κΈ°λ³Έ μ¬μ©λ²:
|
265 |
+
1. **λͺ¨λΈλͺ
μ
λ ₯**: νκΉ
νμ΄μ€ λͺ¨λΈλͺ
(μ: klue/bert-base)
|
266 |
+
2. **ν
μ€νΈ μ
λ ₯**: ν ν° μλ₯Ό κ³μ°ν ν
μ€νΈ
|
267 |
+
3. **κ³μ° λ²νΌ ν΄λ¦**: κ²°κ³Ό νμΈ
|
268 |
+
|
269 |
### ν ν°μ΄ νμν κ²½μ°:
|
270 |
+
- Gated λͺ¨λΈ (Meta Llama, Google Gemma λ±)
|
271 |
+
- λΉκ³΅κ° λͺ¨λΈ
|
272 |
|
273 |
+
### ν ν° μμ±:
|
274 |
+
1. [νκΉ
νμ΄μ€ ν ν° νμ΄μ§](https://huggingface.co/settings/tokens) λ°©λ¬Έ
|
275 |
+
2. "New token" μμ± (Read κΆν)
|
276 |
+
3. ν ν°μ μμ νλμ μ
λ ₯
|
|
|
277 |
|
278 |
+
### λ¬Έμ ν΄κ²°:
|
279 |
+
- λͺ¨λΈμ΄ μ§μλμ§ μλ κ²½μ° λ€λ₯Έ λͺ¨λΈ μλ
|
280 |
+
- λ€νΈμν¬ λ¬Έμ μ μ μ ν μ¬μλ
|
281 |
+
- μμ μ μΈ λͺ¨λΈ νμ λͺ¨λΈλ€ μ¬μ© κΆμ₯
|
282 |
""")
|
283 |
|
284 |
# μ΄λ²€νΈ νΈλ€λ¬
|
|
|
294 |
outputs=output
|
295 |
)
|
296 |
|
|
|
297 |
text_input.submit(
|
298 |
count_tokens,
|
299 |
inputs=[model_input, text_input, token_input],
|