File size: 3,134 Bytes
1a69151
1f8e2b4
1a69151
 
1f8e2b4
1a69151
 
 
 
1f8e2b4
 
 
 
 
cccf1bf
1f8e2b4
 
cccf1bf
 
1f8e2b4
 
cccf1bf
1f8e2b4
1a69151
 
 
 
1f8e2b4
1a69151
1f8e2b4
 
1a69151
 
 
 
1f8e2b4
 
 
 
cccf1bf
1f8e2b4
1a69151
 
1f8e2b4
1a69151
1f8e2b4
1a69151
1f8e2b4
 
1a69151
 
 
 
 
1f8e2b4
 
1a69151
 
 
1f8e2b4
1a69151
 
 
 
 
cccf1bf
1f8e2b4
1a69151
 
 
1f8e2b4
 
1a69151
 
1f8e2b4
1a69151
1f8e2b4
 
 
 
 
 
 
1a69151
 
1f8e2b4
 
 
1a69151
 
 
 
 
8e9646f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
from transformers import AutoTokenizer

def count_tokens(model_name, text, hf_token=None):
    """Tokenize *text* with the tokenizer of *model_name* and report the count.

    Returns a human-readable Korean status string (success or error);
    never raises — any tokenizer failure is folded into the message.
    """
    if not model_name or not text:
        return "모델명과 텍스트를 모두 입력해주세요."

    try:
        # Only forward an HF access token when one was actually provided.
        access_token = hf_token.strip() if hf_token and hf_token.strip() else None
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)

        token_count = len(tokenizer.encode(text))

        return (
            f"✅ 토큰 수: {token_count}\n"
            f"모델: {model_name}\n"
            f"텍스트 길이: {len(text)} 글자"
        )
    except Exception as e:
        return f"❌ 오류: {str(e)}"

def check_model(model_name, hf_token=None):
    """Check that the tokenizer for *model_name* can be loaded.

    Returns a Korean status message instead of raising; loading the
    tokenizer doubles as an access/permissions probe for gated models.
    """
    if not model_name:
        return "모델명을 입력해주세요."

    try:
        # Only forward an HF access token when one was actually provided.
        access_token = hf_token.strip() if hf_token and hf_token.strip() else None
        AutoTokenizer.from_pretrained(model_name, token=access_token)
        return f"✅ {model_name} 모델 접근 가능!"
    except Exception as e:
        return f"❌ 오류: {str(e)}"

# Gradio ์ธํ„ฐํŽ˜์ด์Šค
def create_interface():
    with gr.Blocks(title="ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ") as demo:
        gr.Markdown("# ๐Ÿ”ข ํ† ํฐ ๊ณ„์‚ฐ๊ธฐ")
        
        with gr.Row():
            with gr.Column():
                model_input = gr.Textbox(
                    label="๋ชจ๋ธ๋ช…",
                    placeholder="์˜ˆ: gpt2, klue/bert-base",
                    value="gpt2"
                )
                
                token_input = gr.Textbox(
                    label="HF ํ† ํฐ (์„ ํƒ์‚ฌํ•ญ)",
                    type="password"
                )
                
                text_input = gr.Textbox(
                    label="ํ…์ŠคํŠธ",
                    lines=5,
                    value="์•ˆ๋…•ํ•˜์„ธ์š”! ํ…Œ์ŠคํŠธ ํ…์ŠคํŠธ์ž…๋‹ˆ๋‹ค."
                )
                
                with gr.Row():
                    check_btn = gr.Button("๋ชจ๋ธ ํ™•์ธ")
                    calc_btn = gr.Button("ํ† ํฐ ๊ณ„์‚ฐ", variant="primary")
            
            with gr.Column():
                output = gr.Textbox(label="๊ฒฐ๊ณผ", lines=10)
        
        # ์ถ”์ฒœ ๋ชจ๋ธ
        gr.Markdown("### ์ถ”์ฒœ ๋ชจ๋ธ")
        with gr.Row():
            models = ["gpt2", "klue/bert-base", "microsoft/DialoGPT-medium"]
            for model in models:
                btn = gr.Button(model, size="sm")
                btn.click(lambda x=model: x, outputs=model_input)
        
        # ์ด๋ฒคํŠธ ํ•ธ๋“ค๋Ÿฌ
        check_btn.click(check_model, [model_input, token_input], output)
        calc_btn.click(count_tokens, [model_input, text_input, token_input], output)
        text_input.submit(count_tokens, [model_input, text_input, token_input], output)
    
    return demo

if __name__ == "__main__":
    # Build the UI and start the local Gradio server.
    create_interface().launch()