Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -1,97 +1,56 @@
 import gradio as gr
-import os
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
-
-# [old lines 5-36: the llm / MODEL_PATH / MODEL_DOWNLOADED globals, the download_model() helper,
-#  and the def load_model(): header; their content was not preserved in this diff view]
-    global llm, MODEL_DOWNLOADED
-    if llm is None:  # load the model if it has not been loaded yet
-        if not MODEL_DOWNLOADED:  # if it has not been downloaded yet, try the download first
-            download_message = download_model()
-            if "failed" in download_message:  # pass the message through on download failure
-                return download_message
-
-        if not os.path.exists(MODEL_PATH):  # error if the file is still missing after the download
-            return "Model file download failed. Please check the file path."
-
-        try:
-            print("Starting model load...")
-            llm = Llama(model_path=MODEL_PATH)
-            print("Model load complete")
-            return "Model ready"  # return the success message
-        except Exception as e:
-            print(f"Model load failed: {e}")
-            return f"Model load failed: {e}"
-    return "Model ready"  # already loaded, return the success message
-
-
-def generate_text(prompt, system_prompt="You are KOONE model from LG AI Research, a helpful assistant.", max_tokens=256):
-    """Generate text using the llama_cpp_python library."""
-    global llm
-
-    load_model_message = load_model()  # load the model and check its status
-    if load_model_message != "Model ready":
-        return load_model_message  # return the load-failure message
-
-    try:
-        output = llm.create_chat_completion(
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=max_tokens,
-            stop=["User:", "\nUser:", "</s>"]  # added stop words
-        )
-        response_text = output['choices'][0]['message']['content'].strip()
-        return response_text
-    except Exception as e:
-        print(f"Text generation failed: {e}")
-        return f"Text generation failed: {e}"
-
-
-if __name__ == "__main__":
-    # Optionally load the model at app start (in Gradio apps, loading on the first request is the usual pattern)
-    # load_model()  # loading at startup speeds up the first response but lengthens startup time
-
-    iface = gr.Interface(
-        fn=generate_text,
-        inputs=[
-            gr.Textbox(lines=5, placeholder="Enter your prompt here", label="User Prompt"),
-            gr.Textbox(lines=3, value="You are KOONE model from LG AI Research, a helpful assistant.", label="System Prompt", type="text"),
-            gr.Slider(minimum=32, maximum=512, step=32, value=256, label="Max New Tokens")
-        ],
-        outputs=gr.Textbox(label="Response"),
-        title="KOONE 3.5 7.8B Instruct GGUF Demo (llama-cpp-python)",
-        description="Generates text with the KOONE 3.5 7.8B Instruct model. On first run the model is downloaded and loaded, which can take some time. Uses the `llama-cpp-python` library.",
-    )
-    iface.launch()
+
+# Model location on the Hugging Face Hub (downloaded at startup)
+MODEL_REPO_ID = "kimhyunwoo/KOONE"
+MODEL_FILENAME = "KOONE-3.5-2.4B-Instruct-Q4_K_M.gguf"
+
+# Create the Llama object (CPU only, so n_gpu_layers is 0 / left unset)
+# Adjust n_threads to the number of CPU cores on the system (or omit to auto-detect)
+llm = Llama.from_pretrained(  # Llama() itself does not accept repo_id/filename; from_pretrained downloads from the Hub
+    repo_id=MODEL_REPO_ID,
+    filename=MODEL_FILENAME,
+    n_ctx=2048,  # context length; set to match the model
+    n_threads=8,  # number of CPU threads (adjust to the system)
+    verbose=False,  # change to True if needed
+)
+
+
+def generate_text(prompt, system_prompt, max_tokens, temperature, top_p):
+    """Feed the prompt to the model and return the generated text."""
+
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt},
+    ]
+
+    output = llm.create_chat_completion(
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        stream=False,  # no streaming; note that chat completion output never echoes the input prompt
+    )
+
+    generated_text = output["choices"][0]["message"]["content"]
+    return generated_text
+
+
+# Define the Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=5, label="Prompt (question)"),
+        gr.Textbox(lines=2, label="System Prompt (optional)", value="You are a helpful Korean-language assistant."),  # default system prompt
+        gr.Slider(minimum=16, maximum=512, step=16, label="Max Tokens", value=128),
+        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Temperature", value=0.7),
+        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Top P", value=0.9),
+    ],
+    outputs=gr.Textbox(label="Generated Text (answer)"),
+    title="KOONE Chatbot (CPU Only)",
+    description="Enter a question and click Submit to generate an answer.",
+)
+
+iface.launch()
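
The new file loads the model eagerly at import time, so the Space's startup logs will show the GGUF download before Gradio comes up. For a quick local smoke test of the same call path before pushing, a minimal sketch (assuming llama-cpp-python with huggingface_hub installed; the repo id and filename are the ones referenced in the diff, and this script is not part of the commit):

# smoke_test.py - hypothetical local check, not part of this commit
from llama_cpp import Llama

# from_pretrained fetches the GGUF from the Hub cache on first use (requires huggingface_hub)
llm = Llama.from_pretrained(
    repo_id="kimhyunwoo/KOONE",
    filename="KOONE-3.5-2.4B-Instruct-Q4_K_M.gguf",
    n_ctx=2048,
    verbose=False,
)

# Exercise the chat path once with a tiny budget to confirm loading and generation work
out = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one short sentence."},
    ],
    max_tokens=32,
)
print(out["choices"][0]["message"]["content"])

If the Space still reports a runtime error after this commit, requirements.txt is worth checking: the imports above need gradio, llama-cpp-python, and huggingface_hub (an assumption, since the requirements file is not part of this diff).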