Update app.py
app.py CHANGED
@@ -1,64 +1,97 @@
import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
+import os
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
+
+MODEL_NAME = "kimhyunwoo/KOONE"
+MODEL_FILE = "KOONE-3.5-7.8B-Instruct-Q4_K_M.gguf"
+MODEL_PATH = f"./{MODEL_FILE}"  # store the model file in the current directory
+MODEL_DOWNLOADED = False  # tracks whether the model file has been downloaded
+
+def download_model():
+    """Download the model file; skip the download if it is already present."""
+    global MODEL_DOWNLOADED
+    if not os.path.exists(MODEL_PATH):
+        try:
+            print("Starting model download...")
+            hf_hub_download(
+                repo_id=MODEL_NAME,
+                filename=MODEL_FILE,
+                local_dir=".",
+                local_dir_use_symlinks=False  # may be needed in the Spaces environment
+            )
+            print("Model download complete")
+            MODEL_DOWNLOADED = True
+            return "Model download complete. Loading the model..."
+        except Exception as e:
+            print(f"Model download failed: {e}")
+            return f"Model download failed: {e}. Please try again."
+    else:
+        print("Model is already downloaded.")
+        MODEL_DOWNLOADED = True
+        return "Model already downloaded. Loading the model..."
+
+llm = None  # the Llama instance is kept in a module-level global
+
+def load_model():
+    global llm, MODEL_DOWNLOADED
+    if llm is None:  # load the model only if it has not been loaded yet
+        if not MODEL_DOWNLOADED:  # download first if the file has not been fetched yet
+            download_message = download_model()
+            if "failed" in download_message:  # propagate a download failure message
+                return download_message
+
+        if not os.path.exists(MODEL_PATH):  # error if the file is still missing after the download
+            return "Model file download failed. Please check the file path."
+
+        try:
+            print("Starting model load...")
+            llm = Llama(model_path=MODEL_PATH)
+            print("Model load complete")
+            return "Model ready"  # success message
+        except Exception as e:
+            print(f"Model load failed: {e}")
+            return f"Model load failed: {e}"
+    return "Model ready"  # already loaded; report success
+
+
+def generate_text(prompt, system_prompt="You are KOONE model from LG AI Research, a helpful assistant.", max_tokens=256):
+    """Generate text with the llama-cpp-python library."""
+    global llm
+
+    load_model_message = load_model()  # load the model and check its status
+    if load_model_message != "Model ready":
+        return load_model_message  # report a download or load failure
+
+    try:
+        output = llm.create_chat_completion(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=max_tokens,
+            stop=["User:", "\nUser:", "</s>"]  # stop sequences
+        )
+        response_text = output['choices'][0]['message']['content'].strip()
+        return response_text
+    except Exception as e:
+        print(f"Text generation failed: {e}")
+        return f"Text generation failed: {e}"


if __name__ == "__main__":
-    demo.launch()
+    # Optionally load the model at app startup; in a Gradio app it is more common to load on the first request.
+    # load_model()  # loading at startup speeds up the first response but lengthens startup time
+
+    iface = gr.Interface(
+        fn=generate_text,
+        inputs=[
+            gr.Textbox(lines=5, placeholder="Enter your prompt here", label="User Prompt"),
+            gr.Textbox(lines=3, value="You are KOONE model from LG AI Research, a helpful assistant.", label="System Prompt", type="text"),
+            gr.Slider(minimum=32, maximum=512, step=32, value=256, label="Max New Tokens")
+        ],
+        outputs=gr.Textbox(label="Response"),
+        title="KOONE 3.5 7.8B Instruct GGUF Demo (llama-cpp-python)",
+        description="Generates text with the KOONE 3.5 7.8B Instruct model. The model is downloaded and loaded on the first run, which can take some time. Uses the `llama-cpp-python` library.",
+    )
+    iface.launch()
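
Note: Llama(model_path=MODEL_PATH) relies on llama.cpp defaults, including a fairly small context window. Below is a minimal sketch of a more explicit constructor call, assuming the same MODEL_PATH and a CPU-only Space; the n_ctx and n_threads values are illustrative and not part of this commit.

    import os
    from llama_cpp import Llama

    llm = Llama(
        model_path=MODEL_PATH,      # same GGUF file fetched by download_model()
        n_ctx=2048,                 # larger context window than the llama.cpp default
        n_threads=os.cpu_count(),   # use the CPU cores available to the Space
    )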