kimhyunwoo committed on
Commit
e9d635e
·
verified ·
1 Parent(s): 45e8e26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -93
app.py CHANGED
@@ -1,97 +1,56 @@
1
  import gradio as gr
2
- import os
3
  from llama_cpp import Llama
4
- from huggingface_hub import hf_hub_download
5
 
6
- MODEL_NAME = "kimhyunwoo/KOONE"
7
- MODEL_FILE = "KOONE-3.5-7.8B-Instruct-Q4_K_M.gguf"
8
- MODEL_PATH = f"./{MODEL_FILE}" # λͺ¨λΈμ„ ν˜„μž¬ 디렉토리에 μ €μž₯
9
- MODEL_DOWNLOADED = False # λͺ¨λΈ λ‹€μš΄λ‘œλ“œ μ—¬λΆ€ 좔적
10
-
11
- def download_model():
12
- """λͺ¨λΈμ„ λ‹€μš΄λ‘œλ“œν•©λ‹ˆλ‹€. 이미 λ‹€μš΄λ‘œλ“œλ˜μ—ˆμœΌλ©΄ κ±΄λ„ˆλœλ‹ˆλ‹€."""
13
- global MODEL_DOWNLOADED
14
- if not os.path.exists(MODEL_PATH):
15
- try:
16
- print("λͺ¨λΈ λ‹€μš΄λ‘œλ“œ μ‹œμž‘...")
17
- hf_hub_download(
18
- repo_id=MODEL_NAME,
19
- filename=MODEL_FILE,
20
- local_dir=".",
21
- local_dir_use_symlinks=False # Spaces ν™˜κ²½μ—μ„œ ν•„μš”ν•  수 있음
22
- )
23
- print("λͺ¨λΈ λ‹€μš΄λ‘œλ“œ μ™„λ£Œ")
24
- MODEL_DOWNLOADED = True
25
- return "λͺ¨λΈ λ‹€μš΄λ‘œλ“œ μ™„λ£Œ. λͺ¨λΈμ„ λ‘œλ“œ μ€‘μž…λ‹ˆλ‹€..."
26
- except Exception as e:
27
- print(f"λͺ¨λΈ λ‹€μš΄λ‘œλ“œ μ‹€νŒ¨: {e}")
28
- return f"λͺ¨λΈ λ‹€μš΄λ‘œλ“œ μ‹€νŒ¨: {e}. λ‹€μ‹œ μ‹œλ„ν•΄μ£Όμ„Έμš”."
29
- else:
30
- print("λͺ¨λΈμ΄ 이미 λ‹€μš΄λ‘œλ“œλ˜μ–΄ μžˆμŠ΅λ‹ˆλ‹€.")
31
- MODEL_DOWNLOADED = True
32
- return "λͺ¨λΈμ΄ 이미 λ‹€μš΄λ‘œλ“œλ¨. λͺ¨λΈμ„ λ‘œλ“œ μ€‘μž…λ‹ˆλ‹€..."
33
-
34
- llm = None # μ „μ—­ λ³€μˆ˜λ‘œ llm μ •μ˜
35
-
36
- def load_model():
37
- global llm, MODEL_DOWNLOADED
38
- if llm is None: # λͺ¨λΈμ΄ 아직 λ‘œλ“œλ˜μ§€ μ•Šμ•˜μœΌλ©΄ λ‘œλ“œ
39
- if not MODEL_DOWNLOADED: # 아직 λ‹€μš΄λ‘œλ“œλ˜μ§€ μ•Šμ•˜λ‹€λ©΄ λ‹€μš΄λ‘œλ“œ λ¨Όμ € μ‹œλ„
40
- download_message = download_model()
41
- if "μ‹€νŒ¨" in download_message: # λ‹€μš΄λ‘œλ“œ μ‹€νŒ¨ μ‹œ λ©”μ‹œμ§€ λ°˜ν™˜
42
- return download_message
43
-
44
- if not os.path.exists(MODEL_PATH): # λ‹€μš΄λ‘œλ“œ 후에도 파일이 μ—†μœΌλ©΄ μ—λŸ¬
45
- return "λͺ¨λΈ 파일 λ‹€μš΄λ‘œλ“œ μ‹€νŒ¨. 파일 경둜λ₯Ό ν™•μΈν•΄μ£Όμ„Έμš”."
46
-
47
- try:
48
- print("λͺ¨λΈ λ‘œλ“œ μ‹œμž‘...")
49
- llm = Llama(model_path=MODEL_PATH)
50
- print("λͺ¨λΈ λ‘œλ“œ μ™„λ£Œ")
51
- return "λͺ¨λΈ μ€€λΉ„ μ™„λ£Œ" # 성곡 λ©”μ‹œμ§€ λ°˜ν™˜
52
- except Exception as e:
53
- print(f"λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}")
54
- return f"λͺ¨λΈ λ‘œλ“œ μ‹€νŒ¨: {e}"
55
- return "λͺ¨λΈ μ€€λΉ„ μ™„λ£Œ" # 이미 λ‘œλ“œλœ 경우 성곡 λ©”μ‹œμ§€ λ°˜ν™˜
56
-
57
-
58
- def generate_text(prompt, system_prompt="You are KOONE model from LG AI Research, a helpful assistant.", max_tokens=256):
59
- """ llama_cpp_python 라이브러리λ₯Ό μ‚¬μš©ν•˜μ—¬ ν…μŠ€νŠΈλ₯Ό μƒμ„±ν•©λ‹ˆλ‹€."""
60
- global llm
61
-
62
- load_model_message = load_model() # λͺ¨λΈ λ‘œλ“œ 및 μƒνƒœ 확인
63
- if load_model_message != "λͺ¨λΈ μ€€λΉ„ μ™„λ£Œ":
64
- return load_model_message # λ‘œλ“œ μ‹€νŒ¨ λ©”μ‹œμ§€ λ°˜ν™˜
65
-
66
- try:
67
- output = llm.create_chat_completion(
68
- messages = [
69
- {"role": "system", "content": system_prompt},
70
- {"role": "user", "content": prompt}
71
- ],
72
- max_tokens=max_tokens,
73
- stop=["User:", "\nUser:", "</s>"] # stop words μΆ”κ°€
74
- )
75
- response_text = output['choices'][0]['message']['content'].strip()
76
- return response_text
77
- except Exception as e:
78
- print(f"ν…μŠ€νŠΈ 생성 μ‹€νŒ¨: {e}")
79
- return f"ν…μŠ€νŠΈ 생성 μ‹€νŒ¨: {e}"
80
-
81
-
82
- if __name__ == "__main__":
83
- # μ•± μ‹œμž‘ μ‹œ λͺ¨λΈ λ‘œλ“œ μ‹œλ„ (선택 사항, Gradio μ•±μ—μ„œλŠ” 첫 μš”μ²­ μ‹œ λ‘œλ“œν•˜λŠ” 것이 일반적)
84
- # load_model() # μ•± μ‹œμž‘ μ‹œ λͺ¨λΈ λ‘œλ“œν•˜λ©΄ 첫 응닡이 λΉ λ₯΄μ§€λ§Œ, λ‘œλ”© μ‹œκ°„μ΄ κΈΈμ–΄μ§ˆ 수 있음
85
-
86
- iface = gr.Interface(
87
- fn=generate_text,
88
- inputs=[
89
- gr.Textbox(lines=5, placeholder="Enter your prompt here", label="User Prompt"),
90
- gr.Textbox(lines=3, value="You are KOONE model from LG AI Research, a helpful assistant.", label="System Prompt", type="text"),
91
- gr.Slider(minimum=32, maximum=512, step=32, value=256, label="Max New Tokens")
92
- ],
93
- outputs=gr.Textbox(label="Response"),
94
- title="KOONE 3.5 7.8B Instruct GGUF Demo (llama-cpp-python)",
95
- description="KOONE 3.5 7.8B Instruct λͺ¨λΈμ„ μ‚¬μš©ν•˜μ—¬ ν…μŠ€νŠΈλ₯Ό μƒμ„±ν•©λ‹ˆλ‹€. λͺ¨λΈμ€ 처음 μ‹€ν–‰ μ‹œ λ‹€μš΄λ‘œλ“œ 및 λ‘œλ“œλ˜λ©°, μ‹œκ°„μ΄ λ‹€μ†Œ μ†Œμš”λ  수 μžˆμŠ΅λ‹ˆλ‹€. `llama-cpp-python` 라이브러리λ₯Ό μ‚¬μš©ν•©λ‹ˆλ‹€.",
96
  )
97
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
 
2
  from llama_cpp import Llama
 
3
 
4
+ # λͺ¨λΈ 파일 경둜 (Hugging Face Hubμ—μ„œ λ‹€μš΄λ‘œλ“œ)
5
+ MODEL_REPO_ID = "kimhyunwoo/KOONE"
6
+ MODEL_FILENAME = "KOONE-3.5-2.4B-Instruct-Q4_K_M.gguf"
7
+
8
+ # Llama 객체 생성 (CPU만 μ‚¬μš©ν•˜λ―€λ‘œ n_gpu_layersλŠ” 0 λ˜λŠ” μ„€μ •ν•˜μ§€ μ•ŠμŒ)
9
+ # n_threadsλ₯Ό μ‹œμŠ€ν…œ CPU μ½”μ–΄ μˆ˜μ— 맞게 쑰절 (λ˜λŠ” μƒλž΅ν•˜μ—¬ μžλ™ μ„€μ •)
10
+ llm = Llama(
11
+ model_path="", # model_pathλŠ” λΉ„μ›Œλ‘κ³  from_pretrained μ‚¬μš©
12
+ repo_id=MODEL_REPO_ID,
13
+ filename=MODEL_FILENAME,
14
+ n_ctx=2048, # μ»¨ν…μŠ€νŠΈ 길이. λͺ¨λΈμ— 맞게 μ„€μ •.
15
+ n_threads=8, # CPU μ“°λ ˆλ“œ 수 (οΏ½οΏ½οΏ½μŠ€ν…œμ— 맞게 쑰절)
16
+ verbose=False, # ν•„μš”ν•˜λ©΄ True둜 λ³€κ²½
17
+ )
18
+
19
+
20
def generate_text(prompt, system_prompt, max_tokens, temperature, top_p):
    """Run a single chat-completion turn and return the model's reply.

    Args:
        prompt: The user's message.
        system_prompt: System instruction prepended to the conversation.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling cutoff.

    Returns:
        The assistant's generated reply as a plain string.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # Single blocking call: no streaming, and echo=False keeps the
    # input prompt out of the returned text.
    result = llm.create_chat_completion(
        messages=conversation,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=False,
        echo=False,
    )

    return result["choices"][0]["message"]["content"]
39
+
40
+
41
+ # Gradio μΈν„°νŽ˜μ΄μŠ€ μ •μ˜
42
+ iface = gr.Interface(
43
+ fn=generate_text,
44
+ inputs=[
45
+ gr.Textbox(lines=5, label="Prompt (질문)"),
46
+ gr.Textbox(lines=2, label="System Prompt (선택 사항)", value="당신은 도움이 λ˜λŠ” ν•œκ΅­μ–΄ μ–΄μ‹œμŠ€ν„΄νŠΈμž…λ‹ˆλ‹€."), #κΈ°λ³Έ μ‹œμŠ€ν…œ ν”„λ‘¬ν”„νŠΈ
47
+ gr.Slider(minimum=16, maximum=512, step=16, label="Max Tokens", value=128),
48
+ gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Temperature", value=0.7),
49
+ gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Top P", value=0.9),
50
+ ],
51
+ outputs=gr.Textbox(label="Generated Text (λ‹΅λ³€)"),
52
+ title="KOONE Chatbot (CPU Only)",
53
+ description="μ§ˆλ¬Έμ„ μž…λ ₯ν•˜κ³  Submit을 ν΄λ¦­ν•˜μ—¬ 닡변을 μƒμ„±ν•˜μ„Έμš”.",
54
+ )
55
+
56
+ iface.launch()