Spaces: Runtime error
Update app.py
app.py
CHANGED
@@ -1,97 +1,56 @@
 import gradio as gr
-import os
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
-
-# [old lines 5-36: the llm / MODEL_PATH / MODEL_DOWNLOADED globals, the download_model() helper,
-#  and the def load_model(): header; their content was not preserved in this diff view]
-    global llm, MODEL_DOWNLOADED
-    if llm is None:  # load the model if it has not been loaded yet
-        if not MODEL_DOWNLOADED:  # if it has not been downloaded yet, try the download first
-            download_message = download_model()
-            if "failed" in download_message:  # pass the message through on download failure
-                return download_message
-
-        if not os.path.exists(MODEL_PATH):  # error if the file is still missing after the download
-            return "Model file download failed. Please check the file path."
-
-        try:
-            print("Starting model load...")
-            llm = Llama(model_path=MODEL_PATH)
-            print("Model load complete")
-            return "Model ready"  # return the success message
-        except Exception as e:
-            print(f"Model load failed: {e}")
-            return f"Model load failed: {e}"
-    return "Model ready"  # already loaded, return the success message
-
-
-def generate_text(prompt, system_prompt="You are KOONE model from LG AI Research, a helpful assistant.", max_tokens=256):
-    """Generate text using the llama_cpp_python library."""
-    global llm
-
-    load_model_message = load_model()  # load the model and check its status
-    if load_model_message != "Model ready":
-        return load_model_message  # return the load-failure message
-
-    try:
-        output = llm.create_chat_completion(
-            messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": prompt}
-            ],
-            max_tokens=max_tokens,
-            stop=["User:", "\nUser:", "</s>"]  # added stop words
-        )
-        response_text = output['choices'][0]['message']['content'].strip()
-        return response_text
-    except Exception as e:
-        print(f"Text generation failed: {e}")
-        return f"Text generation failed: {e}"
-
-
-if __name__ == "__main__":
-    # Optionally load the model at app start (in Gradio apps, loading on the first request is the usual pattern)
-    # load_model()  # loading at startup speeds up the first response but lengthens startup time
-
-    iface = gr.Interface(
-        fn=generate_text,
-        inputs=[
-            gr.Textbox(lines=5, placeholder="Enter your prompt here", label="User Prompt"),
-            gr.Textbox(lines=3, value="You are KOONE model from LG AI Research, a helpful assistant.", label="System Prompt", type="text"),
-            gr.Slider(minimum=32, maximum=512, step=32, value=256, label="Max New Tokens")
-        ],
-        outputs=gr.Textbox(label="Response"),
-        title="KOONE 3.5 7.8B Instruct GGUF Demo (llama-cpp-python)",
-        description="Generates text with the KOONE 3.5 7.8B Instruct model. On first run the model is downloaded and loaded, which can take some time. Uses the `llama-cpp-python` library.",
-    )
-    iface.launch()
+
+# Model location on the Hugging Face Hub (downloaded at startup)
+MODEL_REPO_ID = "kimhyunwoo/KOONE"
+MODEL_FILENAME = "KOONE-3.5-2.4B-Instruct-Q4_K_M.gguf"
+
+# Create the Llama object (CPU only, so n_gpu_layers is 0 / left unset)
+# Adjust n_threads to the number of CPU cores on the system (or omit to auto-detect)
+llm = Llama.from_pretrained(  # Llama() itself does not accept repo_id/filename; from_pretrained downloads from the Hub
+    repo_id=MODEL_REPO_ID,
+    filename=MODEL_FILENAME,
+    n_ctx=2048,  # context length; set to match the model
+    n_threads=8,  # number of CPU threads (adjust to the system)
+    verbose=False,  # change to True if needed
+)
+
+
+def generate_text(prompt, system_prompt, max_tokens, temperature, top_p):
+    """Feed the prompt to the model and return the generated text."""
+
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt},
+    ]
+
+    output = llm.create_chat_completion(
+        messages=messages,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+        stream=False,  # no streaming; note that chat completion output never echoes the input prompt
+    )
+
+    generated_text = output["choices"][0]["message"]["content"]
+    return generated_text
+
+
+# Define the Gradio interface
+iface = gr.Interface(
+    fn=generate_text,
+    inputs=[
+        gr.Textbox(lines=5, label="Prompt (question)"),
+        gr.Textbox(lines=2, label="System Prompt (optional)", value="You are a helpful Korean-language assistant."),  # default system prompt
+        gr.Slider(minimum=16, maximum=512, step=16, label="Max Tokens", value=128),
+        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Temperature", value=0.7),
+        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Top P", value=0.9),
+    ],
+    outputs=gr.Textbox(label="Generated Text (answer)"),
+    title="KOONE Chatbot (CPU Only)",
+    description="Enter a question and click Submit to generate an answer.",
+)
+
+iface.launch()
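
The new file loads the model eagerly at import time, so the Space's startup logs will show the GGUF download before Gradio comes up. For a quick local smoke test of the same call path before pushing, a minimal sketch (assuming llama-cpp-python with huggingface_hub installed; the repo id and filename are the ones referenced in the diff, and this script is not part of the commit):

# smoke_test.py - hypothetical local check, not part of this commit
from llama_cpp import Llama

# from_pretrained fetches the GGUF from the Hub cache on first use (requires huggingface_hub)
llm = Llama.from_pretrained(
    repo_id="kimhyunwoo/KOONE",
    filename="KOONE-3.5-2.4B-Instruct-Q4_K_M.gguf",
    n_ctx=2048,
    verbose=False,
)

# Exercise the chat path once with a tiny budget to confirm loading and generation work
out = llm.create_chat_completion(
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one short sentence."},
    ],
    max_tokens=32,
)
print(out["choices"][0]["message"]["content"])

If the Space still reports a runtime error after this commit, requirements.txt is worth checking: the imports above need gradio, llama-cpp-python, and huggingface_hub (an assumption, since the requirements file is not part of this diff).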