Austin9
/

gemma-2-9b-it-Ko-RAG

@@ -1,237 +0,0 @@
-{
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": []
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
-  },
-  "cells": [
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {
-        "id": "wDmLkBbZmJvB"
-      },
-      "outputs": [],
-      "source": [
-        "# ===============================\n",
-        "# 1. 라이브러리 설치 (Google Colab)\n",
-        "# ===============================\n",
-        "!pip install unsloth xformers faiss-gpu-cu12 -U\n",
-        "!pip install --no-deps --upgrade \"flash-attn>=2.6.3\"\n",
-        "!pip install -U hf_transfer"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# ===============================\n",
-        "# 2. 환경 설정\n",
-        "# ===============================\n",
-        "import os\n",
-        "import torch\n",
-        "import numpy as np\n",
-        "import faiss\n",
-        "import json\n",
-        "import ast\n",
-        "from transformers import TextStreamer\n",
-        "from sentence_transformers import SentenceTransformer\n",
-        "from unsloth import FastLanguageModel\n",
-        "\n",
-        "os.environ[\"HF_HUB_ENABLE_HF_TRANSFER\"] = \"1\""
-      ],
-      "metadata": {
-        "id": "OsEBB0aKmhBy"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# ===============================\n",
-        "# 3. 모델 로드\n",
-        "# ===============================\n",
-        "model, tokenizer = FastLanguageModel.from_pretrained(\n",
-        "    model_name=\"Austin9/gemma-2-9b-it-Ko-RAG\",\n",
-        "    max_seq_length=8192,\n",
-        "    dtype=torch.float16,\n",
-        "    load_in_4bit=True\n",
-        ")\n",
-        "FastLanguageModel.for_inference(model)"
-      ],
-      "metadata": {
-        "id": "ENT1FgZZmizd"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# ===============================\n",
-        "# 4. FAISS 인덱스 로드\n",
-        "# ===============================\n",
-        "vector_db_path = \"/content/chunked_data_vectors.npz\"\n",
-        "data = np.load(vector_db_path)\n",
-        "vectors, texts, titles = data[\"vectors\"], data[\"texts\"], data[\"titles\"]\n",
-        "\n",
-        "gpu_resources = faiss.StandardGpuResources()\n",
-        "faiss_index = faiss.GpuIndexFlatL2(gpu_resources, vectors.shape[1])\n",
-        "faiss_index.add(vectors)"
-      ],
-      "metadata": {
-        "id": "9H7Xcc9GmkQ8"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# ===============================\n",
-        "# 5. 임베딩 모델 로드\n",
-        "# ===============================\n",
-        "embedding_model = SentenceTransformer(\"nlpai-lab/KURE-v1\", device=\"cuda\").to(torch.float16)"
-      ],
-      "metadata": {
-        "id": "EwpMV0kXmpSX"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# ===============================\n",
-        "# 6. JSON 파싱 함수\n",
-        "# ===============================\n",
-        "def robust_parse_json(response_text):\n",
-        "    response_text = response_text.strip().strip(\"'\").strip('\"').replace(\"'\", '\"')\n",
-        "    try:\n",
-        "        return json.loads(response_text)\n",
-        "    except:\n",
-        "        try:\n",
-        "            return ast.literal_eval(response_text)\n",
-        "        except:\n",
-        "            return {\"search\": \"\"}\n",
-        "\n",
-        "# ===============================\n",
-        "# 7. 검색 쿼리 생성 (QCR 단계)\n",
-        "# ===============================\n",
-        "def generate_search_query(conversation_history, user_input):\n",
-        "    instruction = (\n",
-        "        \"다음은 대화 기록(Context)와 사용자의 질문(Input)입니다. \"\n",
-        "        \"사용자의 질문에 답을 제공하기 위해 필요한 단일 문자열 검색 쿼리를 생성하세요. \"\n",
-        "        \"검색이 필요하지 않거나 검색이 불필요한 경우(인사나, 겉치레, 농담) 빈 문자열을 반환하세요.\\n\\n\"\n",
-        "        \"최종 출력 형식은 {'search': '<검색 쿼리>'}입니다.\"\n",
-        "    )\n",
-        "    prompt = f\"\"\"\n",
-        "    # Query Rewriter\n",
-        "    ### Instruction:\n",
-        "    {instruction}\n",
-        "    ### Conversation:\n",
-        "    {'\\n'.join([f'{role}: {msg}' for role, msg in conversation_history])}\n",
-        "    ### Input:\n",
-        "    {user_input}\n",
-        "    ### Response:\n",
-        "    \"\"\"\n",
-        "\n",
-        "    inputs = tokenizer([prompt], return_tensors=\"pt\").to(\"cuda\")\n",
-        "    output_tokens = model.generate(**inputs, max_new_tokens=300)\n",
-        "    response_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True).split(\"### Response:\")[-1].strip()\n",
-        "    return robust_parse_json(response_text).get(\"search\", \"\")\n",
-        "\n",
-        "# ===============================\n",
-        "# 8. FAISS 검색\n",
-        "# ===============================\n",
-        "def search_documents(query, k=3):\n",
-        "    if not query:\n",
-        "        return \"\"\n",
-        "    query_vector = embedding_model.encode([query])[0]\n",
-        "    _, indices = faiss_index.search(np.array([query_vector]), k)\n",
-        "    return \"\\n\\n\".join([f\"# Index [{i+1}]: {titles[idx]}\\n{texts[idx]}\" for i, idx in enumerate(indices[0])])\n",
-        "\n",
-        "# ===============================\n",
-        "# 9. 답변 생성\n",
-        "# ===============================\n",
-        "def generate_response(conversation_history, context, user_input):\n",
-        "    instruction = (\n",
-        "        \"당신은 외부검색을 이용하여 사용자에게 도움을 주는 인공지능 조수입니다.\\n\"\n",
-        "        \"- Context는 외부검색을 통해 반환된 사용자 요청과 관련된 정보들입니다.\\n\"\n",
-        "        \"- Context를 활용할 때 문장 끝에 사용한 문서 조각의 [Index]를 붙이고 자연스러운 답변을 작성하세요. (e.g. [1])\\n\"\n",
-        "        \"- Context의 정보가 사용자 요청과 관련이 없거나 도움이 안될수도 있습니다. 관련있는 정보만 활용하고, 없는 정보를 절대 지어내지 마세요.\\n\"\n",
-        "        \"- 되도록이면 일반 지식으로 답변하지말고, 최대한 Context를 통해서 답변을 하려고 하세요. Context에 없을 경우에는 이 점을 언급하며 사죄하고 다른 주제나 질문을 추천해주세요.\\n\"\n",
-        "        \"- 사용자 요청에 알맞는 자연스러운 대화를 하세요.\\n\"\n",
-        "        \"- 항상 존댓말로 답변하세요.\"\n",
-        "    )\n",
-        "\n",
-        "    prompt = f\"\"\"\n",
-        "    # Generator\n",
-        "    ### Instruction:\n",
-        "    {instruction}\n",
-        "    ### Conversation:\n",
-        "    {'\\n'.join([f'{role}: {msg}' for role, msg in conversation_history])}\n",
-        "    ### Context:\n",
-        "    {context}\n",
-        "    ### Input:\n",
-        "    {user_input}\n",
-        "    ### Response:\n",
-        "    \"\"\"\n",
-        "\n",
-        "    inputs = tokenizer([prompt], return_tensors=\"pt\").to(\"cuda\")\n",
-        "    output_tokens = model.generate(**inputs, max_new_tokens=2500, do_sample=True)\n",
-        "    return tokenizer.decode(output_tokens[0], skip_special_tokens=True).split(\"### Response:\")[-1].strip()"
-      ],
-      "metadata": {
-        "id": "Nsv2Xp2kmp1S"
-      },
-      "execution_count": null,
-      "outputs": []
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "# ===============================\n",
-        "# 10. 대화 루프\n",
-        "# ===============================\n",
-        "def chat_loop():\n",
-        "    conversation_history = []\n",
-        "    print(\"대화를 시작합니다. 'exit' 입력 시 종료.\")\n",
-        "\n",
-        "    while True:\n",
-        "        user_input = input(\"\\nUser> \").strip()\n",
-        "        if user_input.lower() in [\"exit\", \"quit\"]:\n",
-        "            print(\"대화를 종료합니다.\")\n",
-        "            break\n",
-        "\n",
-        "        print(\"\\n[검색 쿼리 생성 중...]\")\n",
-        "        search_query = generate_search_query(conversation_history, user_input)\n",
-        "        context = search_documents(search_query, k=5) if search_query else \"\"\n",
-        "\n",
-        "        print(\"\\n[답변 생성 중...]\")\n",
-        "        response = generate_response(conversation_history, context, user_input)\n",
-        "\n",
-        "        conversation_history.append((\"User\", user_input))\n",
-        "        conversation_history.append((\"Assistant\", response))\n",
-        "        print(f\"\\nAssistant> {response}\")\n",
-        "\n",
-        "if __name__ == \"__main__\":\n",
-        "    chat_loop()"
-      ],
-      "metadata": {
-        "id": "4XD0UDZImsuE"
-      },
-      "execution_count": null,
-      "outputs": []
-    }
-  ]
-}