LeoNguyen101120 committed
Commit 6b4fb1d · 1 Parent(s): c2767f1

Update Dockerfile and requirements: introduce requirements_for_server.txt for streamlined dependency management, remove requirements.local.txt, and adjust the Dockerfile to install packages from a local wheel directory. Refactor chat_service.py to use transformer_client for message generation, and update .gitignore to include local_packages_for_win.

.gitignore CHANGED
@@ -13,4 +13,5 @@ data/*
 venv/
 .venv/
 bitsandbytes/*
-llama-cpp-python/*
+llama-cpp-python/*
+local_packages_for_win/*
.vscode/launch.json CHANGED
@@ -1,7 +1,4 @@
 {
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
         {
@@ -9,14 +6,6 @@
             "type": "debugpy",
             "request": "launch",
             "module": "uvicorn",
-            // "env": {
-            //     "db_username": "postgres",
-            //     "db_password": "secret",
-            //     "host_server": "localhost",
-            //     "database_name": "fastapi",
-            //     "ssl_mode": "prefer",
-            //     "db_server_port": "5432"
-            // },
             "env": {
                 "PYTHONPATH": "${workspaceFolder}/src"
             },
@@ -26,9 +15,11 @@
             "--port",
             "7860",
             "--host",
-            "0.0.0.0",
-            ]
+            "0.0.0.0"
+            ],
+            "windows": {
+                "python": "${workspaceFolder}\\.venv\\Scripts\\python.exe"
+            }
         }
-
     ]
-}
+}
Dockerfile CHANGED
@@ -1,48 +1,29 @@
-# 1. Use an official Python base image (slim for smaller size)
 FROM python:3.11-slim
 
-# 2. Set environment variables for Python
 ENV PYTHONDONTWRITEBYTECODE=1 \
     PYTHONUNBUFFERED=1
 
-# 3. Set the working directory in the container
 WORKDIR /src
 
-# 4. Install system dependencies required for Python packages and llama-cpp-python
+# 1. Install the required system packages
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     build-essential \
-    # gcc \
-    # g++ \
     cmake \
-    # tesseract-ocr \
-    # libgl1 \
     libglib2.0-0 \
     git \
     && rm -rf /var/lib/apt/lists/*
 
-RUN mkdir -p /tmp/cache /tmp/vector_store /.cache && \
-    chown -R 1000:1000 /tmp /.cache
-
-# 5. Copy requirements.txt and install Python dependencies
-COPY requirements.txt .
-# RUN pip install --no-cache-dir --upgrade -r requirements.txt
-
-# # Install llama-cpp-python first (faster rebuilds)
-# RUN pip install --no-cache-dir "llama-cpp-python==0.3.8" --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
-
-# # Remove llama line from requirements and install the rest
-# RUN grep -v "llama-cpp-python" requirements.txt > requirements-no-llama.txt && \
-#     pip install --no-cache-dir -r requirements-no-llama.txt
-
-run pip install --no-cache-dir -r requirements.txt
-
-# 6. Copy only necessary files and folders
-COPY requirements.txt .
+# 2. Copy the requirements file and the local package wheels
+COPY requirements_for_server.txt ./
+COPY local_packages_for_server/ /tmp/local_packages_for_server/
+
+# 3. Install Python dependencies from the local wheel directory
+RUN pip install --no-cache-dir --find-links=/tmp/local_packages_for_server -r requirements_for_server.txt
+
+# 4. Copy the application source
 COPY src/ .
 
-# 7. Expose the port FastAPI will run on
+# 5. Expose the port and run the app
 EXPOSE 7860
-
-# 8. Set the default command to run the FastAPI app
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
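
The new install step assumes a local_packages_for_server/ directory of pre-downloaded wheels exists next to the Dockerfile at build time. A minimal sketch of how that directory might be populated before building the image, assuming the wheels are fetched with pip download (the helper script name is hypothetical, not part of this commit):

    # populate_local_packages.py -- hypothetical helper, not part of this commit
    import subprocess
    import sys

    # Download wheels for every requirement into the directory that the
    # Dockerfile copies to /tmp/local_packages_for_server/ and hands to
    # pip via --find-links.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "download",
            "-r", "requirements_for_server.txt",
            "-d", "local_packages_for_server/",
        ],
        check=True,
    )

By default pip download fetches wheels for the host platform and interpreter, so building for a linux/python:3.11 image from another platform would need matching --platform/--python-version/--only-binary options; treat this as a sketch of the workflow, not a drop-in script.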
 
 
requirements.local.txt DELETED
@@ -1,25 +0,0 @@
-fastapi[standard]>=0.113.0,<0.114.0
-pydantic>=2.7.0,<3.0.0
-uvicorn>=0.34.2
-python-dotenv>=1.1.0
-requests>=2.32.3
-openai>=1.76.0
-torch>=2.3.0,<2.6.0
-Pillow>=11.2.1
-yfinance>=0.2.56
-python-multipart>=0.0.20
-diffusers>=0.33.1
-transformers>=4.51.3
-accelerate>=1.6.0
-beautifulsoup4>=4.13.4
-pymupdf>=1.25.1
-docx2txt>=0.8
-pytesseract>=0.3.13
-langchain_community>=0.3.19
-langchain>=0.3.20
-langchain_chroma>=0.2.2
-chromadb>=0.6.3
-sentence_transformers>=4.1.0
-langchain_huggingface>=0.1.2
-huggingface_hub[hf_xet]
-llama-cpp-python==0.3.8
requirements.txt CHANGED
@@ -1,13 +1,14 @@
 fastapi[standard] == 0.114.0
 uvicorn == 0.34.2
-torch==2.7.0
---extra-index-url https://download.pytorch.org/whl/cu128
 
 # If use diffusers
 diffusers == 0.33.1
-# bitsandbytes == 0.46.0
 accelerate == 1.6.0
 transformers == 4.52.4
+torch==2.7.0
+
+# Offline install
+bitsandbytes -f ./local_packages/bitsandbytes-0.46.0-cp310-cp310-win_amd64.whl
 
 # If use llama-cpp-python
 # llama-cpp-python == 0.3.8
requirements_for_server.txt ADDED
@@ -0,0 +1,26 @@
+fastapi[standard] == 0.114.0
+uvicorn == 0.34.2
+
+# If use diffusers
+diffusers == 0.33.1
+accelerate == 1.6.0
+transformers == 4.52.4
+
+# # Offline install
+# torch==2.7.0
+# -f ./local_packages/torch-2.7.0+cu128-cp310-cp310-win_amd64.whl
+# bitsandbytes -f ./local_packages/bitsandbytes-0.46.0-cp310-cp310-win_amd64.whl
+
+# If use llama-cpp-python
+# llama-cpp-python == 0.3.8
+# --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124
+
+# If process file feature enable
+# beautifulsoup4 == 4.13.4
+# requests == 2.32.3
+# langchain_chroma == 0.2.2
+# langchain_huggingface == 0.1.2
+# langchain_community == 0.3.19
+# chromadb == 0.6.3
+# pymupdf == 1.25.1
+
src/services/chat_service.py CHANGED
@@ -3,7 +3,6 @@ from models.requests.chat_request import ChatRequest
 from services import vector_store_service
 
 # from utils.llama_cpp_client import create, create_stream
-from utils.clients import open_ai_client
 from utils.clients import transformer_client
 from utils.timing import measure_time
 from utils.tools import tools_helper
@@ -87,7 +86,7 @@ def chat_generate(request: ChatRequest):
     messages = build_context_prompt(request)
     messages.extend(request.messages)
 
-    output = open_ai_client.generate(messages=messages)
+    output = transformer_client.generate(messages=messages)
     choices = output.get("choices", [])
 
     tool_calls = choices[0].get("message").get("tool_calls")
@@ -100,5 +99,5 @@ def chat_generate(request: ChatRequest):
         messages.append(tool_call_message)
 
     # new_output = generate(messages=messages, has_tool_call=False)
-    new_output = open_ai_client.generate(messages=messages, has_tool_call=False)
+    new_output = transformer_client.generate(messages=messages, has_tool_call=False)
     return new_output
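
For context, chat_generate reads the client output as an OpenAI-style chat-completion dict. A minimal sketch of the shape those calls assume; the stub below is illustrative only, with field names inferred from the accesses in the diff rather than from the real transformer_client:

    # Illustrative stub of the response shape chat_generate expects.
    def stub_generate(messages, has_tool_call=True):
        return {
            "choices": [
                {
                    "message": {
                        "role": "assistant",
                        "content": "Hello!",
                        # Populated with a list of tool-call dicts when the
                        # model decides to invoke a tool; otherwise None.
                        "tool_calls": None,
                    }
                }
            ]
        }

    output = stub_generate(messages=[{"role": "user", "content": "Hi"}])
    tool_calls = output["choices"][0]["message"].get("tool_calls")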
src/utils/clients/transformer_client.py CHANGED
@@ -35,6 +35,7 @@ def load_model():
     try:
         with measure_time("Load model"):
             if USE_QUANT:
+                print("Using quantization")
                 quantization_config = BitsAndBytesConfig(
                     load_in_4bit=True,
                     bnb_4bit_quant_type="nf4",
@@ -52,6 +53,7 @@ def load_model():
                     # max_memory={0: "4GiB"},  # Limit GPU memory usage
                 )
             else:
+                print("Not using quantization")
                 _model = AutoModelForCausalLM.from_pretrained(
                     LLM_MODEL_NAME,
                     torch_dtype=MODEL_OPTIMIZATION["torch_dtype"],
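
The quantized branch only appears here in fragments. A self-contained sketch of a 4-bit NF4 load with transformers and bitsandbytes, matching the options visible above; the model name and compute dtype are placeholders, not the repository's LLM_MODEL_NAME or MODEL_OPTIMIZATION settings:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

    MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"  # placeholder model id

    # 4-bit NF4 quantization, as in the USE_QUANT branch of load_model().
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,  # assumption; not shown in the diff
    )

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=quantization_config,
        device_map="auto",  # requires accelerate, which is pinned in the requirements
    )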