Commit 32efff5
Parent(s): e3a80c0

Update requirements and refactor client integration: Add extra index URL for PyTorch in requirements.txt, integrate open_ai_client in main.py, and adjust image generation parameters in image_service.py. Refactor llama_cpp_client to improve model loading configuration and enhance error handling in image_pipeline_client.

Files changed:
- requirements.txt +1 -0
- src/constants/config.py +0 -1
- src/main.py +2 -0
- src/services/chat_service.py +0 -11
- src/services/image_service.py +4 -4
- src/utils/clients/image_pipeline_client.py +5 -2
- src/utils/clients/llama_cpp_client.py +5 -19
requirements.txt
CHANGED
@@ -8,6 +8,7 @@ diffusers == 0.33.1
 accelerate == 1.6.0
 # transformers == 4.52.4
 torch==2.7.0
+--extra-index-url https://download.pytorch.org/whl/cu128
 
 # # If use bitsandbytes with cuda
 # https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_main/bitsandbytes-1.33.7.preview-py3-none-manylinux_2_24_x86_64.whl
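Note: pip honors --extra-index-url when it appears inside a requirements file, and https://download.pytorch.org/whl/cu128 serves the CUDA 12.8 builds of torch. A minimal sanity check (assuming the file above was installed with pip install -r requirements.txt on a CUDA machine) that the CUDA wheel was actually resolved:

# Sanity check that the CUDA build of torch was installed from the cu128 index
# (assumes `pip install -r requirements.txt` has already been run).
import torch

print(torch.__version__)          # expected to end in "+cu128" when the CUDA wheel was resolved
print(torch.version.cuda)         # CUDA toolkit version the wheel was built against, e.g. "12.8"
print(torch.cuda.is_available())  # True only when a compatible GPU and driver are present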
src/constants/config.py
CHANGED
@@ -4,7 +4,6 @@ IS_APPLE_SILICON = torch.backends.mps.is_available()
 IS_CUDA_AVAILABLE = torch.cuda.is_available()
 
 TORCH_DEVICE = "cuda" if IS_CUDA_AVAILABLE else "mps" if IS_APPLE_SILICON else "cpu"
-
 # Enable quantization for CPU/MPS
 USE_QUANT = IS_CUDA_AVAILABLE
 
src/main.py
CHANGED
@@ -12,6 +12,7 @@ from routes import chat_routes, process_file_routes, vector_store_routes
 from utils.clients import (
     image_pipeline_client,
     llama_cpp_client,
+    open_ai_client,
     transformer_client,
     vector_store_client,
 )
@@ -25,6 +26,7 @@ async def lifespan(app: FastAPI):
         vector_store_client.load_vector_store_client()
         image_pipeline_client.load_pipeline()
         llama_cpp_client.load()
+        # open_ai_client.load_open_ai_client()
         # pass
 
     except Exception as e:
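For context, lifespan is FastAPI's startup/shutdown hook, so the loaders above run once before the app starts serving requests. A minimal, self-contained sketch of the same pattern; the stub functions below are placeholders for the project's client modules:

# Minimal sketch of the startup pattern in src/main.py; the stub loaders stand in
# for image_pipeline_client.load_pipeline() and llama_cpp_client.load().
from contextlib import asynccontextmanager

from fastapi import FastAPI


def load_pipeline() -> None:
    print("loading image pipeline...")   # placeholder for the real loader


def load_llm() -> None:
    print("loading llama.cpp model...")  # placeholder for the real loader


@asynccontextmanager
async def lifespan(app: FastAPI):
    try:
        # Heavy clients are loaded once at startup; open_ai_client stays commented out in this commit.
        load_pipeline()
        load_llm()
    except Exception as e:
        print(f"Startup failed: {e}")
        raise
    yield  # the application serves requests while suspended here


app = FastAPI(lifespan=lifespan)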
src/services/chat_service.py
CHANGED
@@ -82,16 +82,11 @@ def chat_generate_stream(
 
     with measure_time("Tool call handling"):
         tool_call_result = tools_helper.process_tool_calls(tool_calls)
-        # tool_call_message = {
-        #     "role": "tool",
-        #     "content": tool_call_result.get("content", ""),
-        # }
         messages.append(tool_call_result)
 
     with measure_time("Generate new stream"):
         new_stream = client.generate_stream(messages, has_tool_call=False)
         for chunk in new_stream:
-            print(chunk.get("choices", [])[0].get("delta", {}).get("content"))
             yield chunk
 
 
@@ -113,14 +108,8 @@ def chat_generate(request: ChatRequest):
 
     with measure_time("Tool call handling"):
         tool_call_result = tools_helper.process_tool_calls(tool_calls=tool_calls)
-        # tool_call_message = {
-        #     "role": "tool",
-        #     "content": tool_call_result.get("content", ""),
-        # }
         messages.append(tool_call_result)
 
-    print(messages)
-
     with measure_time("Generate new chat completion"):
         new_output = client.generate(messages=messages, has_tool_call=False)
 
src/services/image_service.py
CHANGED
@@ -4,12 +4,12 @@ from constants.config import OUTPUT_DIR
 from utils.clients import image_pipeline_client
 
 negative_promt = "blurry, distorted, pixelated, incomplete, poorly drawn, misaligned, weird proportions, bad perspective, unnatural colors, noisy, out of focus, glitchy, unsharp, overexposed, underexposed, poorly lit, bad composition, excessive noise, oversaturated, too dark, too bright, inconsistent lighting, discolored, overly stylized, unrealistic, awkward pose, unbalanced, mismatched, distorted features, flat, unnatural texture, chaotic, unreadable, incoherent, asymmetrical, low quality, lowres, wrong anatomy, bad anatomy, deformed, disfigured, ugly"
-width =
-height =
+width = 512
+height = 512
 guidance_scale = 7.5
-num_inference_steps =
+num_inference_steps = 30
 
-base_url = "http://
+base_url = "http://leonguyen101120.zapto.org:7860"
 
 
 def generate_image_url(prompt: str) -> str:
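These module-level values map onto the standard diffusers text-to-image keyword arguments. A hedged sketch of how they would be passed to the pipeline that image_pipeline_client loads (the function name, prompt, and trimmed negative prompt are illustrative, not the repo's code):

# Illustrative call with the parameters set above; `pipeline` is assumed to be the
# StableDiffusionPipeline created by image_pipeline_client, and the keyword
# arguments are the standard diffusers pipeline inputs.
from diffusers import StableDiffusionPipeline
from PIL.Image import Image


def generate(pipeline: StableDiffusionPipeline, prompt: str) -> Image:
    result = pipeline(
        prompt=prompt,
        negative_prompt="blurry, distorted, low quality",  # trimmed stand-in for negative_promt
        width=512,
        height=512,
        guidance_scale=7.5,       # how strongly the prompt steers denoising
        num_inference_steps=30,   # fewer steps is faster, more steps usually looks cleaner
    )
    return result.images[0]       # PIL image, ready to save under OUTPUT_DIR or serve via base_url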
src/utils/clients/image_pipeline_client.py
CHANGED
@@ -1,4 +1,4 @@
-from constants.config import IMAGE_MODEL_ID_OR_LINK
+from constants.config import IMAGE_MODEL_ID_OR_LINK, TORCH_DEVICE
 from utils.timing import measure_time
 
 
@@ -20,6 +20,9 @@ def load_pipeline():
         raise ImportError(
             "diffusers is not installed. Please install it using 'pip install diffusers'."
         )
+
+    print(TORCH_DEVICE)
+
 
     with measure_time("Load image pipeline"):
         pipeline = StableDiffusionPipeline.from_pretrained(
@@ -28,7 +31,7 @@ def load_pipeline():
             variant="fp16",
             # safety_checker=True,
             use_safetensors=True,
-        )
+        ).to(TORCH_DEVICE)
 
 
 def clear_resources():
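The functional change is the explicit .to(TORCH_DEVICE): diffusers' from_pretrained loads the weights on CPU, so without the move the pipeline would run on CPU even when CUDA or MPS is available. A self-contained sketch of the same loading pattern (the model id is a placeholder for IMAGE_MODEL_ID_OR_LINK):

# Sketch of loading a Stable Diffusion pipeline and moving it to the detected device.
# The model id is a placeholder; the project reads IMAGE_MODEL_ID_OR_LINK from constants.config.
import torch
from diffusers import StableDiffusionPipeline

device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"

pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",  # placeholder model id
    torch_dtype=torch.float16,
    variant="fp16",                    # fetch the fp16 weight files when the repo provides them
    use_safetensors=True,
).to(device)                           # from_pretrained stays on CPU; .to() moves the weights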
src/utils/clients/llama_cpp_client.py
CHANGED
@@ -27,31 +27,17 @@ def load():
             "llama_cpp is not installed. Please install it using 'pip install llama-cpp-python'."
         )
 
+
     global _llm
 
-
-
-
-    # n_threads=os.cpu_count(),
-    # n_gpu_layers=-1,
-    # n_ctx=4096,
-    # verbose=True,
-    # use_mlock=True,
-    # use_mmap=True,
-    # # messages_to_prompt=messages_to_prompt,
-    # # completion_to_prompt=completion_to_prompt,
-    # )
-
-    _llm = llama_cpp.Llama(
-        model_path=f"./.cache/{GGUF_FILE_NAME}",
+    _llm = llama_cpp.Llama.from_pretrained(
+        repo_id=GGUF_REPO_ID,
+        filename=GGUF_FILE_NAME,
         n_threads=os.cpu_count(),
         n_gpu_layers=-1,
-        n_ctx=
+        n_ctx=16384,
         verbose=True,
-        use_mlock=True,
         use_mmap=True,
-        # messages_to_prompt=messages_to_prompt,
-        # completion_to_prompt=completion_to_prompt,
     )
 
 
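The switch from a local model_path to Llama.from_pretrained means llama-cpp-python now resolves the GGUF file from a Hugging Face repo (downloaded and cached via huggingface_hub) rather than a checked-in ./.cache path. A hedged sketch of the same loading plus a chat call; the repo id and filename are placeholders for the project's GGUF_REPO_ID and GGUF_FILE_NAME:

# Sketch of the new loading path: Llama.from_pretrained pulls a GGUF file from the
# Hugging Face Hub instead of reading a local ./.cache path. Repo id and filename
# below are placeholders for GGUF_REPO_ID / GGUF_FILE_NAME.
import os

import llama_cpp

llm = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",  # placeholder repo id
    filename="*q4_k_m.gguf",                  # glob pattern matching the quantized file to fetch
    n_threads=os.cpu_count(),
    n_gpu_layers=-1,   # offload all layers when built with CUDA/Metal support
    n_ctx=16384,       # context window in tokens
    use_mmap=True,
    verbose=True,
)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one short sentence."}]
)
print(out["choices"][0]["message"]["content"])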