LeoNguyen101120 committed

Commit e48d33f · 1 Parent(s): e086b97

Refactor Dockerfile and .dockerignore: Update file copying strategy to include only necessary files and improve ignored patterns for better build efficiency.

Files changed (3)
  1. .dockerignore +21 -7
  2. Dockerfile +3 -2
  3. src/utils/llama_cpp_client.py +21 -21
.dockerignore CHANGED
@@ -1,27 +1,41 @@
  # Byte-compiled / optimized / DLL files
- __pycache__/
- *.py[cod]
- *.pyo
- *.pyw
- *.pyz
+
+ __pycache__/
+ *.py[cod]
+ *.pyo
+ *.pyw
+ *.pyz
 
  # Distribution / packaging
- *.egg-info/
+
+ *.egg-info/
 
  # OS files
+
  .DS_Store
 
  # Environment files
+
  .env
- .env.*
+ .env.*
 
  # Project data and outputs
+
  outputs/
  uploads/
  data/
 
  # VSCode settings
+
  .vscode/
 
  # Git
+
  .git/
+
+ .venv
+ .idea
+ .vscode
+ *.md
+ .git
+ .env
Dockerfile CHANGED
@@ -35,8 +35,9 @@ RUN pip install --no-cache-dir "llama-cpp-python==0.3.8" --extra-index-url https
  RUN grep -v "llama-cpp-python" requirements.txt > requirements-no-llama.txt && \
      pip install --no-cache-dir -r requirements-no-llama.txt
 
- # 6. Copy the rest of the application code
- COPY . .
+ # 6. Copy only necessary files and folders
+ COPY requirements.txt .
+ COPY src/ ./src
 
  # 7. Expose the port FastAPI will run on
  EXPOSE 7860
src/utils/llama_cpp_client.py CHANGED
@@ -7,25 +7,25 @@ from models.responses.chat_response import ChatResponse
  from utils.timing import measure_time
  from utils.tools import tools_define
 
- from transformers import AutoTokenizer
+ # from transformers import AutoTokenizer
 
- tokenizer = AutoTokenizer.from_pretrained("modularai/Llama-3.1-8B-Instruct-GGUF")
+ # tokenizer = AutoTokenizer.from_pretrained("modularai/Llama-3.1-8B-Instruct-GGUF")
 
 
- def messages_to_prompt(messages):
-     messages = [{"role": m.role.value, "content": m.content} for m in messages]
-     prompt = tokenizer.apply_chat_template(
-         messages, tokenize=False, add_generation_prompt=True
-     )
-     return prompt
+ # def messages_to_prompt(messages):
+ #     messages = [{"role": m.role.value, "content": m.content} for m in messages]
+ #     prompt = tokenizer.apply_chat_template(
+ #         messages, tokenize=False, add_generation_prompt=True
+ #     )
+ #     return prompt
 
 
- def completion_to_prompt(completion):
-     messages = [{"role": "user", "content": completion}]
-     prompt = tokenizer.apply_chat_template(
-         messages, tokenize=False, add_generation_prompt=True
-     )
-     return prompt
+ # def completion_to_prompt(completion):
+ #     messages = [{"role": "user", "content": completion}]
+ #     prompt = tokenizer.apply_chat_template(
+ #         messages, tokenize=False, add_generation_prompt=True
+ #     )
+ #     return prompt
 
 
  # llm = llama_cpp.Llama(
@@ -44,8 +44,8 @@ llm = llama_cpp.Llama.from_pretrained(
      n_gpu_layers=-1,
      n_ctx=4096,
      verbose=True,
-     messages_to_prompt=messages_to_prompt,
-     completion_to_prompt=completion_to_prompt,
+     # messages_to_prompt=messages_to_prompt,
+     # completion_to_prompt=completion_to_prompt,
  )
 
 
@@ -60,11 +60,11 @@ def create(messages: List[Message], has_tool_call: bool = True):
      try:
          with measure_time("Starting create chat completion"):
              output = llm.create_chat_completion(
-                 prompt,
-                 tools=tools,
+                 prompt, # type: ignore
+                 tools=tools, # type: ignore
                  tool_choice=tool_choice,
              ) # type: ignore
-             return ChatResponse.from_llm_output(output)
+             return ChatResponse.from_llm_output(output) # type: ignore
      except Exception as e:
          print(f"Error in create chat completion: {str(e)}")
          raise
@@ -74,9 +74,9 @@ def create_stream(messages: List[Message]) -> Generator[ChatResponse, None, None
      prompt = [message.to_map() for message in messages]
 
      output = llm.create_chat_completion(
-         prompt,
+         prompt, # type: ignore
          stream=True,
-         tools=tools_define.tools,
+         tools=tools_define.tools, # type: ignore
          tool_choice="auto",
      ) # type: ignore
      last_role = None
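
The net effect in this file is that the client now leans entirely on llama-cpp-python's built-in chat handling: the transformers-based prompt helpers are commented out (hooks named `messages_to_prompt`/`completion_to_prompt` appear to come from wrappers such as LlamaIndex's `LlamaCPP` rather than `llama_cpp.Llama` itself, which would explain their removal), and `# type: ignore` hints cover the loose typing of `create_chat_completion`. Below is a minimal sketch of that usage pattern; the GGUF `filename` and the example tool schema are illustrative assumptions and do not appear in the diff.

```python
# Minimal sketch (not part of the commit) of the built-in chat-completion path
# this file now relies on. Only the repo id, n_gpu_layers, n_ctx, and
# tool_choice values are taken from the diff; the filename and tool schema
# below are hypothetical.
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="modularai/Llama-3.1-8B-Instruct-GGUF",
    filename="llama-3.1-8b-instruct-q4_k_m.gguf",  # hypothetical quantization file
    n_gpu_layers=-1,  # offload every layer to the GPU when one is available
    n_ctx=4096,
    verbose=True,
)

# llama-cpp-python applies the model's own chat template internally, so the
# removed tokenizer.apply_chat_template() step is no longer needed.
messages = [{"role": "user", "content": "What is the weather in Hanoi today?"}]

output = llm.create_chat_completion(
    messages=messages,
    tools=[  # stand-in for the project's tools_define.tools
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Look up the current weather for a city",
                "parameters": {
                    "type": "object",
                    "properties": {"city": {"type": "string"}},
                    "required": ["city"],
                },
            },
        }
    ],
    tool_choice="auto",
)
print(output["choices"][0]["message"])
```

Passing `stream=True` makes `create_chat_completion` yield chunks instead of returning a single dict, which matches the `create_stream()` path kept in this file.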