Morgan Funtowicz
committed on
Commit
·
0727aa0
1
Parent(s):
36406e7
feat(text-generation): default to transformers backend as much as we can
Browse files
- Dockerfile +1 -0
- entrypoint.sh +1 -0
Dockerfile
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
FROM lmsysorg/sglang:latest
|
2 |
|
3 |
ENV MODEL_ID="/repository"
|
|
|
4 |
ENV KV_CACHE_DTYPE="auto"
|
5 |
ENV TP_SIZE="1"
|
6 |
ENV QUANT_METHOD="w8a8_int8"
|
|
|
1 |
FROM lmsysorg/sglang:latest
|
2 |
|
3 |
ENV MODEL_ID="/repository"
|
4 |
+
ENV MODEL_IMPL="transformers"
|
5 |
ENV KV_CACHE_DTYPE="auto"
|
6 |
ENV TP_SIZE="1"
|
7 |
ENV QUANT_METHOD="w8a8_int8"
|
entrypoint.sh
CHANGED
@@ -8,6 +8,7 @@ python3 -m sglang.launch_server \
|
|
8 |
--quantization $QUANT_METHOD \
|
9 |
--enable-torch-compile \
|
10 |
--enable-ep-moe \
|
|
|
11 |
--tool-call-parser qwen25 \
|
12 |
--host 0.0.0.0 \
|
13 |
--port 80
|
|
|
8 |
--quantization $QUANT_METHOD \
|
9 |
--enable-torch-compile \
|
10 |
--enable-ep-moe \
|
11 |
+
--model-impl $MODEL_IMPL \
|
12 |
--tool-call-parser qwen25 \
|
13 |
--host 0.0.0.0 \
|
14 |
--port 80
|