Use transformers backend

#2
Opened by mfuntowicz (HF Staff)
Files changed (2)
  1. Dockerfile +1 -0
  2. entrypoint.sh +1 -0
Dockerfile CHANGED
@@ -1,6 +1,7 @@
1
  FROM lmsysorg/sglang:latest
2
 
3
  ENV MODEL_ID="/repository"
 
4
  ENV KV_CACHE_DTYPE="auto"
5
  ENV TP_SIZE="1"
6
  ENV QUANT_METHOD="w8a8_int8"
 
1
  FROM lmsysorg/sglang:latest
2
 
3
  ENV MODEL_ID="/repository"
4
+ ENV MODEL_IMPL="transformers"
5
  ENV KV_CACHE_DTYPE="auto"
6
  ENV TP_SIZE="1"
7
  ENV QUANT_METHOD="w8a8_int8"
entrypoint.sh CHANGED
@@ -8,6 +8,7 @@ python3 -m sglang.launch_server \
8
  --quantization $QUANT_METHOD \
9
  --enable-torch-compile \
10
  --enable-ep-moe \
 
11
  --tool-call-parser qwen25 \
12
  --host 0.0.0.0 \
13
  --port 80
 
8
  --quantization $QUANT_METHOD \
9
  --enable-torch-compile \
10
  --enable-ep-moe \
11
+ --model-impl $MODEL_IMPL \
12
  --tool-call-parser qwen25 \
13
  --host 0.0.0.0 \
14
  --port 80