hfendpoints-images
/

text-generation-sglang-gpu

Model card Files Files and versions Community

Use transformers backend

#2

by mfuntowicz HF Staff - opened 5 days ago

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

This PR is in draft mode

Files changed (2) hide show

Dockerfile +1 -0
entrypoint.sh +1 -0

Dockerfile CHANGED Viewed

@@ -1,6 +1,7 @@
 FROM lmsysorg/sglang:latest
 ENV MODEL_ID="/repository"
 ENV KV_CACHE_DTYPE="auto"
 ENV TP_SIZE="1"
 ENV QUANT_METHOD="w8a8_int8"

 FROM lmsysorg/sglang:latest
 ENV MODEL_ID="/repository"
+ENV MODEL_IMPL="transformers"
 ENV KV_CACHE_DTYPE="auto"
 ENV TP_SIZE="1"
 ENV QUANT_METHOD="w8a8_int8"

entrypoint.sh CHANGED Viewed

@@ -8,6 +8,7 @@ python3 -m sglang.launch_server \
   --quantization $QUANT_METHOD \
   --enable-torch-compile \
   --enable-ep-moe \
   --tool-call-parser qwen25 \
   --host 0.0.0.0 \
   --port 80

   --quantization $QUANT_METHOD \
   --enable-torch-compile \
   --enable-ep-moe \
+  --model-impl $MODEL_IMPL \
   --tool-call-parser qwen25 \
   --host 0.0.0.0 \
   --port 80