how to run on vllm with tool call parsing -- encountered an error
#9
by
radek
- opened
Unfortunately, when I add the command-line options for tool parsing and attempt to start vLLM with `vllm serve`, I get:
from vllm.entrypoints.cli.main import main
File "/vllm/vllm/entrypoints/cli/__init__.py", line 4, in <module>
from vllm.entrypoints.cli.benchmark.serve import BenchmarkServingSubcommand
File "/vllm/vllm/entrypoints/cli/benchmark/serve.py", line 5, in <module>
from vllm.benchmarks.serve import add_cli_args, main
File "/vllm/vllm/benchmarks/serve.py", line 36, in <module>
from vllm.benchmarks.datasets import (SampleRequest, add_dataset_parser,
File "/vllm/vllm/benchmarks/datasets.py", line 32, in <module>
from vllm.lora.utils import get_adapter_absolute_path
File "/vllm/vllm/lora/utils.py", line 16, in <module>
from vllm.lora.fully_sharded_layers import (
File "/vllm/vllm/lora/fully_sharded_layers.py", line 15, in <module>
from vllm.lora.layers import (ColumnParallelLinearWithLoRA,
File "/vllm/vllm/lora/layers.py", line 23, in <module>
from vllm.model_executor.layers.linear import (ColumnParallelLinear,
File "/vllm/vllm/model_executor/layers/linear.py", line 22, in <module>
from vllm.model_executor.layers.utils import dispatch_unquantized_gemm
File "/vllm/vllm/model_executor/layers/utils.py", line 8, in <module>
from vllm import _custom_ops as ops
File "/vllm/vllm/_custom_ops.py", line 440, in <module>
@register_fake("_C::marlin_qqq_gemm")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/torch/library.py", line 1023, in register
use_lib._register_fake(op_name, func, _stacklevel=stacklevel + 1)
File "/usr/local/lib/python3.12/site-packages/torch/library.py", line 214, in _register_fake
handle = entry.fake_impl.register(func_to_register, source)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/local/lib/python3.12/site-packages/torch/_library/fake_impl.py", line 31, in register
if torch._C._dispatch_has_kernel_for_dispatch_key(self.qualname, "Meta"):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: operator _C::marlin_qqq_gemm does not exist
Is there a prebuilt docker image I could use? If not, could you please share how you built vllm to include marlin_qqq_gemm?
Thank you so much for your help!
radek changed discussion status to closed