Update reranker/rank_listwise_os_vllm.py to reduce memory footprint
Browse files
reranker/rank_listwise_os_vllm.py
CHANGED
@@ -44,7 +44,15 @@ class RankListwiseOSLLM(RankLLM):
             f"Unsupported prompt mode: {prompt_mode}. Only RANK_GPT is supported."
         )

-        self._llm = LLM([original arguments not captured in this page extract]
+        self._llm = LLM(
+            model=model, max_logprobs=30,
+            enforce_eager=True,
+            gpu_memory_utilization=0.9,
+            max_model_len=2048,
+            trust_remote_code=True,
+            enable_chunked_prefill=True,
+            tensor_parallel_size=1
+        )
         self._tokenizer = self._llm.get_tokenizer()
         self.system_message_supported = "system" in self._tokenizer.chat_template
         self._batched = batched