mrinjera committed
Commit 08d77f3 · verified · 1 Parent(s): baa16bb

Update reranker/rank_listwise_os_vllm.py to reduce memory footprint

Files changed (1)
1. reranker/rank_listwise_os_vllm.py +9 -1
reranker/rank_listwise_os_vllm.py CHANGED
@@ -44,7 +44,15 @@ class RankListwiseOSLLM(RankLLM):
                 f"Unsupported prompt mode: {prompt_mode}. Only RANK_GPT is supported."
             )
 
-        self._llm = LLM(model=model, max_logprobs=30, enforce_eager=False, gpu_memory_utilization=0.9, max_model_len=32768, trust_remote_code=True, enable_chunked_prefill=True, tensor_parallel_size=1)
+        self._llm = LLM(
+            model=model, max_logprobs=30,
+            enforce_eager=True,
+            gpu_memory_utilization=0.9,
+            max_model_len=2048,
+            trust_remote_code=True,
+            enable_chunked_prefill=True,
+            tensor_parallel_size=1
+        )
         self._tokenizer = self._llm.get_tokenizer()
         self.system_message_supported = "system" in self._tokenizer.chat_template
         self._batched = batched
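
Note on the change: switching enforce_eager from False to True skips vLLM's CUDA graph capture, which otherwise reserves additional GPU memory, and lowering max_model_len from 32768 to 2048 shrinks the KV-cache space vLLM pre-allocates; gpu_memory_utilization stays at 0.9. Below is a minimal, self-contained sketch that exercises the same constructor settings outside the reranker class; the model id and prompt are placeholders, not values from this repository.

# Sketch only: same vLLM settings as the updated constructor call.
# The model id and prompt are placeholder assumptions, not repo values.
from vllm import LLM, SamplingParams

llm = LLM(
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    max_logprobs=30,               # listwise reranking reads the top log-probabilities
    enforce_eager=True,            # no CUDA graph capture -> lower fixed GPU memory overhead
    gpu_memory_utilization=0.9,    # fraction of GPU memory vLLM is allowed to claim
    max_model_len=2048,            # smaller max sequence length -> smaller KV-cache reservation
    trust_remote_code=True,
    enable_chunked_prefill=True,   # prefill long prompts in chunks to cap peak memory
    tensor_parallel_size=1,
)

tokenizer = llm.get_tokenizer()
outputs = llm.generate(
    ["Rank the following passages by relevance to the query ..."],
    SamplingParams(max_tokens=32, logprobs=30),
)
print(outputs[0].outputs[0].text)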