nm-research committed on
Commit
9043b00
·
verified ·
1 Parent(s): ca2dad6

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -39,7 +39,7 @@ from transformers import AutoTokenizer
39
  from vllm import LLM, SamplingParams
40
 
41
  max_model_len, tp_size = 4096, 1
42
- model_name = "neuralmagic-ent/DeepSeek-R1-Distill-Llama-8B-FP8-Dynamic"
43
  tokenizer = AutoTokenizer.from_pretrained(model_name)
44
  llm = LLM(model=model_name, tensor_parallel_size=tp_size, max_model_len=max_model_len, trust_remote_code=True)
45
  sampling_params = SamplingParams(temperature=0.6, max_tokens=256, stop_token_ids=[tokenizer.eos_token_id])
@@ -112,7 +112,7 @@ OpenLLM Leaderboard V1:
112
  ```
113
  lm_eval \
114
  --model vllm \
115
- --model_args pretrained="neuralmagic-ent/DeepSeek-R1-Distill-Llama-8B-FP8-Dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1,gpu_memory_utilization=0.8,enable_chunked_prefill=True,trust_remote_code=True \
116
  --tasks openllm \
117
  --write_out \
118
  --batch_size auto \
@@ -124,7 +124,7 @@ OpenLLM Leaderboard V2:
124
  ```
125
  lm_eval \
126
  --model vllm \
127
- --model_args pretrained="neuralmagic-ent/DeepSeek-R1-Distill-Llama-8B-FP8-Dynamic",dtype=auto,add_bos_token=False,max_model_len=4096,tensor_parallel_size=1,gpu_memory_utilization=0.8,enable_chunked_prefill=True,trust_remote_code=True \
128
  --apply_chat_template \
129
  --fewshot_as_multiturn \
130
  --tasks leaderboard \
@@ -143,7 +143,7 @@ lm_eval \
143
  <th>Category</th>
144
  <th>Metric</th>
145
  <th>deepseek-ai/DeepSeek-R1-Distill-Llama-8B</th>
146
- <th>neuralmagic-ent/DeepSeek-R1-Distill-Llama-8B-FP8-Dynamic</th>
147
  <th>Recovery</th>
148
  </tr>
149
  </thead>
 
39
  from vllm import LLM, SamplingParams
40
 
41
  max_model_len, tp_size = 4096, 1
42
+ model_name = "neuralmagic/DeepSeek-R1-Distill-Llama-8B-FP8-Dynamic"
43
  tokenizer = AutoTokenizer.from_pretrained(model_name)
44
  llm = LLM(model=model_name, tensor_parallel_size=tp_size, max_model_len=max_model_len, trust_remote_code=True)
45
  sampling_params = SamplingParams(temperature=0.6, max_tokens=256, stop_token_ids=[tokenizer.eos_token_id])
 
112
  ```
113
  lm_eval \
114
  --model vllm \
115
+ --model_args pretrained="neuralmagic/DeepSeek-R1-Distill-Llama-8B-FP8-Dynamic",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1,gpu_memory_utilization=0.8,enable_chunked_prefill=True,trust_remote_code=True \
116
  --tasks openllm \
117
  --write_out \
118
  --batch_size auto \
 
124
  ```
125
  lm_eval \
126
  --model vllm \
127
+ --model_args pretrained="neuralmagic/DeepSeek-R1-Distill-Llama-8B-FP8-Dynamic",dtype=auto,add_bos_token=False,max_model_len=4096,tensor_parallel_size=1,gpu_memory_utilization=0.8,enable_chunked_prefill=True,trust_remote_code=True \
128
  --apply_chat_template \
129
  --fewshot_as_multiturn \
130
  --tasks leaderboard \
 
143
  <th>Category</th>
144
  <th>Metric</th>
145
  <th>deepseek-ai/DeepSeek-R1-Distill-Llama-8B</th>
146
+ <th>neuralmagic/DeepSeek-R1-Distill-Llama-8B-FP8-Dynamic</th>
147
  <th>Recovery</th>
148
  </tr>
149
  </thead>