feihu.hf committed
Commit 6d262b5 · 1 Parent(s): 8022daf

update readme

Files changed (3)
  1. README.md +10 -11
  2. generation_config.json +1 -1
  3. tokenizer_config.json +1 -1
README.md CHANGED
@@ -9,7 +9,7 @@ tags:
 - chat
 ---
 
-# QwQ-32B
+# QwQ-32B-AWQ
 
 <a href="https://chat.qwenlm.ai/" target="_blank" style="margin: 2px;">
 <img alt="Chat" src="https://img.shields.io/badge/%F0%9F%92%9C%EF%B8%8F%20Qwen%20Chat%20-536af5" style="display: inline-block; vertical-align: middle;"/>
@@ -32,11 +32,11 @@ QwQ is the reasoning model of the Qwen series. Compared with conventional instru
 
 **Note:** For the best experience, please review the [usage guidelines](#usage-guidelines) before deploying QwQ models.
 
-For more details, please refer to our [blog](https://qwenlm.github.io/blog/qwen2.5/), [GitHub](https://github.com/QwenLM/Qwen2.5), and [Documentation](https://qwen.readthedocs.io/en/latest/).
+For more details, please refer to our [blog](https://qwenlm.github.io/blog/qwq-32b/), [GitHub](https://github.com/QwenLM/Qwen2.5), and [Documentation](https://qwen.readthedocs.io/en/latest/).
 
 ## Requirements
 
-The code of Qwen2.5 has been in the latest Hugging face `transformers` and we advise you to use the latest version of `transformers`.
+QwQ is based on Qwen2.5, whose code is available in the latest Hugging Face `transformers`. We advise you to use the latest version of `transformers`.
 
 With `transformers<4.37.0`, you will encounter the following error:
 ```
@@ -92,9 +92,8 @@ To achieve optimal performance, we recommend the following settings:
 1. **Enforce Thoughtful Output**: Ensure the model starts with "\<think\>\n" to prevent generating empty thinking content, which can degrade output quality. If you use `apply_chat_template` and set `add_generation_prompt=True`, this is already automatically implemented, but it may cause the response to lack the \<think\> tag at the beginning. This is normal behavior.
 
 2. **Sampling Parameters**:
-   - Use Temperature=0.6 and TopP=0.95 instead of Greedy decoding to avoid endless repetitions and enhance diversity.
-   - For complex reasoning tasks like math or coding, set TopK=40.
-   - For other types of questions, use TopK=20.
+   - Use Temperature=0.6 and TopP=0.95 instead of Greedy decoding to avoid endless repetitions.
+   - Use TopK between 20 and 40 to filter out rare token occurrences while maintaining the diversity of the generated output.
 
 3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.
    - **Math Problems**: Include "Please reason step by step, and put your final answer within \boxed{}." in the prompt.
@@ -120,7 +119,7 @@ We advise adding the `rope_scaling` configuration only when processing long cont
 
 ## Evaluation & Performance
 
-Detailed evaluation results are reported in this [📑 blog](https://qwenlm.github.io/blog/qwen2.5/).
+Detailed evaluation results are reported in this [📑 blog](https://qwenlm.github.io/blog/qwq-32b/).
 
 For requirements on GPU memory and the respective throughput, see results [here](https://qwen.readthedocs.io/en/latest/benchmark/speed_benchmark.html).
 
@@ -129,12 +128,12 @@ For requirements on GPU memory and the respective throughput, see results [here]
 If you find our work helpful, feel free to give us a cite.
 
 ```
-@misc{qwen2.5,
+@misc{qwq32b,
     title = {Qwen2.5: A Party of Foundation Models},
-    url = {https://qwenlm.github.io/blog/qwen2.5/},
+    url = {https://qwenlm.github.io/blog/qwq-32b/},
     author = {Qwen Team},
-    month = {September},
-    year = {2024}
+    month = {March},
+    year = {2025}
 }
 
 @article{qwen2,
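
Taken together, the requirements and sampling guidance above translate into a short `transformers` snippet. This is a minimal sketch, not part of the commit, assuming the repo id `Qwen/QwQ-32B-AWQ` and a recent `transformers` (>=4.37.0):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/QwQ-32B-AWQ"  # assumed repo id for this model card

# Loading an AWQ checkpoint through transformers additionally requires
# the autoawq package to be installed.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype="auto", device_map="auto"
)

messages = [{"role": "user", "content": "How many r's are in \"strawberry\"?"}]
# add_generation_prompt=True appends the assistant header (and, after this
# commit, an opening <think> tag) so the model starts with its reasoning.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Sampling parameters recommended in the usage guidelines:
# Temperature=0.6, TopP=0.95, TopK between 20 and 40.
output_ids = model.generate(
    input_ids,
    max_new_tokens=2048,
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    top_k=40,
)
print(tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True))
```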
generation_config.json CHANGED
@@ -8,7 +8,7 @@
   "pad_token_id": 151643,
   "repetition_penalty": 1.0,
   "temperature": 0.6,
-  "top_k": 20,
+  "top_k": 40,
   "top_p": 0.95,
   "transformers_version": "4.45.2"
 }
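
The effect of this change can be checked by loading the shipped defaults; a sketch (hypothetical usage, not from the commit), again assuming the repo id `Qwen/QwQ-32B-AWQ`:

```python
from transformers import GenerationConfig

# generation_config.json supplies the default sampling settings that
# model.generate() uses when no per-call overrides are passed.
gen_cfg = GenerationConfig.from_pretrained("Qwen/QwQ-32B-AWQ")  # assumed repo id
print(gen_cfg.temperature, gen_cfg.top_p, gen_cfg.top_k)  # expected: 0.6 0.95 40
```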
tokenizer_config.json CHANGED
@@ -227,7 +227,7 @@
     "<|video_pad|>"
   ],
   "bos_token": null,
-  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- '' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" and not message.tool_calls %}\n        {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n        {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
+  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- '' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" and not message.tool_calls %}\n        {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n        {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set content = message.content.split('</think>')[-1].lstrip('\\n') %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n<think>\\n' }}\n{%- endif %}\n",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",