Text Generation
MLX
Safetensors
GGUF
qwen3
conversational
Adeel Ahmad committed on
Commit
dc0556c
·
1 Parent(s): cb5e5c9

feat: :tada:

Browse files
Files changed (37) hide show
  1. .DS_Store +0 -0
  2. .gitattributes +0 -45
  3. Modelfile +0 -255
  4. README.md +0 -317
  5. ReasonableQwen3-4B-Q4_k.gguf +3 -0
  6. ReasonableQwen3-4B-Q8_0.gguf +3 -0
  7. checkpoint_20250903_232422_periodic_update_2600/added_tokens.json +0 -38
  8. checkpoint_20250903_232422_periodic_update_2600/chat_template.jinja +0 -96
  9. checkpoint_20250903_232422_periodic_update_2600/config.json +0 -30
  10. checkpoint_20250903_232422_periodic_update_2600/merges.txt +0 -0
  11. checkpoint_20250903_232422_periodic_update_2600/special_tokens_map.json +0 -102
  12. checkpoint_20250903_232422_periodic_update_2600/tokenizer_config.json +0 -318
  13. checkpoint_20250903_232422_periodic_update_2600/training_state.json +0 -763
  14. checkpoint_20250903_232422_periodic_update_2600/vocab.json +0 -0
  15. checkpoint_20250903_232551_exit_request_update_2602/added_tokens.json +0 -38
  16. checkpoint_20250903_232551_exit_request_update_2602/chat_template.jinja +0 -96
  17. checkpoint_20250903_232551_exit_request_update_2602/config.json +0 -30
  18. checkpoint_20250903_232551_exit_request_update_2602/merges.txt +0 -0
  19. checkpoint_20250903_232551_exit_request_update_2602/special_tokens_map.json +0 -102
  20. checkpoint_20250903_232551_exit_request_update_2602/tokenizer_config.json +0 -318
  21. checkpoint_20250903_232551_exit_request_update_2602/training_state.json +0 -763
  22. checkpoint_20250903_232551_exit_request_update_2602/vocab.json +0 -0
  23. checkpoint_20250903_232617_shutdown_signal_update_2602/added_tokens.json +0 -38
  24. checkpoint_20250903_232617_shutdown_signal_update_2602/chat_template.jinja +0 -96
  25. checkpoint_20250903_232617_shutdown_signal_update_2602/config.json +0 -30
  26. checkpoint_20250903_232617_shutdown_signal_update_2602/merges.txt +0 -0
  27. checkpoint_20250903_232617_shutdown_signal_update_2602/special_tokens_map.json +0 -102
  28. checkpoint_20250903_232617_shutdown_signal_update_2602/tokenizer_config.json +0 -318
  29. checkpoint_20250903_232617_shutdown_signal_update_2602/training_state.json +0 -763
  30. checkpoint_20250903_232617_shutdown_signal_update_2602/vocab.json +0 -0
  31. model.safetensors +3 -0
  32. model.safetensors.index.json +0 -406
  33. plots/loss_vs_updates.png +0 -0
  34. plots/lr_vs_updates.png +0 -0
  35. plots/reward_vs_updates.png +0 -0
  36. tokenizer.json +3 -0
  37. training_state.json +0 -763
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
.gitattributes DELETED
@@ -1,45 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
36
- tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
- ReasonableQwen3-4B-Q4_k.gguf filter=lfs diff=lfs merge=lfs -text
38
- ReasonableQwen3-4B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
39
- ReasonableQwen3-4B-Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
40
- .DS_Store filter=lfs diff=lfs merge=lfs -text
41
- checkpoint_20250903_232422_periodic_update_2600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
42
- checkpoint_20250903_232551_exit_request_update_2602/tokenizer.json filter=lfs diff=lfs merge=lfs -text
43
- checkpoint_20250903_232617_shutdown_signal_update_2602/tokenizer.json filter=lfs diff=lfs merge=lfs -text
44
- samples_debug.jsonl filter=lfs diff=lfs merge=lfs -text
45
- training_debug.log filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Modelfile DELETED
@@ -1,255 +0,0 @@
1
- From ./ReasonableQwen3-4B-Q4_k.gguf
2
-
3
- TEMPLATE """{{- if .Messages }}
4
- {{- if or .System .Tools }}<|im_start|>system
5
- {{- if .System }}
6
- {{ .System }}
7
- {{- end }}
8
- {{- if .Tools }}
9
-
10
- # Tools
11
-
12
- You may call one or more functions to assist with the user query.
13
-
14
- You are provided with function signatures within <tools></tools> XML tags:
15
- <tools>
16
- {{- range .Tools }}
17
- {"type": "function", "function": {{ .Function }}}
18
- {{- end }}
19
- </tools>
20
-
21
- For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
22
- <tool_call>
23
- {"name": <function-name>, "arguments": <args-json-object>}
24
- </tool_call>
25
- {{- end }}<|im_end|>
26
- {{ end }}
27
- {{- range $i, $_ := .Messages }}
28
- {{- $last := eq (len (slice $.Messages $i)) 1 -}}
29
- {{- if eq .Role "user" }}<|im_start|>user
30
- {{ .Content }}<|im_end|>
31
- {{ else if eq .Role "assistant" }}<|im_start|>assistant
32
- {{ if .Content }}{{ .Content }}
33
- {{- else if .ToolCalls }}<tool_call>
34
- {{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}
35
- {{ end }}</tool_call>
36
- {{- end }}{{ if not $last }}<|im_end|>
37
- {{ end }}
38
- {{- else if eq .Role "tool" }}<|im_start|>user
39
- <tool_response>
40
- {{ .Content }}
41
- </tool_response><|im_end|>
42
- {{ end }}
43
- {{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
44
- {{ end }}
45
- {{- end }}
46
- {{- else }}
47
- {{- if .System }}<|im_start|>system
48
- {{ .System }}<|im_end|>
49
- {{ end }}{{ if .Prompt }}<|im_start|>user
50
- {{ .Prompt }}<|im_end|>
51
- {{ end }}<|im_start|>assistant
52
- {{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}"""
53
- PARAMETER repeat_penalty 1
54
- PARAMETER stop <|im_start|>
55
- PARAMETER stop <|im_end|>
56
- PARAMETER temperature 0.6
57
- PARAMETER top_k 20
58
- PARAMETER top_p 0.95
59
- LICENSE """ Apache License
60
- Version 2.0, January 2004
61
- http://www.apache.org/licenses/
62
-
63
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
64
-
65
- 1. Definitions.
66
-
67
- "License" shall mean the terms and conditions for use, reproduction,
68
- and distribution as defined by Sections 1 through 9 of this document.
69
-
70
- "Licensor" shall mean the copyright owner or entity authorized by
71
- the copyright owner that is granting the License.
72
-
73
- "Legal Entity" shall mean the union of the acting entity and all
74
- other entities that control, are controlled by, or are under common
75
- control with that entity. For the purposes of this definition,
76
- "control" means (i) the power, direct or indirect, to cause the
77
- direction or management of such entity, whether by contract or
78
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
79
- outstanding shares, or (iii) beneficial ownership of such entity.
80
-
81
- "You" (or "Your") shall mean an individual or Legal Entity
82
- exercising permissions granted by this License.
83
-
84
- "Source" form shall mean the preferred form for making modifications,
85
- including but not limited to software source code, documentation
86
- source, and configuration files.
87
-
88
- "Object" form shall mean any form resulting from mechanical
89
- transformation or translation of a Source form, including but
90
- not limited to compiled object code, generated documentation,
91
- and conversions to other media types.
92
-
93
- "Work" shall mean the work of authorship, whether in Source or
94
- Object form, made available under the License, as indicated by a
95
- copyright notice that is included in or attached to the work
96
- (an example is provided in the Appendix below).
97
-
98
- "Derivative Works" shall mean any work, whether in Source or Object
99
- form, that is based on (or derived from) the Work and for which the
100
- editorial revisions, annotations, elaborations, or other modifications
101
- represent, as a whole, an original work of authorship. For the purposes
102
- of this License, Derivative Works shall not include works that remain
103
- separable from, or merely link (or bind by name) to the interfaces of,
104
- the Work and Derivative Works thereof.
105
-
106
- "Contribution" shall mean any work of authorship, including
107
- the original version of the Work and any modifications or additions
108
- to that Work or Derivative Works thereof, that is intentionally
109
- submitted to Licensor for inclusion in the Work by the copyright owner
110
- or by an individual or Legal Entity authorized to submit on behalf of
111
- the copyright owner. For the purposes of this definition, "submitted"
112
- means any form of electronic, verbal, or written communication sent
113
- to the Licensor or its representatives, including but not limited to
114
- communication on electronic mailing lists, source code control systems,
115
- and issue tracking systems that are managed by, or on behalf of, the
116
- Licensor for the purpose of discussing and improving the Work, but
117
- excluding communication that is conspicuously marked or otherwise
118
- designated in writing by the copyright owner as "Not a Contribution."
119
-
120
- "Contributor" shall mean Licensor and any individual or Legal Entity
121
- on behalf of whom a Contribution has been received by Licensor and
122
- subsequently incorporated within the Work.
123
-
124
- 2. Grant of Copyright License. Subject to the terms and conditions of
125
- this License, each Contributor hereby grants to You a perpetual,
126
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
127
- copyright license to reproduce, prepare Derivative Works of,
128
- publicly display, publicly perform, sublicense, and distribute the
129
- Work and such Derivative Works in Source or Object form.
130
-
131
- 3. Grant of Patent License. Subject to the terms and conditions of
132
- this License, each Contributor hereby grants to You a perpetual,
133
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
134
- (except as stated in this section) patent license to make, have made,
135
- use, offer to sell, sell, import, and otherwise transfer the Work,
136
- where such license applies only to those patent claims licensable
137
- by such Contributor that are necessarily infringed by their
138
- Contribution(s) alone or by combination of their Contribution(s)
139
- with the Work to which such Contribution(s) was submitted. If You
140
- institute patent litigation against any entity (including a
141
- cross-claim or counterclaim in a lawsuit) alleging that the Work
142
- or a Contribution incorporated within the Work constitutes direct
143
- or contributory patent infringement, then any patent licenses
144
- granted to You under this License for that Work shall terminate
145
- as of the date such litigation is filed.
146
-
147
- 4. Redistribution. You may reproduce and distribute copies of the
148
- Work or Derivative Works thereof in any medium, with or without
149
- modifications, and in Source or Object form, provided that You
150
- meet the following conditions:
151
-
152
- (a) You must give any other recipients of the Work or
153
- Derivative Works a copy of this License; and
154
-
155
- (b) You must cause any modified files to carry prominent notices
156
- stating that You changed the files; and
157
-
158
- (c) You must retain, in the Source form of any Derivative Works
159
- that You distribute, all copyright, patent, trademark, and
160
- attribution notices from the Source form of the Work,
161
- excluding those notices that do not pertain to any part of
162
- the Derivative Works; and
163
-
164
- (d) If the Work includes a "NOTICE" text file as part of its
165
- distribution, then any Derivative Works that You distribute must
166
- include a readable copy of the attribution notices contained
167
- within such NOTICE file, excluding those notices that do not
168
- pertain to any part of the Derivative Works, in at least one
169
- of the following places: within a NOTICE text file distributed
170
- as part of the Derivative Works; within the Source form or
171
- documentation, if provided along with the Derivative Works; or,
172
- within a display generated by the Derivative Works, if and
173
- wherever such third-party notices normally appear. The contents
174
- of the NOTICE file are for informational purposes only and
175
- do not modify the License. You may add Your own attribution
176
- notices within Derivative Works that You distribute, alongside
177
- or as an addendum to the NOTICE text from the Work, provided
178
- that such additional attribution notices cannot be construed
179
- as modifying the License.
180
-
181
- You may add Your own copyright statement to Your modifications and
182
- may provide additional or different license terms and conditions
183
- for use, reproduction, or distribution of Your modifications, or
184
- for any such Derivative Works as a whole, provided Your use,
185
- reproduction, and distribution of the Work otherwise complies with
186
- the conditions stated in this License.
187
-
188
- 5. Submission of Contributions. Unless You explicitly state otherwise,
189
- any Contribution intentionally submitted for inclusion in the Work
190
- by You to the Licensor shall be under the terms and conditions of
191
- this License, without any additional terms or conditions.
192
- Notwithstanding the above, nothing herein shall supersede or modify
193
- the terms of any separate license agreement you may have executed
194
- with Licensor regarding such Contributions.
195
-
196
- 6. Trademarks. This License does not grant permission to use the trade
197
- names, trademarks, service marks, or product names of the Licensor,
198
- except as required for reasonable and customary use in describing the
199
- origin of the Work and reproducing the content of the NOTICE file.
200
-
201
- 7. Disclaimer of Warranty. Unless required by applicable law or
202
- agreed to in writing, Licensor provides the Work (and each
203
- Contributor provides its Contributions) on an "AS IS" BASIS,
204
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
205
- implied, including, without limitation, any warranties or conditions
206
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
207
- PARTICULAR PURPOSE. You are solely responsible for determining the
208
- appropriateness of using or redistributing the Work and assume any
209
- risks associated with Your exercise of permissions under this License.
210
-
211
- 8. Limitation of Liability. In no event and under no legal theory,
212
- whether in tort (including negligence), contract, or otherwise,
213
- unless required by applicable law (such as deliberate and grossly
214
- negligent acts) or agreed to in writing, shall any Contributor be
215
- liable to You for damages, including any direct, indirect, special,
216
- incidental, or consequential damages of any character arising as a
217
- result of this License or out of the use or inability to use the
218
- Work (including but not limited to damages for loss of goodwill,
219
- work stoppage, computer failure or malfunction, or any and all
220
- other commercial damages or losses), even if such Contributor
221
- has been advised of the possibility of such damages.
222
-
223
- 9. Accepting Warranty or Additional Liability. While redistributing
224
- the Work or Derivative Works thereof, You may choose to offer,
225
- and charge a fee for, acceptance of support, warranty, indemnity,
226
- or other liability obligations and/or rights consistent with this
227
- License. However, in accepting such obligations, You may act only
228
- on Your own behalf and on Your sole responsibility, not on behalf
229
- of any other Contributor, and only if You agree to indemnify,
230
- defend, and hold each Contributor harmless for any liability
231
- incurred by, or claims asserted against, such Contributor by reason
232
- of your accepting any such warranty or additional liability.
233
-
234
- END OF TERMS AND CONDITIONS
235
-
236
- APPENDIX: How to apply the Apache License to your work.
237
-
238
- To apply the Apache License to your work, attach the following
239
- boilerplate notice, with the fields enclosed by brackets "[]"
240
- replaced with your own identifying information. (Don't include
241
- the brackets!) The text should be enclosed in the appropriate
242
- comment syntax for the file format. We also recommend that a
243
- file or class name and description of purpose be included on the
244
- same "printed page" as the copyright notice for easier
245
- identification within third-party archives.
246
- Copyright 2024 Alibaba Cloud
247
- Licensed under the Apache License, Version 2.0 (the "License");
248
- you may not use this file except in compliance with the License.
249
- You may obtain a copy of the License at
250
- http://www.apache.org/licenses/LICENSE-2.0
251
- Unless required by applicable law or agreed to in writing, software
252
- distributed under the License is distributed on an "AS IS" BASIS,
253
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
254
- See the License for the specific language governing permissions and
255
- limitations under the License."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md DELETED
@@ -1,317 +0,0 @@
1
- ---
2
- library_name: mlx
3
- license: apache-2.0
4
- license_link: https://huggingface.co/Qwen/Qwen3-4B/blob/main/LICENSE
5
- pipeline_tag: text-generation
6
- base_model:
7
- - Qwen/Qwen3-4B
8
- ---
9
-
10
- # ReasonableQwen3-4B
11
-
12
- ## Qwen3 Highlights
13
-
14
- Qwen3 is the latest generation of large language models in Qwen series, offering a comprehensive suite of dense and mixture-of-experts (MoE) models. Built upon extensive training, Qwen3 delivers groundbreaking advancements in reasoning, instruction-following, agent capabilities, and multilingual support, with the following key features:
15
-
16
- - **Unique support for seamless switching between thinking mode** (for complex logical reasoning, math, and coding) and **non-thinking mode** (for efficient, general-purpose dialogue) **within a single model**, ensuring optimal performance across various scenarios.
17
- - **Significant enhancement of its reasoning capabilities**, surpassing previous QwQ (in thinking mode) and Qwen2.5 instruct models (in non-thinking mode) on mathematics, code generation, and commonsense logical reasoning.
18
- - **Superior human preference alignment**, excelling in creative writing, role-playing, multi-turn dialogues, and instruction following, to deliver a more natural, engaging, and immersive conversational experience.
19
- - **Expertise in agent capabilities**, enabling precise integration with external tools in both thinking and non-thinking modes and achieving leading performance among open-source models in complex agent-based tasks.
20
- - **Support of 100+ languages and dialects** with strong capabilities for **multilingual instruction following** and **translation**.
21
-
22
- ## Model Overview
23
-
24
- **Qwen3-4B** has the following features:
25
- - Type: Causal Language Models
26
- - Training Stage: Pretraining & Post-training
27
- - Number of Parameters: 4.0B
28
- - Number of Parameters (Non-Embedding): 3.6B
29
- - Number of Layers: 36
30
- - Number of Attention Heads (GQA): 32 for Q and 8 for KV
31
- - Context Length: 32,768 natively and [131,072 tokens with YaRN](#processing-long-texts).
32
-
33
-
34
- For more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https://qwenlm.github.io/blog/qwen3/), [GitHub](https://github.com/QwenLM/Qwen3), and [Documentation](https://qwen.readthedocs.io/en/latest/).
35
-
36
- ## Quickstart
37
-
38
- The code of Qwen3 has been included in the latest versions of both **`transformers` (≥ 4.52.4)** and **`mlx_lm` (≥ 0.25.2)**, and we advise you to use the latest version of `transformers` and `mlx_lm`.
39
- Older versions (e.g., `transformers<4.51.0`) may raise errors like:
40
-
41
- ```text
42
- KeyError: 'qwen3'
43
- ```
44
-
45
- Install or upgrade both packages:
46
-
47
- ```bash
48
- pip install --upgrade transformers mlx_lm
49
- ```
50
-
51
- The following contains a code snippet illustrating how to use the model to generate content based on given inputs.
52
-
53
- ```python
54
- from mlx_lm import load, generate
55
-
56
- model, tokenizer = load("adeelahmad/ReasonableQwen3-4B")
57
- prompt = "Hello, please introduce yourself and tell me what you can do."
58
-
59
- if tokenizer.chat_template is not None:
60
- messages = [{"role": "user", "content": prompt}]
61
- prompt = tokenizer.apply_chat_template(
62
- messages,
63
- add_generation_prompt=True
64
- )
65
-
66
- response = generate(
67
- model,
68
- tokenizer,
69
- prompt=prompt,
70
- verbose=True,
71
- max_tokens=1024
72
- )
73
- print(response)
74
- ```
75
-
76
- ## Switching Between Thinking and Non-Thinking Mode
77
-
78
- > [!TIP]
79
- > The `enable_thinking` switch is also available in APIs created by SGLang and vLLM.
80
- > Please refer to our documentation for [SGLang](https://qwen.readthedocs.io/en/latest/deployment/sglang.html#thinking-non-thinking-modes) and [vLLM](https://qwen.readthedocs.io/en/latest/deployment/vllm.html#thinking-non-thinking-modes) users.
81
-
82
- ### `enable_thinking=True`
83
-
84
- By default, Qwen3 has thinking capabilities enabled, similar to QwQ-32B. This means the model will use its reasoning abilities to enhance the quality of generated responses. For example, when explicitly setting `enable_thinking=True` or leaving it as the default value in `tokenizer.apply_chat_template`, the model will engage its thinking mode.
85
-
86
- ```python
87
- text = tokenizer.apply_chat_template(
88
- messages,
89
- tokenize=False,
90
- add_generation_prompt=True,
91
- enable_thinking=True # True is the default value for enable_thinking
92
- )
93
- ```
94
-
95
- In this mode, the model will generate think content wrapped in a `<think>...</think>` block, followed by the final response.
96
-
97
- > [!NOTE]
98
- > For thinking mode, use `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0` (the default setting in `generation_config.json`). **DO NOT use greedy decoding**, as it can lead to performance degradation and endless repetitions. For more detailed guidance, please refer to the [Best Practices](#best-practices) section.
99
-
100
-
101
- ### `enable_thinking=False`
102
-
103
- We provide a hard switch to strictly disable the model's thinking behavior, aligning its functionality with the previous Qwen2.5-Instruct models. This mode is particularly useful in scenarios where disabling thinking is essential for enhancing efficiency.
104
-
105
- ```python
106
- text = tokenizer.apply_chat_template(
107
- messages,
108
- tokenize=False,
109
- add_generation_prompt=True,
110
- enable_thinking=False # Setting enable_thinking=False disables thinking mode
111
- )
112
- ```
113
-
114
- In this mode, the model will not generate any think content and will not include a `<think>...</think>` block.
115
-
116
- > [!NOTE]
117
- > For non-thinking mode, we suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, and `MinP=0`. For more detailed guidance, please refer to the [Best Practices](#best-practices) section.
118
-
119
- ### Advanced Usage: Switching Between Thinking and Non-Thinking Modes via User Input
120
-
121
- We provide a soft switch mechanism that allows users to dynamically control the model's behavior when `enable_thinking=True`. Specifically, you can add `/think` and `/no_think` to user prompts or system messages to switch the model's thinking mode from turn to turn. The model will follow the most recent instruction in multi-turn conversations.
122
-
123
- Here is an example of a multi-turn conversation:
124
-
125
- ```python
126
- from mlx_lm import load, generate
127
-
128
-
129
- class QwenChatbot:
130
- def __init__(self, model_name="adeelahmad/ReasonableQwen3-4B"):
131
- self.model, self.tokenizer = load(model_name)
132
- self.history = []
133
-
134
- def generate_response(self, user_input):
135
- messages = self.history + [{"role": "user", "content": user_input}]
136
-
137
- text = self.tokenizer.apply_chat_template(
138
- messages,
139
- tokenize=False,
140
- add_generation_prompt=True
141
- )
142
-
143
- response = generate(
144
- self.model,
145
- self.tokenizer,
146
- prompt=text,
147
- verbose=True,
148
- max_tokens=32768
149
- )
150
- # Update history
151
- self.history.append({"role": "user", "content": user_input})
152
- self.history.append({"role": "assistant", "content": response})
153
-
154
- return response
155
-
156
-
157
- # Example Usage
158
- if __name__ == "__main__":
159
- chatbot = QwenChatbot()
160
-
161
- # First input (without /think or /no_think tags, thinking mode is enabled by default)
162
- user_input_1 = "How many 'r's are in strawberries?"
163
- print(f"User: {user_input_1}")
164
- response_1 = chatbot.generate_response(user_input_1)
165
- print(f"Bot: {response_1}")
166
- print("----------------------")
167
-
168
- # Second input with /no_think
169
- user_input_2 = "Then, how many 'r's are in blueberries? /no_think"
170
- print(f"User: {user_input_2}")
171
- response_2 = chatbot.generate_response(user_input_2)
172
- print(f"Bot: {response_2}")
173
- print("----------------------")
174
-
175
- # Third input with /think
176
- user_input_3 = "Really? /think"
177
- print(f"User: {user_input_3}")
178
- response_3 = chatbot.generate_response(user_input_3)
179
- print(f"Bot: {response_3}")
180
- ```
181
-
182
- > [!NOTE]
183
- > For API compatibility, when `enable_thinking=True`, regardless of whether the user uses `/think` or `/no_think`, the model will always output a block wrapped in `<think>...</think>`. However, the content inside this block may be empty if thinking is disabled.
184
- > When `enable_thinking=False`, the soft switches are not valid. Regardless of any `/think` or `/no_think` tags input by the user, the model will not generate think content and will not include a `<think>...</think>` block.
185
-
186
- ## Agentic Use
187
-
188
- Qwen3 excels in tool calling capabilities. We recommend using [Qwen-Agent](https://github.com/QwenLM/Qwen-Agent) to make the best use of agentic ability of Qwen3. Qwen-Agent encapsulates tool-calling templates and tool-calling parsers internally, greatly reducing coding complexity.
189
-
190
- To define the available tools, you can use the MCP configuration file, use the integrated tool of Qwen-Agent, or integrate other tools by yourself.
191
-
192
- ```python
193
- from qwen_agent.agents import Assistant
194
-
195
- # Define LLM
196
- llm_cfg = {
197
- "model": "adeelahmad/ReasonableQwen3-4B",
198
-
199
- # Use the endpoint provided by Alibaba Model Studio:
200
- # "model_type": "qwen_dashscope",
201
- # "api_key": os.getenv("DASHSCOPE_API_KEY"),
202
-
203
- # Use a custom endpoint compatible with OpenAI API:
204
- "model_server": "http://localhost:8000/v1", # api_base
205
- "api_key": "EMPTY",
206
-
207
- # Other parameters:
208
- # "generate_cfg": {
209
- # # Add: When the response content is `<think>this is the thought</think>this is the answer`;
210
- # # Do not add: When the response has been separated by reasoning_content and content.
211
- # "thought_in_content": True,
212
- # },
213
- }
214
-
215
- # Define Tools
216
- tools = [
217
- {
218
- "mcpServers": { # You can specify the MCP configuration file
219
- "time": {
220
- "command": "uvx",
221
- "args": ["mcp-server-time", "--local-timezone=Asia/Shanghai"]
222
- },
223
- "fetch": {
224
- "command": "uvx",
225
- "args": ["mcp-server-fetch"]
226
- }
227
- }
228
- },
229
- "code_interpreter", # Built-in tools
230
- ]
231
-
232
- # Define Agent
233
- bot = Assistant(llm=llm_cfg, function_list=tools)
234
-
235
- # Streaming generation
236
- messages = [
237
- {
238
- "role": "user",
239
- "content": "https://qwenlm.github.io/blog/ Introduce the latest developments of Qwen"
240
- }
241
- ]
242
-
243
- for responses in bot.run(messages=messages):
244
- pass
245
-
246
- print(responses)
247
- ```
248
-
249
- ## Processing Long Texts
250
-
251
- Qwen3 natively supports context lengths of up to 32,768 tokens. For conversations where the total length (including both input and output) significantly exceeds this limit, we recommend using RoPE scaling techniques to handle long texts effectively. We have validated the model's performance on context lengths of up to 131,072 tokens using the [YaRN](https://arxiv.org/abs/2309.00071) method.
252
-
253
- YaRN is currently supported by several inference frameworks, e.g., `transformers` and `llama.cpp` for local use, `vllm` and `sglang` for deployment. In general, there are two approaches to enabling YaRN for supported frameworks:
254
-
255
- - Modifying the model files:
256
- In the `config.json` file, add the `rope_scaling` fields:
257
- ```json
258
- {
259
- ...,
260
- "rope_scaling": {
261
- "rope_type": "yarn",
262
- "factor": 4.0,
263
- "original_max_position_embeddings": 32768
264
- }
265
- }
266
- ```
267
-
268
- > [!IMPORTANT]
269
- > If you encounter the following warning
270
- > ```
271
- > Unrecognized keys in `rope_scaling` for 'rope_type'='yarn': {'original_max_position_embeddings'}
272
- > ```
273
- > please upgrade `transformers>=4.51.0`.
274
-
275
- > [!NOTE]
276
- > All the notable open-source frameworks implement static YaRN, which means the scaling factor remains constant regardless of input length, **potentially impacting performance on shorter texts.**
277
- > We advise adding the `rope_scaling` configuration only when processing long contexts is required.
278
- > It is also recommended to modify the `factor` as needed. For example, if the typical context length for your application is 65,536 tokens, it would be better to set `factor` as 2.0.
279
-
280
- > [!NOTE]
281
- > The default `max_position_embeddings` in `config.json` is set to 40,960. This allocation includes reserving 32,768 tokens for outputs and 8,192 tokens for typical prompts, which is sufficient for most scenarios involving short text processing. If the average context length does not exceed 32,768 tokens, we do not recommend enabling YaRN in this scenario, as it may potentially degrade model performance.
282
-
283
- > [!TIP]
284
- > The endpoint provided by Alibaba Model Studio supports dynamic YaRN by default and no extra configuration is needed.
285
-
286
- ## Best Practices
287
-
288
- To achieve optimal performance, we recommend the following settings:
289
-
290
- 1. **Sampling Parameters**:
291
- - For thinking mode (`enable_thinking=True`), use `Temperature=0.6`, `TopP=0.95`, `TopK=20`, and `MinP=0`. **DO NOT use greedy decoding**, as it can lead to performance degradation and endless repetitions.
292
- - For non-thinking mode (`enable_thinking=False`), we suggest using `Temperature=0.7`, `TopP=0.8`, `TopK=20`, and `MinP=0`.
293
- - For supported frameworks, you can adjust the `presence_penalty` parameter between 0 and 2 to reduce endless repetitions. However, using a higher value may occasionally result in language mixing and a slight decrease in model performance.
294
-
295
- 2. **Adequate Output Length**: We recommend using an output length of 32,768 tokens for most queries. For benchmarking on highly complex problems, such as those found in math and programming competitions, we suggest setting the max output length to 38,912 tokens. This provides the model with sufficient space to generate detailed and comprehensive responses, thereby enhancing its overall performance.
296
-
297
- 3. **Standardize Output Format**: We recommend using prompts to standardize model outputs when benchmarking.
298
- - **Math Problems**: Include "Please reason step by step, and put your final answer within \boxed{}." in the prompt.
299
- - **Multiple-Choice Questions**: Add the following JSON structure to the prompt to standardize responses: "Please show your choice in the `answer` field with only the choice letter, e.g., `"answer": "C"`."
300
-
301
- 4. **No Thinking Content in History**: In multi-turn conversations, the historical model output should only include the final output part and does not need to include the thinking content. It is implemented in the provided chat template in Jinja2. However, for frameworks that do not directly use the Jinja2 chat template, it is up to the developers to ensure that the best practice is followed.
302
-
303
- ### Citation
304
-
305
- If you find our work helpful, feel free to cite us.
306
-
307
- ```
308
- @misc{qwen3technicalreport,
309
- title={Qwen3 Technical Report},
310
- author={Qwen Team},
311
- year={2025},
312
- eprint={2505.09388},
313
- archivePrefix={arXiv},
314
- primaryClass={cs.CL},
315
- url={https://arxiv.org/abs/2505.09388},
316
- }
317
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ReasonableQwen3-4B-Q4_k.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1ff6cccba6817b45b9d82ce1b9341a295be6613a9ef3753030ba0aa067116dd
3
+ size 2497280672
ReasonableQwen3-4B-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:756d484ba5138d328eae40f0629c94f56a130caf6e9907432fc947d85c7f6c49
3
+ size 4280405152
checkpoint_20250903_232422_periodic_update_2600/added_tokens.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "</answer>": 151669,
3
- "</img_base64>": 151670,
4
- "</json_output>": 151671,
5
- "</ocr_text>": 151672,
6
- "</think>": 151668,
7
- "</tool_call>": 151658,
8
- "</tool_code>": 151673,
9
- "</tool_response>": 151666,
10
- "<answer>": 151674,
11
- "<img_base64>": 151675,
12
- "<json_output>": 151676,
13
- "<ocr_text>": 151677,
14
- "<think>": 151667,
15
- "<tool_call>": 151657,
16
- "<tool_code>": 151678,
17
- "<tool_response>": 151665,
18
- "<|box_end|>": 151649,
19
- "<|box_start|>": 151648,
20
- "<|endoftext|>": 151643,
21
- "<|file_sep|>": 151664,
22
- "<|fim_middle|>": 151660,
23
- "<|fim_pad|>": 151662,
24
- "<|fim_prefix|>": 151659,
25
- "<|fim_suffix|>": 151661,
26
- "<|im_end|>": 151645,
27
- "<|im_start|>": 151644,
28
- "<|image_pad|>": 151655,
29
- "<|object_ref_end|>": 151647,
30
- "<|object_ref_start|>": 151646,
31
- "<|quad_end|>": 151651,
32
- "<|quad_start|>": 151650,
33
- "<|repo_name|>": 151663,
34
- "<|video_pad|>": 151656,
35
- "<|vision_end|>": 151653,
36
- "<|vision_pad|>": 151654,
37
- "<|vision_start|>": 151652
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232422_periodic_update_2600/chat_template.jinja DELETED
@@ -1,96 +0,0 @@
1
- {%- if tools %}
2
- {{- '<|im_start|>system\n' }}
3
- {%- if messages[0].role == 'system' %}
4
- {{- messages[0].content + '\n\n' }}
5
- {%- endif %}
6
- {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
- {%- for tool in tools %}
8
- {{- "\n" }}
9
- {{- tool | tojson }}
10
- {%- endfor %}
11
- {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
- {%- else %}
13
- {%- if messages[0].role == 'system' %}
14
- {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
- {%- endif %}
16
- {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- set tool_start = "<tool_response>" %}
21
- {%- set tool_start_length = tool_start|length %}
22
- {%- set start_of_message = message.content[:tool_start_length] %}
23
- {%- set tool_end = "</tool_response>" %}
24
- {%- set tool_end_length = tool_end|length %}
25
- {%- set start_pos = (message.content|length) - tool_end_length %}
26
- {%- if start_pos < 0 %}
27
- {%- set start_pos = 0 %}
28
- {%- endif %}
29
- {%- set end_of_message = message.content[start_pos:] %}
30
- {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
31
- {%- set ns.multi_step_tool = false %}
32
- {%- set ns.last_query_index = index %}
33
- {%- endif %}
34
- {%- endfor %}
35
- {%- for message in messages %}
36
- {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
37
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
38
- {%- elif message.role == "assistant" %}
39
- {%- set content = message.content %}
40
- {%- set reasoning_content = '' %}
41
- {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
42
- {%- set reasoning_content = message.reasoning_content %}
43
- {%- else %}
44
- {%- if '</think>' in message.content %}
45
- {%- set content = (message.content.split('</think>')|last).lstrip('\n') %}
46
- {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\n') %}
47
- {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
48
- {%- endif %}
49
- {%- endif %}
50
- {%- if loop.index0 > ns.last_query_index %}
51
- {%- if loop.last or (not loop.last and reasoning_content) %}
52
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
53
- {%- else %}
54
- {{- '<|im_start|>' + message.role + '\n' + content }}
55
- {%- endif %}
56
- {%- else %}
57
- {{- '<|im_start|>' + message.role + '\n' + content }}
58
- {%- endif %}
59
- {%- if message.tool_calls %}
60
- {%- for tool_call in message.tool_calls %}
61
- {%- if (loop.first and content) or (not loop.first) %}
62
- {{- '\n' }}
63
- {%- endif %}
64
- {%- if tool_call.function %}
65
- {%- set tool_call = tool_call.function %}
66
- {%- endif %}
67
- {{- '<tool_call>\n{"name": "' }}
68
- {{- tool_call.name }}
69
- {{- '", "arguments": ' }}
70
- {%- if tool_call.arguments is string %}
71
- {{- tool_call.arguments }}
72
- {%- else %}
73
- {{- tool_call.arguments | tojson }}
74
- {%- endif %}
75
- {{- '}\n</tool_call>' }}
76
- {%- endfor %}
77
- {%- endif %}
78
- {{- '<|im_end|>\n' }}
79
- {%- elif message.role == "tool" %}
80
- {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
81
- {{- '<|im_start|>user' }}
82
- {%- endif %}
83
- {{- '\n<tool_response>\n' }}
84
- {{- message.content }}
85
- {{- '\n</tool_response>' }}
86
- {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
87
- {{- '<|im_end|>\n' }}
88
- {%- endif %}
89
- {%- endif %}
90
- {%- endfor %}
91
- {%- if add_generation_prompt %}
92
- {{- '<|im_start|>assistant\n' }}
93
- {%- if enable_thinking is defined and enable_thinking is false %}
94
- {{- '<think>\n\n</think>\n\n' }}
95
- {%- endif %}
96
- {%- endif %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232422_periodic_update_2600/config.json DELETED
@@ -1,30 +0,0 @@
1
- {
2
- "architectures": [
3
- "Qwen3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
- "eos_token_id": 151645,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 2560,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 9728,
14
- "max_position_embeddings": 40960,
15
- "max_window_layers": 36,
16
- "model_type": "qwen3",
17
- "num_attention_heads": 32,
18
- "num_hidden_layers": 36,
19
- "num_key_value_heads": 8,
20
- "rms_norm_eps": 1e-06,
21
- "rope_scaling": null,
22
- "rope_theta": 1000000,
23
- "sliding_window": null,
24
- "tie_word_embeddings": true,
25
- "torch_dtype": "bfloat16",
26
- "transformers_version": "4.51.0",
27
- "use_cache": true,
28
- "use_sliding_window": false,
29
- "vocab_size": 151936
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232422_periodic_update_2600/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232422_periodic_update_2600/special_tokens_map.json DELETED
@@ -1,102 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- {
4
- "content": "</answer>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "</img_base64>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "</json_output>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "</ocr_text>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "</think>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</tool_code>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "<answer>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "<img_base64>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<json_output>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "<ocr_text>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "<think>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "<tool_code>",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- }
87
- ],
88
- "eos_token": {
89
- "content": "<|im_end|>",
90
- "lstrip": false,
91
- "normalized": false,
92
- "rstrip": false,
93
- "single_word": false
94
- },
95
- "pad_token": {
96
- "content": "<|endoftext|>",
97
- "lstrip": false,
98
- "normalized": false,
99
- "rstrip": false,
100
- "single_word": false
101
- }
102
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232422_periodic_update_2600/tokenizer_config.json DELETED
@@ -1,318 +0,0 @@
1
- {
2
- "add_bos_token": false,
3
- "add_prefix_space": false,
4
- "added_tokens_decoder": {
5
- "151643": {
6
- "content": "<|endoftext|>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "151644": {
14
- "content": "<|im_start|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "151645": {
22
- "content": "<|im_end|>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "151646": {
30
- "content": "<|object_ref_start|>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "151647": {
38
- "content": "<|object_ref_end|>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "151648": {
46
- "content": "<|box_start|>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "151649": {
54
- "content": "<|box_end|>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "151650": {
62
- "content": "<|quad_start|>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- },
69
- "151651": {
70
- "content": "<|quad_end|>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
- },
77
- "151652": {
78
- "content": "<|vision_start|>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": true
84
- },
85
- "151653": {
86
- "content": "<|vision_end|>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": true
92
- },
93
- "151654": {
94
- "content": "<|vision_pad|>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": true
100
- },
101
- "151655": {
102
- "content": "<|image_pad|>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": true
108
- },
109
- "151656": {
110
- "content": "<|video_pad|>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "151657": {
118
- "content": "<tool_call>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": false
124
- },
125
- "151658": {
126
- "content": "</tool_call>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "151659": {
134
- "content": "<|fim_prefix|>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "151660": {
142
- "content": "<|fim_middle|>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "151661": {
150
- "content": "<|fim_suffix|>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "151662": {
158
- "content": "<|fim_pad|>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "151663": {
166
- "content": "<|repo_name|>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "151664": {
174
- "content": "<|file_sep|>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "151665": {
182
- "content": "<tool_response>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "151666": {
190
- "content": "</tool_response>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- },
197
- "151667": {
198
- "content": "<think>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": true
204
- },
205
- "151668": {
206
- "content": "</think>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": true
212
- },
213
- "151669": {
214
- "content": "</answer>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false,
219
- "special": true
220
- },
221
- "151670": {
222
- "content": "</img_base64>",
223
- "lstrip": false,
224
- "normalized": false,
225
- "rstrip": false,
226
- "single_word": false,
227
- "special": true
228
- },
229
- "151671": {
230
- "content": "</json_output>",
231
- "lstrip": false,
232
- "normalized": false,
233
- "rstrip": false,
234
- "single_word": false,
235
- "special": true
236
- },
237
- "151672": {
238
- "content": "</ocr_text>",
239
- "lstrip": false,
240
- "normalized": false,
241
- "rstrip": false,
242
- "single_word": false,
243
- "special": true
244
- },
245
- "151673": {
246
- "content": "</tool_code>",
247
- "lstrip": false,
248
- "normalized": false,
249
- "rstrip": false,
250
- "single_word": false,
251
- "special": true
252
- },
253
- "151674": {
254
- "content": "<answer>",
255
- "lstrip": false,
256
- "normalized": false,
257
- "rstrip": false,
258
- "single_word": false,
259
- "special": true
260
- },
261
- "151675": {
262
- "content": "<img_base64>",
263
- "lstrip": false,
264
- "normalized": false,
265
- "rstrip": false,
266
- "single_word": false,
267
- "special": true
268
- },
269
- "151676": {
270
- "content": "<json_output>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false,
275
- "special": true
276
- },
277
- "151677": {
278
- "content": "<ocr_text>",
279
- "lstrip": false,
280
- "normalized": false,
281
- "rstrip": false,
282
- "single_word": false,
283
- "special": true
284
- },
285
- "151678": {
286
- "content": "<tool_code>",
287
- "lstrip": false,
288
- "normalized": false,
289
- "rstrip": false,
290
- "single_word": false,
291
- "special": true
292
- }
293
- },
294
- "additional_special_tokens": [
295
- "</answer>",
296
- "</img_base64>",
297
- "</json_output>",
298
- "</ocr_text>",
299
- "</think>",
300
- "</tool_code>",
301
- "<answer>",
302
- "<img_base64>",
303
- "<json_output>",
304
- "<ocr_text>",
305
- "<think>",
306
- "<tool_code>"
307
- ],
308
- "bos_token": null,
309
- "clean_up_tokenization_spaces": false,
310
- "eos_token": "<|im_end|>",
311
- "errors": "replace",
312
- "extra_special_tokens": {},
313
- "model_max_length": 131072,
314
- "pad_token": "<|endoftext|>",
315
- "split_special_tokens": false,
316
- "tokenizer_class": "Qwen2Tokenizer",
317
- "unk_token": null
318
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232422_periodic_update_2600/training_state.json DELETED
@@ -1,763 +0,0 @@
1
- {
2
- "global_step": 2600,
3
- "num_updates": 2600,
4
- "use_lora": false,
5
- "rng_state": {
6
- "python": [
7
- 3,
8
- [
9
- 3967271387,
10
- 1026184164,
11
- 3965041145,
12
- 2291916954,
13
- 1837720366,
14
- 1080231142,
15
- 3842271517,
16
- 3425527812,
17
- 240197365,
18
- 2952456686,
19
- 1242835039,
20
- 3963990717,
21
- 2700559131,
22
- 1438349802,
23
- 2358361310,
24
- 27806138,
25
- 391662474,
26
- 1397158451,
27
- 3290575848,
28
- 1443279612,
29
- 2994218186,
30
- 639015565,
31
- 1458236633,
32
- 2738794811,
33
- 75660796,
34
- 822747358,
35
- 4062186731,
36
- 2798375312,
37
- 2554111111,
38
- 4032040375,
39
- 734595846,
40
- 991400690,
41
- 902505219,
42
- 3264611100,
43
- 340175355,
44
- 1539772439,
45
- 2699973612,
46
- 3090769310,
47
- 3227344320,
48
- 2332974129,
49
- 2522140629,
50
- 1358053300,
51
- 2451308853,
52
- 3863433119,
53
- 1654992332,
54
- 4043410239,
55
- 1192164566,
56
- 553958980,
57
- 1523920377,
58
- 307092399,
59
- 4122746375,
60
- 2275324450,
61
- 1118603971,
62
- 2948003228,
63
- 1285607053,
64
- 2683552785,
65
- 150677996,
66
- 2833336017,
67
- 1500750038,
68
- 761162663,
69
- 567868195,
70
- 12561648,
71
- 3472908617,
72
- 421911787,
73
- 1223019265,
74
- 1609174410,
75
- 2139856908,
76
- 1289583719,
77
- 1590076021,
78
- 997166301,
79
- 2579634303,
80
- 3273266456,
81
- 2621387977,
82
- 3943650249,
83
- 1359930915,
84
- 2718495645,
85
- 3602690125,
86
- 4246287691,
87
- 1450000782,
88
- 439802689,
89
- 1554690948,
90
- 1911779206,
91
- 1612261319,
92
- 3158623330,
93
- 1363688027,
94
- 2629041297,
95
- 1220231722,
96
- 3909353626,
97
- 3864988698,
98
- 715439623,
99
- 2690554799,
100
- 1962746674,
101
- 773247238,
102
- 2801483771,
103
- 1762340389,
104
- 3437644373,
105
- 2872830304,
106
- 2008579063,
107
- 1514567691,
108
- 1583533810,
109
- 3009140248,
110
- 3722410595,
111
- 2613372944,
112
- 2287445042,
113
- 1874155746,
114
- 3245031202,
115
- 1976154694,
116
- 139059957,
117
- 4119838693,
118
- 3305140779,
119
- 1999827708,
120
- 50827604,
121
- 2243156149,
122
- 130672473,
123
- 2447788079,
124
- 1599266450,
125
- 336263314,
126
- 2105192952,
127
- 2297407557,
128
- 1049027620,
129
- 891242113,
130
- 3062510035,
131
- 1350083608,
132
- 1285665300,
133
- 393408272,
134
- 1685144927,
135
- 908939345,
136
- 4265052142,
137
- 3410082095,
138
- 2059177364,
139
- 2663072991,
140
- 2839859162,
141
- 1807788264,
142
- 3670966323,
143
- 1408830836,
144
- 2277275526,
145
- 2132904638,
146
- 3249212407,
147
- 1532782497,
148
- 2146509655,
149
- 1042323006,
150
- 1591886840,
151
- 775265065,
152
- 1831216413,
153
- 2942338395,
154
- 3275679928,
155
- 636464774,
156
- 625752514,
157
- 3395261534,
158
- 3422484700,
159
- 3288346824,
160
- 3414769759,
161
- 1598966223,
162
- 147553345,
163
- 2902205377,
164
- 2854801943,
165
- 1981896482,
166
- 4163890305,
167
- 161164831,
168
- 2802426963,
169
- 2576904579,
170
- 4016006363,
171
- 511281868,
172
- 1766876080,
173
- 2351919422,
174
- 3951315846,
175
- 3685372087,
176
- 2978153450,
177
- 2065462037,
178
- 3763241279,
179
- 1424853649,
180
- 1604969762,
181
- 3260153236,
182
- 2102904571,
183
- 500994218,
184
- 2439467442,
185
- 402407996,
186
- 351218299,
187
- 881150311,
188
- 3581411273,
189
- 259025625,
190
- 1267497165,
191
- 2062889741,
192
- 2691827516,
193
- 817381649,
194
- 1212806228,
195
- 2759627479,
196
- 220373713,
197
- 2595028892,
198
- 2442232401,
199
- 1569800854,
200
- 343128555,
201
- 636335630,
202
- 1962444392,
203
- 3804276325,
204
- 2779312761,
205
- 1374006413,
206
- 3026390385,
207
- 2995559877,
208
- 3655553179,
209
- 1662430865,
210
- 31590074,
211
- 121330124,
212
- 2800224703,
213
- 775347029,
214
- 448217505,
215
- 2063355508,
216
- 1013466290,
217
- 2918262154,
218
- 3426228026,
219
- 2226777553,
220
- 4156207311,
221
- 3333921308,
222
- 3947863768,
223
- 1249538366,
224
- 236920610,
225
- 3355109765,
226
- 2123516788,
227
- 1096721379,
228
- 2557118369,
229
- 3229855130,
230
- 2010608274,
231
- 1991908981,
232
- 1262223436,
233
- 1464738273,
234
- 3464681822,
235
- 2630317900,
236
- 2254902127,
237
- 3306153656,
238
- 208406325,
239
- 1067526520,
240
- 4026720699,
241
- 3567293771,
242
- 3658434018,
243
- 1975944438,
244
- 3899032814,
245
- 1715875890,
246
- 1000236124,
247
- 2306234460,
248
- 3621852674,
249
- 2874181232,
250
- 200568775,
251
- 3073791375,
252
- 2523932121,
253
- 708394853,
254
- 3365567442,
255
- 498387788,
256
- 3480987681,
257
- 3657038461,
258
- 3064855411,
259
- 829347864,
260
- 1028039079,
261
- 3602672997,
262
- 1744316950,
263
- 1132707734,
264
- 926119298,
265
- 2772575537,
266
- 282854042,
267
- 1900724014,
268
- 3029583931,
269
- 395776534,
270
- 670561433,
271
- 3274389917,
272
- 2802241091,
273
- 3071226945,
274
- 2361585457,
275
- 1797476586,
276
- 2225467261,
277
- 1706335457,
278
- 492246825,
279
- 2477168512,
280
- 815813939,
281
- 2855420512,
282
- 3495681585,
283
- 2575223221,
284
- 2537280983,
285
- 3314566655,
286
- 1164100149,
287
- 1977301214,
288
- 4193175418,
289
- 480545945,
290
- 3537844675,
291
- 1664891516,
292
- 1443772533,
293
- 3295232896,
294
- 3317793780,
295
- 1215470101,
296
- 3218299303,
297
- 3575188822,
298
- 1250351790,
299
- 749808713,
300
- 2050084806,
301
- 572159727,
302
- 308805938,
303
- 780768817,
304
- 722735732,
305
- 756700681,
306
- 3795455885,
307
- 282113829,
308
- 3919074469,
309
- 3707744024,
310
- 670882057,
311
- 17817083,
312
- 1637485633,
313
- 3281135527,
314
- 1509269331,
315
- 12243112,
316
- 1945667537,
317
- 2089133460,
318
- 1778357461,
319
- 1142017596,
320
- 1727646817,
321
- 597203797,
322
- 894461142,
323
- 247967411,
324
- 3636397797,
325
- 1257083668,
326
- 3535042016,
327
- 3456404171,
328
- 2272706014,
329
- 2261998731,
330
- 2819821439,
331
- 4038821677,
332
- 2599180325,
333
- 1226717655,
334
- 3333051398,
335
- 2986286242,
336
- 149297818,
337
- 1116252696,
338
- 3983432272,
339
- 1563620267,
340
- 166236638,
341
- 1342965486,
342
- 3425751359,
343
- 565185296,
344
- 1639862847,
345
- 2909755277,
346
- 186513437,
347
- 1688704278,
348
- 999207239,
349
- 113552983,
350
- 1298001852,
351
- 319612107,
352
- 3792112676,
353
- 747046330,
354
- 1185372334,
355
- 11347435,
356
- 1792826169,
357
- 297736755,
358
- 632733242,
359
- 684491354,
360
- 4194645610,
361
- 680766781,
362
- 3666422277,
363
- 4164736579,
364
- 1162866320,
365
- 1705411912,
366
- 2372330857,
367
- 2646063778,
368
- 3829899589,
369
- 1317565577,
370
- 1401229255,
371
- 1808220379,
372
- 2203148417,
373
- 1261380648,
374
- 2201630139,
375
- 3494578862,
376
- 2760365219,
377
- 378948370,
378
- 4115035773,
379
- 2194258730,
380
- 493318225,
381
- 4111876160,
382
- 198214727,
383
- 2938074736,
384
- 350928625,
385
- 846140936,
386
- 2796483478,
387
- 1892200324,
388
- 1206520424,
389
- 2785939791,
390
- 391211171,
391
- 2248891773,
392
- 1382812750,
393
- 3015580149,
394
- 1268639917,
395
- 724243112,
396
- 76030864,
397
- 1018227745,
398
- 2351890926,
399
- 2600896235,
400
- 220033969,
401
- 1401586091,
402
- 2071710477,
403
- 3210350300,
404
- 1690420126,
405
- 3321335187,
406
- 2667558880,
407
- 3214017202,
408
- 1054591551,
409
- 2685286044,
410
- 4010372155,
411
- 2879699849,
412
- 3593888005,
413
- 1630255656,
414
- 722870681,
415
- 2824448803,
416
- 758046745,
417
- 486385045,
418
- 148699239,
419
- 1823339336,
420
- 3626156994,
421
- 221046228,
422
- 191046018,
423
- 2152525365,
424
- 1859333602,
425
- 2880813865,
426
- 466507697,
427
- 221358203,
428
- 851466719,
429
- 2415410943,
430
- 2113512377,
431
- 3716237849,
432
- 3272429115,
433
- 3228418199,
434
- 3284186060,
435
- 4247471141,
436
- 2201569734,
437
- 223470072,
438
- 2002708052,
439
- 3198208064,
440
- 2376412412,
441
- 1463203154,
442
- 1809443204,
443
- 2359348598,
444
- 1600065056,
445
- 3215221234,
446
- 4165516417,
447
- 3921844367,
448
- 1959771516,
449
- 1679280620,
450
- 1749349436,
451
- 2233430027,
452
- 2717267181,
453
- 3964670612,
454
- 3422468358,
455
- 2954866445,
456
- 2445246838,
457
- 1362196395,
458
- 2595844428,
459
- 3645080545,
460
- 1587556344,
461
- 2755729671,
462
- 229885802,
463
- 2185126283,
464
- 3766598538,
465
- 2737234349,
466
- 364955618,
467
- 1607374308,
468
- 2706323199,
469
- 3687070449,
470
- 1627005950,
471
- 2674407664,
472
- 3630669444,
473
- 1033589683,
474
- 3954912200,
475
- 2072092797,
476
- 3862368041,
477
- 2370545261,
478
- 163801077,
479
- 2144696655,
480
- 576857368,
481
- 2814318568,
482
- 713192918,
483
- 2953431186,
484
- 2672630836,
485
- 3702980078,
486
- 748001453,
487
- 3504919089,
488
- 1058606135,
489
- 1948644168,
490
- 3782905274,
491
- 1801691984,
492
- 1840015467,
493
- 3958077367,
494
- 3902829790,
495
- 1403592563,
496
- 2155574652,
497
- 1909700095,
498
- 450763344,
499
- 3687053349,
500
- 419335117,
501
- 1301216505,
502
- 2876532296,
503
- 1915964542,
504
- 3258106886,
505
- 2200859680,
506
- 2692011171,
507
- 1456962543,
508
- 2389659463,
509
- 1346117759,
510
- 1395178467,
511
- 2284331656,
512
- 1517154729,
513
- 838732991,
514
- 1576226382,
515
- 731833408,
516
- 2402980990,
517
- 4136742285,
518
- 968065114,
519
- 2846239888,
520
- 3160979071,
521
- 3064847145,
522
- 3268819516,
523
- 1501375000,
524
- 3262611637,
525
- 1482644061,
526
- 3869662591,
527
- 3715366422,
528
- 4242305207,
529
- 122918596,
530
- 3597934906,
531
- 3169248692,
532
- 3724069332,
533
- 2002817636,
534
- 3617952404,
535
- 1380342751,
536
- 2000442134,
537
- 3909627280,
538
- 1399763842,
539
- 2025926546,
540
- 122000688,
541
- 2311840429,
542
- 2008978145,
543
- 2393006054,
544
- 2691729392,
545
- 1360604707,
546
- 3592756263,
547
- 1803431373,
548
- 875104400,
549
- 762907645,
550
- 3955227568,
551
- 3515418115,
552
- 3226061968,
553
- 3397142323,
554
- 968385291,
555
- 3045138149,
556
- 440380095,
557
- 3765813124,
558
- 2579837417,
559
- 3198260520,
560
- 2678283459,
561
- 2395660105,
562
- 759161070,
563
- 3657479079,
564
- 130768490,
565
- 1813297074,
566
- 1825796373,
567
- 80597496,
568
- 1015972759,
569
- 3567390046,
570
- 1117320950,
571
- 2284466118,
572
- 1352877410,
573
- 2870165791,
574
- 3391069470,
575
- 3848649012,
576
- 3864938430,
577
- 83162416,
578
- 3514199373,
579
- 1679135968,
580
- 3910876316,
581
- 1897074782,
582
- 1186656431,
583
- 1443718422,
584
- 930721565,
585
- 2580384646,
586
- 3071840050,
587
- 1817142007,
588
- 306252371,
589
- 583436048,
590
- 1143706159,
591
- 3470195927,
592
- 2629056830,
593
- 1054234580,
594
- 839422126,
595
- 786000543,
596
- 1645391398,
597
- 437221603,
598
- 1655602391,
599
- 3804125963,
600
- 471478435,
601
- 3007407057,
602
- 3701105273,
603
- 1821795365,
604
- 2492888224,
605
- 2019211184,
606
- 3537434119,
607
- 3674233243,
608
- 746650350,
609
- 1262611233,
610
- 723466668,
611
- 460013785,
612
- 233285992,
613
- 1809385760,
614
- 4096419422,
615
- 1401994125,
616
- 927259547,
617
- 3580706171,
618
- 3954080383,
619
- 1440686000,
620
- 2183228917,
621
- 344398639,
622
- 3829306612,
623
- 2629498462,
624
- 3659623063,
625
- 1120047396,
626
- 440893707,
627
- 2300023223,
628
- 2915441674,
629
- 931117951,
630
- 92325697,
631
- 710694309,
632
- 717544865,
633
- 392
634
- ],
635
- null
636
- ],
637
- "numpy": [
638
- "MT19937",
639
- "[ 15572 980513701 2334715163 3585534944 1822198675 158479007\n 1300107201 2003433159 424170022 4102602503 2437447838 1924282775\n 2084306490 4132823124 4216394081 1526156729 4231078312 3658730376\n 3599347945 3798337125 544676946 3949203055 1596292274 2255158710\n 703032348 636265253 2880318131 3345387760 162413307 2418710564\n 3712245020 2175226970 563044056 2939814745 2838234633 468141434\n 616739654 564867267 2130155541 815641611 601811839 2004017220\n 3627706467 3951463947 810570068 3028421201 454655469 3270345648\n 555008207 3255294172 3259033389 429183833 272696145 2007214122\n 2243779629 1934853570 517873959 1769075612 2057249323 825685197\n 21711389 271106734 3943034084 3547272802 1718926725 3289803093\n 2224067888 3644890877 3431377018 1754806530 2376472949 2892610362\n 1500501344 3824621710 1417356523 4122790557 775716514 1813030967\n 3994108828 391693578 1388189506 1179060416 1727839607 3646219745\n 3467814014 1642908326 1500533561 1281477999 2139613886 209245364\n 1449934686 3593983958 693904485 999778614 847538348 922048571\n 1218470567 916177375 1196217437 3715695791 3572687936 2177952212\n 2976499668 1502043605 3295312140 473156180 3489777550 4116262924\n 726728023 266896064 1207195182 1422796794 3298302327 2546802485\n 3089285157 4087066326 281999229 3833891270 4133996591 3393553875\n 1790581640 1088958086 372349629 1150038540 3967005853 3449868499\n 2783031950 745299387 4177989191 440862037 1630705675 3432601205\n 3976086290 2477494074 2631694750 55867155 3381732871 3988268061\n 4190916192 3426936986 1292805609 2800262421 1433946529 2604379311\n 3803100625 2130255467 4134910564 3389303350 912295037 1986407142\n 60132537 1465812628 2556679777 768839848 561642210 2962288069\n 2900250399 2446769166 2830015834 1820339287 3836052622 3843539266\n 3448899655 719693687 2608513431 807017838 705657612 1313405027\n 308572498 3011745795 3544875535 3662789479 792949536 1679330381\n 2262304426 3714222327 3252067572 3530366244 2847367189 1818638534\n 
4196918839 1197188921 1714287054 3610324578 1759524349 658319653\n 4062775635 2170322251 3246948233 467741743 2311362121 1326083926\n 2215935251 2860151158 3543259014 4288886076 1000061343 35607424\n 3800789574 1024328448 2871144392 1452668283 2547917804 794856369\n 3652179617 850026679 66787329 99615284 3360338772 2597540132\n 1809210064 3923947080 4257343339 372176093 3483099399 721873771\n 1101276431 2834898271 76176267 4200628658 2773607601 3516401276\n 3454896121 2354147457 1223458703 3128119783 529915195 2585910314\n 3544532140 551469296 3014823830 3511999051 1463986000 3624754398\n 704833819 3852626834 2711357985 1324465084 1627535231 570708901\n 2717601289 725003848 1942965082 328822490 3620824822 1651096734\n 2785116849 1685019798 1956468619 3534906475 922043906 3007951301\n 4238844799 755293417 3011697131 446474988 2478554140 2374977239\n 1380053003 2243890933 2686719863 2130566958 3840030097 1793607073\n 3796244776 2639065600 3001847252 1477803129 1197364324 2384746982\n 766409730 2722234329 3102942583 2032952634 961381465 1104664880\n 431680105 3809455490 596671995 2974418974 2572169252 3211875863\n 1700476003 2461146565 4138090399 3571815849 2874279728 3348404697\n 1894538786 1654054896 1861571639 643113073 1024018105 110160162\n 3688705424 1588307750 1010684555 1071478016 3908839978 1361810520\n 4077136201 1942152543 3862285636 788921650 3325732842 2086596894\n 2354559341 920752382 2089296746 412564844 3783133394 3542635692\n 151261507 2024582056 27012071 3714904670 2251973869 3415653000\n 3122199829 1793993298 3504479999 2494502573 3472923469 628884745\n 400477905 2068607674 2511099917 76006962 4127192322 656421806\n 2099151600 4055081824 3120853595 1902231402 1793968517 2739152483\n 3675524757 3637643391 2093725246 3500121902 291758869 1652675998\n 1139040273 1626503079 1329269718 3800218668 1301440229 4094598479\n 2030419032 2206069114 2167504310 3568823651 1480132672 4189195270\n 1003514971 2108978250 4235920891 1015223357 1328980599 3065593845\n 
772959451 1736648400 1111173855 2673522945 3202521755 1515315402\n 1025987717 2556593896 1098413506 3353399904 2969501057 2094670114\n 2847919939 3042807578 2837794286 1675561875 2905519122 4265188297\n 2610926124 846285729 2241003777 2845770412 2129473060 3762815768\n 2144316967 1546390655 1870814520 1524713984 3716398313 3346580439\n 507361322 3071157273 715371311 667081236 1562427246 1416032086\n 2719153631 1214541502 3927763433 4093412577 1609261242 1472085592\n 2916826031 2284397012 4029669634 4115943418 618581971 2078599894\n 2195634027 568626950 551593208 1404161907 4048083862 206856294\n 2947194844 2767249973 1603907667 1631351803 1522568516 3530861276\n 932299423 1409409376 1006753259 2778802782 2428826612 4160546743\n 1957871534 3303229622 2827456021 2670092224 3383794488 945607414\n 1983832766 358657548 4271708270 755497396 3434273208 1362230140\n 1689853703 801994005 3746197505 3596436611 1542752314 3254670338\n 1548922657 4130814301 3943625172 221411986 713064282 4233062979\n 4075891970 2437106728 544573526 3064910259 151483803 689855214\n 1545945006 3683633364 3289795997 329789217 4168762065 3787243687\n 2265695874 39834191 1266893307 1324209011 1243246540 3973960372\n 541659911 1362379416 1601251635 1863255185 3125665448 4219983083\n 2682202466 567260347 1405575843 3420495303 2758288434 3586390223\n 336221788 2630832173 3573336941 1218191945 2213154892 2821992107\n 3693992851 696758711 279252507 2892498320 1862489732 255938916\n 1661968992 3484941106 4082783555 2936202287 3514506417 1758172715\n 517257860 3411688455 3637760904 1419041484 2137852375 757229925\n 3065881553 339906360 661617426 760623637 1046610245 876310326\n 1014024268 3183719419 2438284349 1776461276 3594106675 2854090739\n 2602159385 4289618092 2857553425 695761542 3083079398 76408317\n 3254461403 153861699 2129194363 2941386031 4094753231 1156889483\n 2242959746 1437290897 1982676962 1514123682 1353077347 1818424511\n 2436251404 4085735581 3547311726 4033030170 2353214711 
479069124\n 241656432 4085762125 551929572 2454945299 132044757 524232234\n 3799812788 6269782 1338614034 3443833252 1258856457 4165168463\n 355192100 2534159709 2851727269 2518795790 3366162664 3414356452\n 1869549905 2688919231 3437293505 2606459835 902202159 3721325246\n 1701626821 1124672137 2815257054 3602219523 1714310200 949851574\n 2336520456 504372525 1144232445 195534505 4020833259 946396359\n 1559665603 3237234635 826432554 799463637 3769666381 3612718603\n 762518382 2954474157 4052494419 672053721 1345549799 1873779721\n 2936068468 972026843 1411934901 2952294227 2546812485 2659826516\n 2834428224 2455667549 4281380303 2345320401 3932855189 309111429\n 834893265 2699122382 2146331862 3207660078 1202940344 1030469978\n 3945221344 2900087534 722371964 3096315981 1621397645 907802015\n 450035999 2628913824 3160204880 3961963201 349350642 3107386851\n 560688431 2098806006 3142991583 79041694 2234561220 122454157]",
640
- 624,
641
- 0,
642
- 0.0
643
- ],
644
- "mlx": 1756905873,
645
- "mlx_key": [
646
- 0,
647
- 1756903229
648
- ]
649
- },
650
- "training_args_snapshot": {
651
- "output_dir": "outy1266_align_last7",
652
- "max_kv_size": 1536,
653
- "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit",
654
- "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/InferenceIllusionist/gpt-oss-20b-MLX-4bit",
655
- "draft_model_path": null,
656
- "benchmark_every": 0,
657
- "benchmark_dataset": "gsm8k",
658
- "benchmark_dataset_config": "main",
659
- "benchmark_split": "test",
660
- "benchmark_samples": 10,
661
- "benchmark_prompt_key": "question",
662
- "benchmark_answer_key": "answer",
663
- "benchmark_max_new_tokens": 196,
664
- "benchmark_temperature": 0.0,
665
- "benchmark_top_p": 1.0,
666
- "benchmark_top_k": 0,
667
- "benchmark_use_chat_template": true,
668
- "benchmark_stop_on_error": false,
669
- "min_think_tokens": 32,
670
- "think_end_early_bias": -12.0,
671
- "bias_answer_start_after_min_think": true,
672
- "train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl",
673
- "val_dataset_path": null,
674
- "dataset_name": null,
675
- "dataset_config": null,
676
- "dataset_train_split": "train",
677
- "dataset_val_split": "test",
678
- "dataset_prompt_key": "prompt",
679
- "dataset_answer_key": "completion",
680
- "max_prompt_len": 350,
681
- "max_gen_len": 128,
682
- "system_prompt": null,
683
- "think_start_tag": "<think>",
684
- "think_end_tag": "</think>",
685
- "answer_start_tag": "<answer>",
686
- "answer_end_tag": "</answer>",
687
- "think_boost_tokens": 24,
688
- "think_temperature": 0.15,
689
- "answer_temperature": 0.1,
690
- "sampling_top_p": 0.6,
691
- "sampling_min_p": 0.05,
692
- "sampling_top_k": 40,
693
- "repetition_penalty": 1.15,
694
- "repetition_context_size": 64,
695
- "hard_mask_mcq_first_token": true,
696
- "mcq_letter_lift": 10.0,
697
- "mcq_ban_first_bias": -14.0,
698
- "nonmcq_ban_first_bias": -10.0,
699
- "mcq_close_after_k": 1,
700
- "min_answer_tokens": 6,
701
- "min_answer_tokens_mcq": 1,
702
- "bias_close_think": 6.0,
703
- "bias_answer_start": 3.0,
704
- "punish_reopen_think": -3.0,
705
- "punish_extra_think_end": -6.0,
706
- "bias_eos_after_answer": 4.0,
707
- "allow_tool_calls": false,
708
- "tool_call_penalty": 1.0,
709
- "reward_content_type": "smart",
710
- "reward_format_weight": 0.2,
711
- "reward_content_weight": 0.7,
712
- "think_reward_weight": 0.1,
713
- "think_len_min": 16,
714
- "think_len_max": 64,
715
- "use_lora": false,
716
- "num_rollout_samples": 3,
717
- "ppo_batch_size": 1,
718
- "grpo_beta": 0.04,
719
- "learning_rate": 1.4e-06,
720
- "optimizer_beta1": 0.9,
721
- "optimizer_beta2": 0.95,
722
- "optimizer_weight_decay": 0.01,
723
- "grad_clip_norm": 0.35,
724
- "save_optimizer_state": false,
725
- "lr_schedule_config": {
726
- "name": "cosine_decay",
727
- "arguments": [
728
- 1.4e-06,
729
- 60000,
730
- 2e-07
731
- ],
732
- "warmup": 4000,
733
- "warmup_init": 2e-07
734
- },
735
- "grad_accum_steps": 2,
736
- "num_training_steps": 45869,
737
- "save_every": 10,
738
- "eval_every": 0,
739
- "seed": 15572,
740
- "shuffle_data": true,
741
- "use_grad_checkpointing": false,
742
- "grad_checkpoint_layers": 0,
743
- "log_samples_every": 1,
744
- "max_logged_samples": 50,
745
- "log_prompts": true,
746
- "sample_log_path": null,
747
- "kv_bits": 0,
748
- "kv_group_size": 64,
749
- "quantized_kv_start": 0,
750
- "verbose": true,
751
- "use_wandb": true,
752
- "wandb_project": "reasonable-qwen-4b-mlxv2isi",
753
- "wandb_entity": null,
754
- "wandb_run_name": null,
755
- "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last7/checkpoint_20250903_224014_periodic_update_2510",
756
- "allow_cross_arch_ref": true,
757
- "align_bridge_path": null,
758
- "align_bridge_weight": 1.0,
759
- "align_pool": "mean",
760
- "align_after_tag": "<|start|>assistant<|channel|>analysis<|message|>",
761
- "effective_batch_size": 4
762
- }
763
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232422_periodic_update_2600/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232551_exit_request_update_2602/added_tokens.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "</answer>": 151669,
3
- "</img_base64>": 151670,
4
- "</json_output>": 151671,
5
- "</ocr_text>": 151672,
6
- "</think>": 151668,
7
- "</tool_call>": 151658,
8
- "</tool_code>": 151673,
9
- "</tool_response>": 151666,
10
- "<answer>": 151674,
11
- "<img_base64>": 151675,
12
- "<json_output>": 151676,
13
- "<ocr_text>": 151677,
14
- "<think>": 151667,
15
- "<tool_call>": 151657,
16
- "<tool_code>": 151678,
17
- "<tool_response>": 151665,
18
- "<|box_end|>": 151649,
19
- "<|box_start|>": 151648,
20
- "<|endoftext|>": 151643,
21
- "<|file_sep|>": 151664,
22
- "<|fim_middle|>": 151660,
23
- "<|fim_pad|>": 151662,
24
- "<|fim_prefix|>": 151659,
25
- "<|fim_suffix|>": 151661,
26
- "<|im_end|>": 151645,
27
- "<|im_start|>": 151644,
28
- "<|image_pad|>": 151655,
29
- "<|object_ref_end|>": 151647,
30
- "<|object_ref_start|>": 151646,
31
- "<|quad_end|>": 151651,
32
- "<|quad_start|>": 151650,
33
- "<|repo_name|>": 151663,
34
- "<|video_pad|>": 151656,
35
- "<|vision_end|>": 151653,
36
- "<|vision_pad|>": 151654,
37
- "<|vision_start|>": 151652
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232551_exit_request_update_2602/chat_template.jinja DELETED
@@ -1,96 +0,0 @@
1
- {%- if tools %}
2
- {{- '<|im_start|>system\n' }}
3
- {%- if messages[0].role == 'system' %}
4
- {{- messages[0].content + '\n\n' }}
5
- {%- endif %}
6
- {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
- {%- for tool in tools %}
8
- {{- "\n" }}
9
- {{- tool | tojson }}
10
- {%- endfor %}
11
- {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
- {%- else %}
13
- {%- if messages[0].role == 'system' %}
14
- {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
- {%- endif %}
16
- {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- set tool_start = "<tool_response>" %}
21
- {%- set tool_start_length = tool_start|length %}
22
- {%- set start_of_message = message.content[:tool_start_length] %}
23
- {%- set tool_end = "</tool_response>" %}
24
- {%- set tool_end_length = tool_end|length %}
25
- {%- set start_pos = (message.content|length) - tool_end_length %}
26
- {%- if start_pos < 0 %}
27
- {%- set start_pos = 0 %}
28
- {%- endif %}
29
- {%- set end_of_message = message.content[start_pos:] %}
30
- {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
31
- {%- set ns.multi_step_tool = false %}
32
- {%- set ns.last_query_index = index %}
33
- {%- endif %}
34
- {%- endfor %}
35
- {%- for message in messages %}
36
- {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
37
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
38
- {%- elif message.role == "assistant" %}
39
- {%- set content = message.content %}
40
- {%- set reasoning_content = '' %}
41
- {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
42
- {%- set reasoning_content = message.reasoning_content %}
43
- {%- else %}
44
- {%- if '</think>' in message.content %}
45
- {%- set content = (message.content.split('</think>')|last).lstrip('\n') %}
46
- {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\n') %}
47
- {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
48
- {%- endif %}
49
- {%- endif %}
50
- {%- if loop.index0 > ns.last_query_index %}
51
- {%- if loop.last or (not loop.last and reasoning_content) %}
52
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
53
- {%- else %}
54
- {{- '<|im_start|>' + message.role + '\n' + content }}
55
- {%- endif %}
56
- {%- else %}
57
- {{- '<|im_start|>' + message.role + '\n' + content }}
58
- {%- endif %}
59
- {%- if message.tool_calls %}
60
- {%- for tool_call in message.tool_calls %}
61
- {%- if (loop.first and content) or (not loop.first) %}
62
- {{- '\n' }}
63
- {%- endif %}
64
- {%- if tool_call.function %}
65
- {%- set tool_call = tool_call.function %}
66
- {%- endif %}
67
- {{- '<tool_call>\n{"name": "' }}
68
- {{- tool_call.name }}
69
- {{- '", "arguments": ' }}
70
- {%- if tool_call.arguments is string %}
71
- {{- tool_call.arguments }}
72
- {%- else %}
73
- {{- tool_call.arguments | tojson }}
74
- {%- endif %}
75
- {{- '}\n</tool_call>' }}
76
- {%- endfor %}
77
- {%- endif %}
78
- {{- '<|im_end|>\n' }}
79
- {%- elif message.role == "tool" %}
80
- {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
81
- {{- '<|im_start|>user' }}
82
- {%- endif %}
83
- {{- '\n<tool_response>\n' }}
84
- {{- message.content }}
85
- {{- '\n</tool_response>' }}
86
- {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
87
- {{- '<|im_end|>\n' }}
88
- {%- endif %}
89
- {%- endif %}
90
- {%- endfor %}
91
- {%- if add_generation_prompt %}
92
- {{- '<|im_start|>assistant\n' }}
93
- {%- if enable_thinking is defined and enable_thinking is false %}
94
- {{- '<think>\n\n</think>\n\n' }}
95
- {%- endif %}
96
- {%- endif %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232551_exit_request_update_2602/config.json DELETED
@@ -1,30 +0,0 @@
1
- {
2
- "architectures": [
3
- "Qwen3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
- "eos_token_id": 151645,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 2560,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 9728,
14
- "max_position_embeddings": 40960,
15
- "max_window_layers": 36,
16
- "model_type": "qwen3",
17
- "num_attention_heads": 32,
18
- "num_hidden_layers": 36,
19
- "num_key_value_heads": 8,
20
- "rms_norm_eps": 1e-06,
21
- "rope_scaling": null,
22
- "rope_theta": 1000000,
23
- "sliding_window": null,
24
- "tie_word_embeddings": true,
25
- "torch_dtype": "bfloat16",
26
- "transformers_version": "4.51.0",
27
- "use_cache": true,
28
- "use_sliding_window": false,
29
- "vocab_size": 151936
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232551_exit_request_update_2602/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232551_exit_request_update_2602/special_tokens_map.json DELETED
@@ -1,102 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- {
4
- "content": "</answer>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "</img_base64>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "</json_output>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "</ocr_text>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "</think>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</tool_code>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "<answer>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "<img_base64>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<json_output>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "<ocr_text>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "<think>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "<tool_code>",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- }
87
- ],
88
- "eos_token": {
89
- "content": "<|im_end|>",
90
- "lstrip": false,
91
- "normalized": false,
92
- "rstrip": false,
93
- "single_word": false
94
- },
95
- "pad_token": {
96
- "content": "<|endoftext|>",
97
- "lstrip": false,
98
- "normalized": false,
99
- "rstrip": false,
100
- "single_word": false
101
- }
102
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232551_exit_request_update_2602/tokenizer_config.json DELETED
@@ -1,318 +0,0 @@
1
- {
2
- "add_bos_token": false,
3
- "add_prefix_space": false,
4
- "added_tokens_decoder": {
5
- "151643": {
6
- "content": "<|endoftext|>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "151644": {
14
- "content": "<|im_start|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "151645": {
22
- "content": "<|im_end|>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "151646": {
30
- "content": "<|object_ref_start|>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "151647": {
38
- "content": "<|object_ref_end|>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "151648": {
46
- "content": "<|box_start|>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "151649": {
54
- "content": "<|box_end|>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "151650": {
62
- "content": "<|quad_start|>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- },
69
- "151651": {
70
- "content": "<|quad_end|>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
- },
77
- "151652": {
78
- "content": "<|vision_start|>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": true
84
- },
85
- "151653": {
86
- "content": "<|vision_end|>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": true
92
- },
93
- "151654": {
94
- "content": "<|vision_pad|>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": true
100
- },
101
- "151655": {
102
- "content": "<|image_pad|>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": true
108
- },
109
- "151656": {
110
- "content": "<|video_pad|>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "151657": {
118
- "content": "<tool_call>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": false
124
- },
125
- "151658": {
126
- "content": "</tool_call>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "151659": {
134
- "content": "<|fim_prefix|>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "151660": {
142
- "content": "<|fim_middle|>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "151661": {
150
- "content": "<|fim_suffix|>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "151662": {
158
- "content": "<|fim_pad|>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "151663": {
166
- "content": "<|repo_name|>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "151664": {
174
- "content": "<|file_sep|>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "151665": {
182
- "content": "<tool_response>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "151666": {
190
- "content": "</tool_response>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- },
197
- "151667": {
198
- "content": "<think>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": true
204
- },
205
- "151668": {
206
- "content": "</think>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": true
212
- },
213
- "151669": {
214
- "content": "</answer>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false,
219
- "special": true
220
- },
221
- "151670": {
222
- "content": "</img_base64>",
223
- "lstrip": false,
224
- "normalized": false,
225
- "rstrip": false,
226
- "single_word": false,
227
- "special": true
228
- },
229
- "151671": {
230
- "content": "</json_output>",
231
- "lstrip": false,
232
- "normalized": false,
233
- "rstrip": false,
234
- "single_word": false,
235
- "special": true
236
- },
237
- "151672": {
238
- "content": "</ocr_text>",
239
- "lstrip": false,
240
- "normalized": false,
241
- "rstrip": false,
242
- "single_word": false,
243
- "special": true
244
- },
245
- "151673": {
246
- "content": "</tool_code>",
247
- "lstrip": false,
248
- "normalized": false,
249
- "rstrip": false,
250
- "single_word": false,
251
- "special": true
252
- },
253
- "151674": {
254
- "content": "<answer>",
255
- "lstrip": false,
256
- "normalized": false,
257
- "rstrip": false,
258
- "single_word": false,
259
- "special": true
260
- },
261
- "151675": {
262
- "content": "<img_base64>",
263
- "lstrip": false,
264
- "normalized": false,
265
- "rstrip": false,
266
- "single_word": false,
267
- "special": true
268
- },
269
- "151676": {
270
- "content": "<json_output>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false,
275
- "special": true
276
- },
277
- "151677": {
278
- "content": "<ocr_text>",
279
- "lstrip": false,
280
- "normalized": false,
281
- "rstrip": false,
282
- "single_word": false,
283
- "special": true
284
- },
285
- "151678": {
286
- "content": "<tool_code>",
287
- "lstrip": false,
288
- "normalized": false,
289
- "rstrip": false,
290
- "single_word": false,
291
- "special": true
292
- }
293
- },
294
- "additional_special_tokens": [
295
- "</answer>",
296
- "</img_base64>",
297
- "</json_output>",
298
- "</ocr_text>",
299
- "</think>",
300
- "</tool_code>",
301
- "<answer>",
302
- "<img_base64>",
303
- "<json_output>",
304
- "<ocr_text>",
305
- "<think>",
306
- "<tool_code>"
307
- ],
308
- "bos_token": null,
309
- "clean_up_tokenization_spaces": false,
310
- "eos_token": "<|im_end|>",
311
- "errors": "replace",
312
- "extra_special_tokens": {},
313
- "model_max_length": 131072,
314
- "pad_token": "<|endoftext|>",
315
- "split_special_tokens": false,
316
- "tokenizer_class": "Qwen2Tokenizer",
317
- "unk_token": null
318
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232551_exit_request_update_2602/training_state.json DELETED
@@ -1,763 +0,0 @@
1
- {
2
- "global_step": 2602,
3
- "num_updates": 2602,
4
- "use_lora": false,
5
- "rng_state": {
6
- "python": [
7
- 3,
8
- [
9
- 3228212754,
10
- 279998097,
11
- 2056665714,
12
- 3603597067,
13
- 3476614728,
14
- 3031000,
15
- 2954941703,
16
- 3205203341,
17
- 1022055790,
18
- 1415253971,
19
- 2181335448,
20
- 3582899399,
21
- 1644250258,
22
- 1793404199,
23
- 2566317855,
24
- 109101073,
25
- 3150506052,
26
- 2724067265,
27
- 97547420,
28
- 2995366220,
29
- 3519916584,
30
- 3209457352,
31
- 3126309277,
32
- 3451453441,
33
- 1702671318,
34
- 2107243699,
35
- 2422777587,
36
- 360391179,
37
- 585237960,
38
- 2832187814,
39
- 2662498495,
40
- 2394734758,
41
- 1444276186,
42
- 1837837410,
43
- 961418280,
44
- 1195482276,
45
- 935680843,
46
- 3968754582,
47
- 2211483879,
48
- 1837545159,
49
- 2419172187,
50
- 2042898634,
51
- 2665785964,
52
- 356867850,
53
- 3385622908,
54
- 3868724832,
55
- 2977197007,
56
- 1479685303,
57
- 1540416526,
58
- 331944145,
59
- 3530610791,
60
- 2842657301,
61
- 2372012930,
62
- 639325304,
63
- 3360783663,
64
- 3762064881,
65
- 2160841949,
66
- 4001870304,
67
- 1864777350,
68
- 717368547,
69
- 362746266,
70
- 3779466655,
71
- 1960142933,
72
- 1723756462,
73
- 1326392635,
74
- 2695751926,
75
- 1728155752,
76
- 2347807318,
77
- 1862557049,
78
- 3999800477,
79
- 2277115301,
80
- 1516014806,
81
- 3846995662,
82
- 2388310657,
83
- 2859396105,
84
- 1441398545,
85
- 3439746988,
86
- 3678365781,
87
- 190759243,
88
- 4075004972,
89
- 342634350,
90
- 2045571341,
91
- 557542918,
92
- 2240372410,
93
- 1982579501,
94
- 255722053,
95
- 1496213542,
96
- 2563043770,
97
- 620403458,
98
- 3666797179,
99
- 4194690277,
100
- 1725488508,
101
- 2427139442,
102
- 911138792,
103
- 2810519096,
104
- 403900489,
105
- 1235282796,
106
- 3323510948,
107
- 3976438655,
108
- 2592317228,
109
- 1469307213,
110
- 456462311,
111
- 3393494366,
112
- 669420558,
113
- 1939678322,
114
- 4073521067,
115
- 3342970892,
116
- 2452710290,
117
- 2793129860,
118
- 1342676286,
119
- 2394512596,
120
- 1832972552,
121
- 3814703913,
122
- 945666136,
123
- 3552696630,
124
- 3165169504,
125
- 4234896064,
126
- 3288485605,
127
- 2637492903,
128
- 3688384962,
129
- 1693185353,
130
- 3041897498,
131
- 3666651581,
132
- 216719692,
133
- 2101621578,
134
- 1056505155,
135
- 3806530083,
136
- 2345205292,
137
- 2868630622,
138
- 3289598319,
139
- 597570811,
140
- 2632481252,
141
- 3875619652,
142
- 289253672,
143
- 77311731,
144
- 4133169138,
145
- 3462637509,
146
- 2047139049,
147
- 109094532,
148
- 1309238588,
149
- 4183047643,
150
- 3849080966,
151
- 2612177601,
152
- 2747398543,
153
- 2317389804,
154
- 2206126400,
155
- 3538931825,
156
- 4015230236,
157
- 1548164965,
158
- 3615557249,
159
- 3874732623,
160
- 383396796,
161
- 78135062,
162
- 836187159,
163
- 1405060375,
164
- 4124734680,
165
- 2283841137,
166
- 130389111,
167
- 2370256028,
168
- 3117432748,
169
- 2188669863,
170
- 1274090654,
171
- 136753743,
172
- 3535123905,
173
- 928699189,
174
- 2096609090,
175
- 140690583,
176
- 1335216202,
177
- 1664118110,
178
- 370920611,
179
- 3381310767,
180
- 635177978,
181
- 919944943,
182
- 1794890933,
183
- 3785921605,
184
- 988623168,
185
- 3586512205,
186
- 3578229114,
187
- 2227888166,
188
- 1199377973,
189
- 198574987,
190
- 3359460270,
191
- 41421261,
192
- 3818002247,
193
- 1611622405,
194
- 1172309347,
195
- 1127078025,
196
- 864036890,
197
- 3821592921,
198
- 1074908116,
199
- 4045652492,
200
- 150578148,
201
- 1490173923,
202
- 1255586022,
203
- 3620141376,
204
- 1119552840,
205
- 1604539422,
206
- 2899876376,
207
- 1634692146,
208
- 2430160935,
209
- 3525965146,
210
- 3634230653,
211
- 2801553134,
212
- 2236757588,
213
- 3209439651,
214
- 638937059,
215
- 332630833,
216
- 2099354597,
217
- 2453040579,
218
- 2294611386,
219
- 3583062819,
220
- 3057053813,
221
- 2651436307,
222
- 820127535,
223
- 2234442340,
224
- 2926763826,
225
- 2608312654,
226
- 622663536,
227
- 1327274392,
228
- 579028969,
229
- 1638676546,
230
- 725757522,
231
- 2287475756,
232
- 2245008208,
233
- 583117806,
234
- 1681911299,
235
- 4043302089,
236
- 2733469006,
237
- 3482246612,
238
- 1699131086,
239
- 2009286409,
240
- 1012929586,
241
- 1831443753,
242
- 3401815932,
243
- 2335754234,
244
- 1338527095,
245
- 1234451965,
246
- 2260706742,
247
- 3195944902,
248
- 933824426,
249
- 3055004187,
250
- 442858759,
251
- 3568329250,
252
- 2010957609,
253
- 2249247272,
254
- 1258950970,
255
- 205311363,
256
- 607774665,
257
- 2098340782,
258
- 3806387601,
259
- 1242750989,
260
- 3609867707,
261
- 241559992,
262
- 2980673848,
263
- 249731274,
264
- 2840307599,
265
- 3768697067,
266
- 2786582312,
267
- 234722404,
268
- 501116625,
269
- 3883427569,
270
- 2178103353,
271
- 2378845018,
272
- 4115296692,
273
- 870819025,
274
- 4131469458,
275
- 4141674918,
276
- 2078498292,
277
- 3999732609,
278
- 2537021868,
279
- 3558720844,
280
- 3704987724,
281
- 398283175,
282
- 1690550218,
283
- 2326228611,
284
- 2685433390,
285
- 3899703572,
286
- 3902620680,
287
- 2509824424,
288
- 1513684241,
289
- 2524739654,
290
- 3117375505,
291
- 1379082516,
292
- 2734650653,
293
- 2355589146,
294
- 2451030867,
295
- 2887987439,
296
- 2131990321,
297
- 2214951112,
298
- 4219817646,
299
- 1537640525,
300
- 3339366035,
301
- 2845436015,
302
- 3914197935,
303
- 2658843500,
304
- 3766601378,
305
- 1182641028,
306
- 1457188584,
307
- 4153939547,
308
- 2335418690,
309
- 1152149210,
310
- 870429227,
311
- 4230760942,
312
- 1976040608,
313
- 785192739,
314
- 184789279,
315
- 1377261339,
316
- 707722660,
317
- 3558575032,
318
- 53709080,
319
- 805111893,
320
- 4277091928,
321
- 362164270,
322
- 3343191968,
323
- 764658199,
324
- 1099715208,
325
- 4090812555,
326
- 68096351,
327
- 627278739,
328
- 1254763496,
329
- 4216318088,
330
- 1188064625,
331
- 3429733508,
332
- 3022473048,
333
- 3847088807,
334
- 3053202734,
335
- 3740308065,
336
- 1993837633,
337
- 763503311,
338
- 1037390932,
339
- 588432609,
340
- 1536894725,
341
- 1306273771,
342
- 2543433092,
343
- 1004993106,
344
- 700767389,
345
- 2357318127,
346
- 3169490527,
347
- 3918469492,
348
- 2033525981,
349
- 519262086,
350
- 1129257868,
351
- 2376802674,
352
- 2854805533,
353
- 3880903346,
354
- 74109069,
355
- 2008994945,
356
- 4114401423,
357
- 2811556615,
358
- 3471501456,
359
- 837319289,
360
- 2953139467,
361
- 2327107742,
362
- 133550557,
363
- 3916368228,
364
- 2552796922,
365
- 1537772292,
366
- 1833298346,
367
- 2009274777,
368
- 424528351,
369
- 2717359636,
370
- 2825961025,
371
- 1861953389,
372
- 2884908497,
373
- 3801183979,
374
- 1856316371,
375
- 2235711410,
376
- 2988382290,
377
- 1187502951,
378
- 3671488340,
379
- 1954214264,
380
- 2964041676,
381
- 1604199356,
382
- 3311430473,
383
- 1009672891,
384
- 1165005921,
385
- 3302391626,
386
- 2660756877,
387
- 892284552,
388
- 2393343854,
389
- 3816239257,
390
- 3383925622,
391
- 2632741631,
392
- 150411860,
393
- 4211331326,
394
- 2563945928,
395
- 3147050596,
396
- 86291816,
397
- 1306956905,
398
- 3702342117,
399
- 221494470,
400
- 2274829119,
401
- 3628261915,
402
- 399492089,
403
- 1039477125,
404
- 3636665160,
405
- 1499731795,
406
- 2521019510,
407
- 3820576314,
408
- 651306401,
409
- 2222447223,
410
- 762476894,
411
- 3372134685,
412
- 2773485657,
413
- 1506070889,
414
- 166626664,
415
- 2296565478,
416
- 2627756446,
417
- 1432061762,
418
- 875491433,
419
- 3481499302,
420
- 647568111,
421
- 1079088546,
422
- 3657637708,
423
- 1156169451,
424
- 1864010770,
425
- 2499694803,
426
- 1857267438,
427
- 2291220436,
428
- 3324416067,
429
- 2904742373,
430
- 1611590803,
431
- 3124040330,
432
- 2795308170,
433
- 3443049470,
434
- 2264039167,
435
- 3511022788,
436
- 2491483558,
437
- 3811739223,
438
- 3115068110,
439
- 3279986730,
440
- 3570889482,
441
- 335123233,
442
- 360967562,
443
- 311303413,
444
- 2380553530,
445
- 1843925797,
446
- 947441595,
447
- 3427872459,
448
- 2885833189,
449
- 79715633,
450
- 514452158,
451
- 1736537499,
452
- 3982473220,
453
- 1088572403,
454
- 2384199361,
455
- 1784448850,
456
- 2419864392,
457
- 639538932,
458
- 2112974083,
459
- 3934260396,
460
- 2850658226,
461
- 585502134,
462
- 1760060627,
463
- 3534598283,
464
- 118824413,
465
- 799460671,
466
- 1478757883,
467
- 3050258322,
468
- 2579705998,
469
- 3133048451,
470
- 2223823746,
471
- 603685429,
472
- 2387682555,
473
- 3009556562,
474
- 1521901351,
475
- 2646225121,
476
- 1773849074,
477
- 3347873314,
478
- 1918004826,
479
- 1726862757,
480
- 2513387996,
481
- 508242897,
482
- 339225034,
483
- 1805609427,
484
- 323575129,
485
- 765632792,
486
- 4218304970,
487
- 296616831,
488
- 880218140,
489
- 2165587941,
490
- 2069055842,
491
- 110437221,
492
- 1281149935,
493
- 3527927111,
494
- 4246842114,
495
- 501470319,
496
- 110693585,
497
- 2361144593,
498
- 958597015,
499
- 540902541,
500
- 2351458930,
501
- 3824880566,
502
- 2412074905,
503
- 1526349815,
504
- 2951752081,
505
- 1577401122,
506
- 645843044,
507
- 1122678576,
508
- 3870028103,
509
- 3563016932,
510
- 2394128327,
511
- 1412316709,
512
- 1770485652,
513
- 3328500527,
514
- 2153223048,
515
- 1571141422,
516
- 1950096991,
517
- 3843465276,
518
- 1189099356,
519
- 1707319037,
520
- 1312370001,
521
- 4085046861,
522
- 3231557091,
523
- 3340822452,
524
- 1898079545,
525
- 1373556942,
526
- 891254598,
527
- 543958551,
528
- 3929023245,
529
- 3262642994,
530
- 3221866934,
531
- 2390598216,
532
- 1036903094,
533
- 2097686434,
534
- 129207147,
535
- 2964160713,
536
- 1881698322,
537
- 1515645930,
538
- 3226263079,
539
- 1986344504,
540
- 3005241002,
541
- 3923005616,
542
- 1430681832,
543
- 2048310876,
544
- 631221366,
545
- 2972301268,
546
- 3276451436,
547
- 3841702416,
548
- 2585920783,
549
- 4070240888,
550
- 3697275337,
551
- 564704448,
552
- 266488781,
553
- 3252391941,
554
- 2796272702,
555
- 2665303656,
556
- 3413456714,
557
- 2470069594,
558
- 2470272528,
559
- 1660836326,
560
- 991966684,
561
- 3814972761,
562
- 1794669421,
563
- 892478324,
564
- 461928726,
565
- 2461293569,
566
- 561261597,
567
- 3704935953,
568
- 1737302340,
569
- 4141207295,
570
- 1072950705,
571
- 1100316204,
572
- 49599,
573
- 3655686352,
574
- 2604516846,
575
- 169477890,
576
- 1552495902,
577
- 4076641781,
578
- 4288801538,
579
- 2661645907,
580
- 2681760413,
581
- 3191805957,
582
- 1555588618,
583
- 2005446622,
584
- 2876389060,
585
- 2506534665,
586
- 3737615325,
587
- 539844861,
588
- 2614598422,
589
- 3169363989,
590
- 1460455376,
591
- 3597830757,
592
- 79929582,
593
- 1950458365,
594
- 4188478473,
595
- 675585740,
596
- 905482938,
597
- 3092725,
598
- 3333208631,
599
- 2096209247,
600
- 1647933404,
601
- 2581635632,
602
- 1778498943,
603
- 3092521474,
604
- 988446911,
605
- 1790098568,
606
- 2163371370,
607
- 3874214587,
608
- 3811993331,
609
- 3505387423,
610
- 2828235272,
611
- 1132675285,
612
- 4047174618,
613
- 2760040098,
614
- 3320824721,
615
- 202201724,
616
- 1355357947,
617
- 627906198,
618
- 4127456551,
619
- 2431589489,
620
- 675868086,
621
- 2865627058,
622
- 4123612491,
623
- 4047286524,
624
- 747101435,
625
- 1216754111,
626
- 2427503810,
627
- 3514051898,
628
- 452300667,
629
- 2349273222,
630
- 2983441288,
631
- 1420412231,
632
- 2035374170,
633
- 225
634
- ],
635
- null
636
- ],
637
- "numpy": [
638
- "MT19937",
639
- "[ 15572 980513701 2334715163 3585534944 1822198675 158479007\n 1300107201 2003433159 424170022 4102602503 2437447838 1924282775\n 2084306490 4132823124 4216394081 1526156729 4231078312 3658730376\n 3599347945 3798337125 544676946 3949203055 1596292274 2255158710\n 703032348 636265253 2880318131 3345387760 162413307 2418710564\n 3712245020 2175226970 563044056 2939814745 2838234633 468141434\n 616739654 564867267 2130155541 815641611 601811839 2004017220\n 3627706467 3951463947 810570068 3028421201 454655469 3270345648\n 555008207 3255294172 3259033389 429183833 272696145 2007214122\n 2243779629 1934853570 517873959 1769075612 2057249323 825685197\n 21711389 271106734 3943034084 3547272802 1718926725 3289803093\n 2224067888 3644890877 3431377018 1754806530 2376472949 2892610362\n 1500501344 3824621710 1417356523 4122790557 775716514 1813030967\n 3994108828 391693578 1388189506 1179060416 1727839607 3646219745\n 3467814014 1642908326 1500533561 1281477999 2139613886 209245364\n 1449934686 3593983958 693904485 999778614 847538348 922048571\n 1218470567 916177375 1196217437 3715695791 3572687936 2177952212\n 2976499668 1502043605 3295312140 473156180 3489777550 4116262924\n 726728023 266896064 1207195182 1422796794 3298302327 2546802485\n 3089285157 4087066326 281999229 3833891270 4133996591 3393553875\n 1790581640 1088958086 372349629 1150038540 3967005853 3449868499\n 2783031950 745299387 4177989191 440862037 1630705675 3432601205\n 3976086290 2477494074 2631694750 55867155 3381732871 3988268061\n 4190916192 3426936986 1292805609 2800262421 1433946529 2604379311\n 3803100625 2130255467 4134910564 3389303350 912295037 1986407142\n 60132537 1465812628 2556679777 768839848 561642210 2962288069\n 2900250399 2446769166 2830015834 1820339287 3836052622 3843539266\n 3448899655 719693687 2608513431 807017838 705657612 1313405027\n 308572498 3011745795 3544875535 3662789479 792949536 1679330381\n 2262304426 3714222327 3252067572 3530366244 2847367189 1818638534\n 
4196918839 1197188921 1714287054 3610324578 1759524349 658319653\n 4062775635 2170322251 3246948233 467741743 2311362121 1326083926\n 2215935251 2860151158 3543259014 4288886076 1000061343 35607424\n 3800789574 1024328448 2871144392 1452668283 2547917804 794856369\n 3652179617 850026679 66787329 99615284 3360338772 2597540132\n 1809210064 3923947080 4257343339 372176093 3483099399 721873771\n 1101276431 2834898271 76176267 4200628658 2773607601 3516401276\n 3454896121 2354147457 1223458703 3128119783 529915195 2585910314\n 3544532140 551469296 3014823830 3511999051 1463986000 3624754398\n 704833819 3852626834 2711357985 1324465084 1627535231 570708901\n 2717601289 725003848 1942965082 328822490 3620824822 1651096734\n 2785116849 1685019798 1956468619 3534906475 922043906 3007951301\n 4238844799 755293417 3011697131 446474988 2478554140 2374977239\n 1380053003 2243890933 2686719863 2130566958 3840030097 1793607073\n 3796244776 2639065600 3001847252 1477803129 1197364324 2384746982\n 766409730 2722234329 3102942583 2032952634 961381465 1104664880\n 431680105 3809455490 596671995 2974418974 2572169252 3211875863\n 1700476003 2461146565 4138090399 3571815849 2874279728 3348404697\n 1894538786 1654054896 1861571639 643113073 1024018105 110160162\n 3688705424 1588307750 1010684555 1071478016 3908839978 1361810520\n 4077136201 1942152543 3862285636 788921650 3325732842 2086596894\n 2354559341 920752382 2089296746 412564844 3783133394 3542635692\n 151261507 2024582056 27012071 3714904670 2251973869 3415653000\n 3122199829 1793993298 3504479999 2494502573 3472923469 628884745\n 400477905 2068607674 2511099917 76006962 4127192322 656421806\n 2099151600 4055081824 3120853595 1902231402 1793968517 2739152483\n 3675524757 3637643391 2093725246 3500121902 291758869 1652675998\n 1139040273 1626503079 1329269718 3800218668 1301440229 4094598479\n 2030419032 2206069114 2167504310 3568823651 1480132672 4189195270\n 1003514971 2108978250 4235920891 1015223357 1328980599 3065593845\n 
772959451 1736648400 1111173855 2673522945 3202521755 1515315402\n 1025987717 2556593896 1098413506 3353399904 2969501057 2094670114\n 2847919939 3042807578 2837794286 1675561875 2905519122 4265188297\n 2610926124 846285729 2241003777 2845770412 2129473060 3762815768\n 2144316967 1546390655 1870814520 1524713984 3716398313 3346580439\n 507361322 3071157273 715371311 667081236 1562427246 1416032086\n 2719153631 1214541502 3927763433 4093412577 1609261242 1472085592\n 2916826031 2284397012 4029669634 4115943418 618581971 2078599894\n 2195634027 568626950 551593208 1404161907 4048083862 206856294\n 2947194844 2767249973 1603907667 1631351803 1522568516 3530861276\n 932299423 1409409376 1006753259 2778802782 2428826612 4160546743\n 1957871534 3303229622 2827456021 2670092224 3383794488 945607414\n 1983832766 358657548 4271708270 755497396 3434273208 1362230140\n 1689853703 801994005 3746197505 3596436611 1542752314 3254670338\n 1548922657 4130814301 3943625172 221411986 713064282 4233062979\n 4075891970 2437106728 544573526 3064910259 151483803 689855214\n 1545945006 3683633364 3289795997 329789217 4168762065 3787243687\n 2265695874 39834191 1266893307 1324209011 1243246540 3973960372\n 541659911 1362379416 1601251635 1863255185 3125665448 4219983083\n 2682202466 567260347 1405575843 3420495303 2758288434 3586390223\n 336221788 2630832173 3573336941 1218191945 2213154892 2821992107\n 3693992851 696758711 279252507 2892498320 1862489732 255938916\n 1661968992 3484941106 4082783555 2936202287 3514506417 1758172715\n 517257860 3411688455 3637760904 1419041484 2137852375 757229925\n 3065881553 339906360 661617426 760623637 1046610245 876310326\n 1014024268 3183719419 2438284349 1776461276 3594106675 2854090739\n 2602159385 4289618092 2857553425 695761542 3083079398 76408317\n 3254461403 153861699 2129194363 2941386031 4094753231 1156889483\n 2242959746 1437290897 1982676962 1514123682 1353077347 1818424511\n 2436251404 4085735581 3547311726 4033030170 2353214711 
479069124\n 241656432 4085762125 551929572 2454945299 132044757 524232234\n 3799812788 6269782 1338614034 3443833252 1258856457 4165168463\n 355192100 2534159709 2851727269 2518795790 3366162664 3414356452\n 1869549905 2688919231 3437293505 2606459835 902202159 3721325246\n 1701626821 1124672137 2815257054 3602219523 1714310200 949851574\n 2336520456 504372525 1144232445 195534505 4020833259 946396359\n 1559665603 3237234635 826432554 799463637 3769666381 3612718603\n 762518382 2954474157 4052494419 672053721 1345549799 1873779721\n 2936068468 972026843 1411934901 2952294227 2546812485 2659826516\n 2834428224 2455667549 4281380303 2345320401 3932855189 309111429\n 834893265 2699122382 2146331862 3207660078 1202940344 1030469978\n 3945221344 2900087534 722371964 3096315981 1621397645 907802015\n 450035999 2628913824 3160204880 3961963201 349350642 3107386851\n 560688431 2098806006 3142991583 79041694 2234561220 122454157]",
640
- 624,
641
- 0,
642
- 0.0
643
- ],
644
- "mlx": 1756905967,
645
- "mlx_key": [
646
- 0,
647
- 1756903229
648
- ]
649
- },
650
- "training_args_snapshot": {
651
- "output_dir": "outy1266_align_last7",
652
- "max_kv_size": 1536,
653
- "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit",
654
- "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/InferenceIllusionist/gpt-oss-20b-MLX-4bit",
655
- "draft_model_path": null,
656
- "benchmark_every": 0,
657
- "benchmark_dataset": "gsm8k",
658
- "benchmark_dataset_config": "main",
659
- "benchmark_split": "test",
660
- "benchmark_samples": 10,
661
- "benchmark_prompt_key": "question",
662
- "benchmark_answer_key": "answer",
663
- "benchmark_max_new_tokens": 196,
664
- "benchmark_temperature": 0.0,
665
- "benchmark_top_p": 1.0,
666
- "benchmark_top_k": 0,
667
- "benchmark_use_chat_template": true,
668
- "benchmark_stop_on_error": false,
669
- "min_think_tokens": 32,
670
- "think_end_early_bias": -12.0,
671
- "bias_answer_start_after_min_think": true,
672
- "train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl",
673
- "val_dataset_path": null,
674
- "dataset_name": null,
675
- "dataset_config": null,
676
- "dataset_train_split": "train",
677
- "dataset_val_split": "test",
678
- "dataset_prompt_key": "prompt",
679
- "dataset_answer_key": "completion",
680
- "max_prompt_len": 350,
681
- "max_gen_len": 128,
682
- "system_prompt": null,
683
- "think_start_tag": "<think>",
684
- "think_end_tag": "</think>",
685
- "answer_start_tag": "<answer>",
686
- "answer_end_tag": "</answer>",
687
- "think_boost_tokens": 24,
688
- "think_temperature": 0.15,
689
- "answer_temperature": 0.1,
690
- "sampling_top_p": 0.6,
691
- "sampling_min_p": 0.05,
692
- "sampling_top_k": 40,
693
- "repetition_penalty": 1.15,
694
- "repetition_context_size": 64,
695
- "hard_mask_mcq_first_token": true,
696
- "mcq_letter_lift": 10.0,
697
- "mcq_ban_first_bias": -14.0,
698
- "nonmcq_ban_first_bias": -10.0,
699
- "mcq_close_after_k": 1,
700
- "min_answer_tokens": 6,
701
- "min_answer_tokens_mcq": 1,
702
- "bias_close_think": 6.0,
703
- "bias_answer_start": 3.0,
704
- "punish_reopen_think": -3.0,
705
- "punish_extra_think_end": -6.0,
706
- "bias_eos_after_answer": 4.0,
707
- "allow_tool_calls": false,
708
- "tool_call_penalty": 1.0,
709
- "reward_content_type": "smart",
710
- "reward_format_weight": 0.2,
711
- "reward_content_weight": 0.7,
712
- "think_reward_weight": 0.1,
713
- "think_len_min": 16,
714
- "think_len_max": 64,
715
- "use_lora": false,
716
- "num_rollout_samples": 3,
717
- "ppo_batch_size": 1,
718
- "grpo_beta": 0.04,
719
- "learning_rate": 1.4e-06,
720
- "optimizer_beta1": 0.9,
721
- "optimizer_beta2": 0.95,
722
- "optimizer_weight_decay": 0.01,
723
- "grad_clip_norm": 0.35,
724
- "save_optimizer_state": false,
725
- "lr_schedule_config": {
726
- "name": "cosine_decay",
727
- "arguments": [
728
- 1.4e-06,
729
- 60000,
730
- 2e-07
731
- ],
732
- "warmup": 4000,
733
- "warmup_init": 2e-07
734
- },
735
- "grad_accum_steps": 2,
736
- "num_training_steps": 45869,
737
- "save_every": 10,
738
- "eval_every": 0,
739
- "seed": 15572,
740
- "shuffle_data": true,
741
- "use_grad_checkpointing": false,
742
- "grad_checkpoint_layers": 0,
743
- "log_samples_every": 1,
744
- "max_logged_samples": 50,
745
- "log_prompts": true,
746
- "sample_log_path": null,
747
- "kv_bits": 0,
748
- "kv_group_size": 64,
749
- "quantized_kv_start": 0,
750
- "verbose": true,
751
- "use_wandb": true,
752
- "wandb_project": "reasonable-qwen-4b-mlxv2isi",
753
- "wandb_entity": null,
754
- "wandb_run_name": null,
755
- "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last7/checkpoint_20250903_224014_periodic_update_2510",
756
- "allow_cross_arch_ref": true,
757
- "align_bridge_path": null,
758
- "align_bridge_weight": 1.0,
759
- "align_pool": "mean",
760
- "align_after_tag": "<|start|>assistant<|channel|>analysis<|message|>",
761
- "effective_batch_size": 4
762
- }
763
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232551_exit_request_update_2602/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232617_shutdown_signal_update_2602/added_tokens.json DELETED
@@ -1,38 +0,0 @@
1
- {
2
- "</answer>": 151669,
3
- "</img_base64>": 151670,
4
- "</json_output>": 151671,
5
- "</ocr_text>": 151672,
6
- "</think>": 151668,
7
- "</tool_call>": 151658,
8
- "</tool_code>": 151673,
9
- "</tool_response>": 151666,
10
- "<answer>": 151674,
11
- "<img_base64>": 151675,
12
- "<json_output>": 151676,
13
- "<ocr_text>": 151677,
14
- "<think>": 151667,
15
- "<tool_call>": 151657,
16
- "<tool_code>": 151678,
17
- "<tool_response>": 151665,
18
- "<|box_end|>": 151649,
19
- "<|box_start|>": 151648,
20
- "<|endoftext|>": 151643,
21
- "<|file_sep|>": 151664,
22
- "<|fim_middle|>": 151660,
23
- "<|fim_pad|>": 151662,
24
- "<|fim_prefix|>": 151659,
25
- "<|fim_suffix|>": 151661,
26
- "<|im_end|>": 151645,
27
- "<|im_start|>": 151644,
28
- "<|image_pad|>": 151655,
29
- "<|object_ref_end|>": 151647,
30
- "<|object_ref_start|>": 151646,
31
- "<|quad_end|>": 151651,
32
- "<|quad_start|>": 151650,
33
- "<|repo_name|>": 151663,
34
- "<|video_pad|>": 151656,
35
- "<|vision_end|>": 151653,
36
- "<|vision_pad|>": 151654,
37
- "<|vision_start|>": 151652
38
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232617_shutdown_signal_update_2602/chat_template.jinja DELETED
@@ -1,96 +0,0 @@
1
- {%- if tools %}
2
- {{- '<|im_start|>system\n' }}
3
- {%- if messages[0].role == 'system' %}
4
- {{- messages[0].content + '\n\n' }}
5
- {%- endif %}
6
- {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
- {%- for tool in tools %}
8
- {{- "\n" }}
9
- {{- tool | tojson }}
10
- {%- endfor %}
11
- {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
- {%- else %}
13
- {%- if messages[0].role == 'system' %}
14
- {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
- {%- endif %}
16
- {%- endif %}
17
- {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
18
- {%- for message in messages[::-1] %}
19
- {%- set index = (messages|length - 1) - loop.index0 %}
20
- {%- set tool_start = "<tool_response>" %}
21
- {%- set tool_start_length = tool_start|length %}
22
- {%- set start_of_message = message.content[:tool_start_length] %}
23
- {%- set tool_end = "</tool_response>" %}
24
- {%- set tool_end_length = tool_end|length %}
25
- {%- set start_pos = (message.content|length) - tool_end_length %}
26
- {%- if start_pos < 0 %}
27
- {%- set start_pos = 0 %}
28
- {%- endif %}
29
- {%- set end_of_message = message.content[start_pos:] %}
30
- {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
31
- {%- set ns.multi_step_tool = false %}
32
- {%- set ns.last_query_index = index %}
33
- {%- endif %}
34
- {%- endfor %}
35
- {%- for message in messages %}
36
- {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
37
- {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
38
- {%- elif message.role == "assistant" %}
39
- {%- set content = message.content %}
40
- {%- set reasoning_content = '' %}
41
- {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
42
- {%- set reasoning_content = message.reasoning_content %}
43
- {%- else %}
44
- {%- if '</think>' in message.content %}
45
- {%- set content = (message.content.split('</think>')|last).lstrip('\n') %}
46
- {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\n') %}
47
- {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
48
- {%- endif %}
49
- {%- endif %}
50
- {%- if loop.index0 > ns.last_query_index %}
51
- {%- if loop.last or (not loop.last and reasoning_content) %}
52
- {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
53
- {%- else %}
54
- {{- '<|im_start|>' + message.role + '\n' + content }}
55
- {%- endif %}
56
- {%- else %}
57
- {{- '<|im_start|>' + message.role + '\n' + content }}
58
- {%- endif %}
59
- {%- if message.tool_calls %}
60
- {%- for tool_call in message.tool_calls %}
61
- {%- if (loop.first and content) or (not loop.first) %}
62
- {{- '\n' }}
63
- {%- endif %}
64
- {%- if tool_call.function %}
65
- {%- set tool_call = tool_call.function %}
66
- {%- endif %}
67
- {{- '<tool_call>\n{"name": "' }}
68
- {{- tool_call.name }}
69
- {{- '", "arguments": ' }}
70
- {%- if tool_call.arguments is string %}
71
- {{- tool_call.arguments }}
72
- {%- else %}
73
- {{- tool_call.arguments | tojson }}
74
- {%- endif %}
75
- {{- '}\n</tool_call>' }}
76
- {%- endfor %}
77
- {%- endif %}
78
- {{- '<|im_end|>\n' }}
79
- {%- elif message.role == "tool" %}
80
- {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
81
- {{- '<|im_start|>user' }}
82
- {%- endif %}
83
- {{- '\n<tool_response>\n' }}
84
- {{- message.content }}
85
- {{- '\n</tool_response>' }}
86
- {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
87
- {{- '<|im_end|>\n' }}
88
- {%- endif %}
89
- {%- endif %}
90
- {%- endfor %}
91
- {%- if add_generation_prompt %}
92
- {{- '<|im_start|>assistant\n' }}
93
- {%- if enable_thinking is defined and enable_thinking is false %}
94
- {{- '<think>\n\n</think>\n\n' }}
95
- {%- endif %}
96
- {%- endif %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232617_shutdown_signal_update_2602/config.json DELETED
@@ -1,30 +0,0 @@
1
- {
2
- "architectures": [
3
- "Qwen3ForCausalLM"
4
- ],
5
- "attention_bias": false,
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
- "eos_token_id": 151645,
9
- "head_dim": 128,
10
- "hidden_act": "silu",
11
- "hidden_size": 2560,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 9728,
14
- "max_position_embeddings": 40960,
15
- "max_window_layers": 36,
16
- "model_type": "qwen3",
17
- "num_attention_heads": 32,
18
- "num_hidden_layers": 36,
19
- "num_key_value_heads": 8,
20
- "rms_norm_eps": 1e-06,
21
- "rope_scaling": null,
22
- "rope_theta": 1000000,
23
- "sliding_window": null,
24
- "tie_word_embeddings": true,
25
- "torch_dtype": "bfloat16",
26
- "transformers_version": "4.51.0",
27
- "use_cache": true,
28
- "use_sliding_window": false,
29
- "vocab_size": 151936
30
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232617_shutdown_signal_update_2602/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232617_shutdown_signal_update_2602/special_tokens_map.json DELETED
@@ -1,102 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- {
4
- "content": "</answer>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "</img_base64>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "</json_output>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- {
25
- "content": "</ocr_text>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "</think>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</tool_code>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "<answer>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "<img_base64>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<json_output>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "<ocr_text>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "<think>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "<tool_code>",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- }
87
- ],
88
- "eos_token": {
89
- "content": "<|im_end|>",
90
- "lstrip": false,
91
- "normalized": false,
92
- "rstrip": false,
93
- "single_word": false
94
- },
95
- "pad_token": {
96
- "content": "<|endoftext|>",
97
- "lstrip": false,
98
- "normalized": false,
99
- "rstrip": false,
100
- "single_word": false
101
- }
102
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232617_shutdown_signal_update_2602/tokenizer_config.json DELETED
@@ -1,318 +0,0 @@
1
- {
2
- "add_bos_token": false,
3
- "add_prefix_space": false,
4
- "added_tokens_decoder": {
5
- "151643": {
6
- "content": "<|endoftext|>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "151644": {
14
- "content": "<|im_start|>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "151645": {
22
- "content": "<|im_end|>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "151646": {
30
- "content": "<|object_ref_start|>",
31
- "lstrip": false,
32
- "normalized": false,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": true
36
- },
37
- "151647": {
38
- "content": "<|object_ref_end|>",
39
- "lstrip": false,
40
- "normalized": false,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": true
44
- },
45
- "151648": {
46
- "content": "<|box_start|>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": true
52
- },
53
- "151649": {
54
- "content": "<|box_end|>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "151650": {
62
- "content": "<|quad_start|>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- },
69
- "151651": {
70
- "content": "<|quad_end|>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
- },
77
- "151652": {
78
- "content": "<|vision_start|>",
79
- "lstrip": false,
80
- "normalized": false,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": true
84
- },
85
- "151653": {
86
- "content": "<|vision_end|>",
87
- "lstrip": false,
88
- "normalized": false,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": true
92
- },
93
- "151654": {
94
- "content": "<|vision_pad|>",
95
- "lstrip": false,
96
- "normalized": false,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": true
100
- },
101
- "151655": {
102
- "content": "<|image_pad|>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": true
108
- },
109
- "151656": {
110
- "content": "<|video_pad|>",
111
- "lstrip": false,
112
- "normalized": false,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": true
116
- },
117
- "151657": {
118
- "content": "<tool_call>",
119
- "lstrip": false,
120
- "normalized": false,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": false
124
- },
125
- "151658": {
126
- "content": "</tool_call>",
127
- "lstrip": false,
128
- "normalized": false,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "151659": {
134
- "content": "<|fim_prefix|>",
135
- "lstrip": false,
136
- "normalized": false,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "151660": {
142
- "content": "<|fim_middle|>",
143
- "lstrip": false,
144
- "normalized": false,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "151661": {
150
- "content": "<|fim_suffix|>",
151
- "lstrip": false,
152
- "normalized": false,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "151662": {
158
- "content": "<|fim_pad|>",
159
- "lstrip": false,
160
- "normalized": false,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "151663": {
166
- "content": "<|repo_name|>",
167
- "lstrip": false,
168
- "normalized": false,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "151664": {
174
- "content": "<|file_sep|>",
175
- "lstrip": false,
176
- "normalized": false,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "151665": {
182
- "content": "<tool_response>",
183
- "lstrip": false,
184
- "normalized": false,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "151666": {
190
- "content": "</tool_response>",
191
- "lstrip": false,
192
- "normalized": false,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- },
197
- "151667": {
198
- "content": "<think>",
199
- "lstrip": false,
200
- "normalized": false,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": true
204
- },
205
- "151668": {
206
- "content": "</think>",
207
- "lstrip": false,
208
- "normalized": false,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": true
212
- },
213
- "151669": {
214
- "content": "</answer>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false,
219
- "special": true
220
- },
221
- "151670": {
222
- "content": "</img_base64>",
223
- "lstrip": false,
224
- "normalized": false,
225
- "rstrip": false,
226
- "single_word": false,
227
- "special": true
228
- },
229
- "151671": {
230
- "content": "</json_output>",
231
- "lstrip": false,
232
- "normalized": false,
233
- "rstrip": false,
234
- "single_word": false,
235
- "special": true
236
- },
237
- "151672": {
238
- "content": "</ocr_text>",
239
- "lstrip": false,
240
- "normalized": false,
241
- "rstrip": false,
242
- "single_word": false,
243
- "special": true
244
- },
245
- "151673": {
246
- "content": "</tool_code>",
247
- "lstrip": false,
248
- "normalized": false,
249
- "rstrip": false,
250
- "single_word": false,
251
- "special": true
252
- },
253
- "151674": {
254
- "content": "<answer>",
255
- "lstrip": false,
256
- "normalized": false,
257
- "rstrip": false,
258
- "single_word": false,
259
- "special": true
260
- },
261
- "151675": {
262
- "content": "<img_base64>",
263
- "lstrip": false,
264
- "normalized": false,
265
- "rstrip": false,
266
- "single_word": false,
267
- "special": true
268
- },
269
- "151676": {
270
- "content": "<json_output>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false,
275
- "special": true
276
- },
277
- "151677": {
278
- "content": "<ocr_text>",
279
- "lstrip": false,
280
- "normalized": false,
281
- "rstrip": false,
282
- "single_word": false,
283
- "special": true
284
- },
285
- "151678": {
286
- "content": "<tool_code>",
287
- "lstrip": false,
288
- "normalized": false,
289
- "rstrip": false,
290
- "single_word": false,
291
- "special": true
292
- }
293
- },
294
- "additional_special_tokens": [
295
- "</answer>",
296
- "</img_base64>",
297
- "</json_output>",
298
- "</ocr_text>",
299
- "</think>",
300
- "</tool_code>",
301
- "<answer>",
302
- "<img_base64>",
303
- "<json_output>",
304
- "<ocr_text>",
305
- "<think>",
306
- "<tool_code>"
307
- ],
308
- "bos_token": null,
309
- "clean_up_tokenization_spaces": false,
310
- "eos_token": "<|im_end|>",
311
- "errors": "replace",
312
- "extra_special_tokens": {},
313
- "model_max_length": 131072,
314
- "pad_token": "<|endoftext|>",
315
- "split_special_tokens": false,
316
- "tokenizer_class": "Qwen2Tokenizer",
317
- "unk_token": null
318
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232617_shutdown_signal_update_2602/training_state.json DELETED
@@ -1,763 +0,0 @@
1
- {
2
- "global_step": 2602,
3
- "num_updates": 2602,
4
- "use_lora": false,
5
- "rng_state": {
6
- "python": [
7
- 3,
8
- [
9
- 3228212754,
10
- 279998097,
11
- 2056665714,
12
- 3603597067,
13
- 3476614728,
14
- 3031000,
15
- 2954941703,
16
- 3205203341,
17
- 1022055790,
18
- 1415253971,
19
- 2181335448,
20
- 3582899399,
21
- 1644250258,
22
- 1793404199,
23
- 2566317855,
24
- 109101073,
25
- 3150506052,
26
- 2724067265,
27
- 97547420,
28
- 2995366220,
29
- 3519916584,
30
- 3209457352,
31
- 3126309277,
32
- 3451453441,
33
- 1702671318,
34
- 2107243699,
35
- 2422777587,
36
- 360391179,
37
- 585237960,
38
- 2832187814,
39
- 2662498495,
40
- 2394734758,
41
- 1444276186,
42
- 1837837410,
43
- 961418280,
44
- 1195482276,
45
- 935680843,
46
- 3968754582,
47
- 2211483879,
48
- 1837545159,
49
- 2419172187,
50
- 2042898634,
51
- 2665785964,
52
- 356867850,
53
- 3385622908,
54
- 3868724832,
55
- 2977197007,
56
- 1479685303,
57
- 1540416526,
58
- 331944145,
59
- 3530610791,
60
- 2842657301,
61
- 2372012930,
62
- 639325304,
63
- 3360783663,
64
- 3762064881,
65
- 2160841949,
66
- 4001870304,
67
- 1864777350,
68
- 717368547,
69
- 362746266,
70
- 3779466655,
71
- 1960142933,
72
- 1723756462,
73
- 1326392635,
74
- 2695751926,
75
- 1728155752,
76
- 2347807318,
77
- 1862557049,
78
- 3999800477,
79
- 2277115301,
80
- 1516014806,
81
- 3846995662,
82
- 2388310657,
83
- 2859396105,
84
- 1441398545,
85
- 3439746988,
86
- 3678365781,
87
- 190759243,
88
- 4075004972,
89
- 342634350,
90
- 2045571341,
91
- 557542918,
92
- 2240372410,
93
- 1982579501,
94
- 255722053,
95
- 1496213542,
96
- 2563043770,
97
- 620403458,
98
- 3666797179,
99
- 4194690277,
100
- 1725488508,
101
- 2427139442,
102
- 911138792,
103
- 2810519096,
104
- 403900489,
105
- 1235282796,
106
- 3323510948,
107
- 3976438655,
108
- 2592317228,
109
- 1469307213,
110
- 456462311,
111
- 3393494366,
112
- 669420558,
113
- 1939678322,
114
- 4073521067,
115
- 3342970892,
116
- 2452710290,
117
- 2793129860,
118
- 1342676286,
119
- 2394512596,
120
- 1832972552,
121
- 3814703913,
122
- 945666136,
123
- 3552696630,
124
- 3165169504,
125
- 4234896064,
126
- 3288485605,
127
- 2637492903,
128
- 3688384962,
129
- 1693185353,
130
- 3041897498,
131
- 3666651581,
132
- 216719692,
133
- 2101621578,
134
- 1056505155,
135
- 3806530083,
136
- 2345205292,
137
- 2868630622,
138
- 3289598319,
139
- 597570811,
140
- 2632481252,
141
- 3875619652,
142
- 289253672,
143
- 77311731,
144
- 4133169138,
145
- 3462637509,
146
- 2047139049,
147
- 109094532,
148
- 1309238588,
149
- 4183047643,
150
- 3849080966,
151
- 2612177601,
152
- 2747398543,
153
- 2317389804,
154
- 2206126400,
155
- 3538931825,
156
- 4015230236,
157
- 1548164965,
158
- 3615557249,
159
- 3874732623,
160
- 383396796,
161
- 78135062,
162
- 836187159,
163
- 1405060375,
164
- 4124734680,
165
- 2283841137,
166
- 130389111,
167
- 2370256028,
168
- 3117432748,
169
- 2188669863,
170
- 1274090654,
171
- 136753743,
172
- 3535123905,
173
- 928699189,
174
- 2096609090,
175
- 140690583,
176
- 1335216202,
177
- 1664118110,
178
- 370920611,
179
- 3381310767,
180
- 635177978,
181
- 919944943,
182
- 1794890933,
183
- 3785921605,
184
- 988623168,
185
- 3586512205,
186
- 3578229114,
187
- 2227888166,
188
- 1199377973,
189
- 198574987,
190
- 3359460270,
191
- 41421261,
192
- 3818002247,
193
- 1611622405,
194
- 1172309347,
195
- 1127078025,
196
- 864036890,
197
- 3821592921,
198
- 1074908116,
199
- 4045652492,
200
- 150578148,
201
- 1490173923,
202
- 1255586022,
203
- 3620141376,
204
- 1119552840,
205
- 1604539422,
206
- 2899876376,
207
- 1634692146,
208
- 2430160935,
209
- 3525965146,
210
- 3634230653,
211
- 2801553134,
212
- 2236757588,
213
- 3209439651,
214
- 638937059,
215
- 332630833,
216
- 2099354597,
217
- 2453040579,
218
- 2294611386,
219
- 3583062819,
220
- 3057053813,
221
- 2651436307,
222
- 820127535,
223
- 2234442340,
224
- 2926763826,
225
- 2608312654,
226
- 622663536,
227
- 1327274392,
228
- 579028969,
229
- 1638676546,
230
- 725757522,
231
- 2287475756,
232
- 2245008208,
233
- 583117806,
234
- 1681911299,
235
- 4043302089,
236
- 2733469006,
237
- 3482246612,
238
- 1699131086,
239
- 2009286409,
240
- 1012929586,
241
- 1831443753,
242
- 3401815932,
243
- 2335754234,
244
- 1338527095,
245
- 1234451965,
246
- 2260706742,
247
- 3195944902,
248
- 933824426,
249
- 3055004187,
250
- 442858759,
251
- 3568329250,
252
- 2010957609,
253
- 2249247272,
254
- 1258950970,
255
- 205311363,
256
- 607774665,
257
- 2098340782,
258
- 3806387601,
259
- 1242750989,
260
- 3609867707,
261
- 241559992,
262
- 2980673848,
263
- 249731274,
264
- 2840307599,
265
- 3768697067,
266
- 2786582312,
267
- 234722404,
268
- 501116625,
269
- 3883427569,
270
- 2178103353,
271
- 2378845018,
272
- 4115296692,
273
- 870819025,
274
- 4131469458,
275
- 4141674918,
276
- 2078498292,
277
- 3999732609,
278
- 2537021868,
279
- 3558720844,
280
- 3704987724,
281
- 398283175,
282
- 1690550218,
283
- 2326228611,
284
- 2685433390,
285
- 3899703572,
286
- 3902620680,
287
- 2509824424,
288
- 1513684241,
289
- 2524739654,
290
- 3117375505,
291
- 1379082516,
292
- 2734650653,
293
- 2355589146,
294
- 2451030867,
295
- 2887987439,
296
- 2131990321,
297
- 2214951112,
298
- 4219817646,
299
- 1537640525,
300
- 3339366035,
301
- 2845436015,
302
- 3914197935,
303
- 2658843500,
304
- 3766601378,
305
- 1182641028,
306
- 1457188584,
307
- 4153939547,
308
- 2335418690,
309
- 1152149210,
310
- 870429227,
311
- 4230760942,
312
- 1976040608,
313
- 785192739,
314
- 184789279,
315
- 1377261339,
316
- 707722660,
317
- 3558575032,
318
- 53709080,
319
- 805111893,
320
- 4277091928,
321
- 362164270,
322
- 3343191968,
323
- 764658199,
324
- 1099715208,
325
- 4090812555,
326
- 68096351,
327
- 627278739,
328
- 1254763496,
329
- 4216318088,
330
- 1188064625,
331
- 3429733508,
332
- 3022473048,
333
- 3847088807,
334
- 3053202734,
335
- 3740308065,
336
- 1993837633,
337
- 763503311,
338
- 1037390932,
339
- 588432609,
340
- 1536894725,
341
- 1306273771,
342
- 2543433092,
343
- 1004993106,
344
- 700767389,
345
- 2357318127,
346
- 3169490527,
347
- 3918469492,
348
- 2033525981,
349
- 519262086,
350
- 1129257868,
351
- 2376802674,
352
- 2854805533,
353
- 3880903346,
354
- 74109069,
355
- 2008994945,
356
- 4114401423,
357
- 2811556615,
358
- 3471501456,
359
- 837319289,
360
- 2953139467,
361
- 2327107742,
362
- 133550557,
363
- 3916368228,
364
- 2552796922,
365
- 1537772292,
366
- 1833298346,
367
- 2009274777,
368
- 424528351,
369
- 2717359636,
370
- 2825961025,
371
- 1861953389,
372
- 2884908497,
373
- 3801183979,
374
- 1856316371,
375
- 2235711410,
376
- 2988382290,
377
- 1187502951,
378
- 3671488340,
379
- 1954214264,
380
- 2964041676,
381
- 1604199356,
382
- 3311430473,
383
- 1009672891,
384
- 1165005921,
385
- 3302391626,
386
- 2660756877,
387
- 892284552,
388
- 2393343854,
389
- 3816239257,
390
- 3383925622,
391
- 2632741631,
392
- 150411860,
393
- 4211331326,
394
- 2563945928,
395
- 3147050596,
396
- 86291816,
397
- 1306956905,
398
- 3702342117,
399
- 221494470,
400
- 2274829119,
401
- 3628261915,
402
- 399492089,
403
- 1039477125,
404
- 3636665160,
405
- 1499731795,
406
- 2521019510,
407
- 3820576314,
408
- 651306401,
409
- 2222447223,
410
- 762476894,
411
- 3372134685,
412
- 2773485657,
413
- 1506070889,
414
- 166626664,
415
- 2296565478,
416
- 2627756446,
417
- 1432061762,
418
- 875491433,
419
- 3481499302,
420
- 647568111,
421
- 1079088546,
422
- 3657637708,
423
- 1156169451,
424
- 1864010770,
425
- 2499694803,
426
- 1857267438,
427
- 2291220436,
428
- 3324416067,
429
- 2904742373,
430
- 1611590803,
431
- 3124040330,
432
- 2795308170,
433
- 3443049470,
434
- 2264039167,
435
- 3511022788,
436
- 2491483558,
437
- 3811739223,
438
- 3115068110,
439
- 3279986730,
440
- 3570889482,
441
- 335123233,
442
- 360967562,
443
- 311303413,
444
- 2380553530,
445
- 1843925797,
446
- 947441595,
447
- 3427872459,
448
- 2885833189,
449
- 79715633,
450
- 514452158,
451
- 1736537499,
452
- 3982473220,
453
- 1088572403,
454
- 2384199361,
455
- 1784448850,
456
- 2419864392,
457
- 639538932,
458
- 2112974083,
459
- 3934260396,
460
- 2850658226,
461
- 585502134,
462
- 1760060627,
463
- 3534598283,
464
- 118824413,
465
- 799460671,
466
- 1478757883,
467
- 3050258322,
468
- 2579705998,
469
- 3133048451,
470
- 2223823746,
471
- 603685429,
472
- 2387682555,
473
- 3009556562,
474
- 1521901351,
475
- 2646225121,
476
- 1773849074,
477
- 3347873314,
478
- 1918004826,
479
- 1726862757,
480
- 2513387996,
481
- 508242897,
482
- 339225034,
483
- 1805609427,
484
- 323575129,
485
- 765632792,
486
- 4218304970,
487
- 296616831,
488
- 880218140,
489
- 2165587941,
490
- 2069055842,
491
- 110437221,
492
- 1281149935,
493
- 3527927111,
494
- 4246842114,
495
- 501470319,
496
- 110693585,
497
- 2361144593,
498
- 958597015,
499
- 540902541,
500
- 2351458930,
501
- 3824880566,
502
- 2412074905,
503
- 1526349815,
504
- 2951752081,
505
- 1577401122,
506
- 645843044,
507
- 1122678576,
508
- 3870028103,
509
- 3563016932,
510
- 2394128327,
511
- 1412316709,
512
- 1770485652,
513
- 3328500527,
514
- 2153223048,
515
- 1571141422,
516
- 1950096991,
517
- 3843465276,
518
- 1189099356,
519
- 1707319037,
520
- 1312370001,
521
- 4085046861,
522
- 3231557091,
523
- 3340822452,
524
- 1898079545,
525
- 1373556942,
526
- 891254598,
527
- 543958551,
528
- 3929023245,
529
- 3262642994,
530
- 3221866934,
531
- 2390598216,
532
- 1036903094,
533
- 2097686434,
534
- 129207147,
535
- 2964160713,
536
- 1881698322,
537
- 1515645930,
538
- 3226263079,
539
- 1986344504,
540
- 3005241002,
541
- 3923005616,
542
- 1430681832,
543
- 2048310876,
544
- 631221366,
545
- 2972301268,
546
- 3276451436,
547
- 3841702416,
548
- 2585920783,
549
- 4070240888,
550
- 3697275337,
551
- 564704448,
552
- 266488781,
553
- 3252391941,
554
- 2796272702,
555
- 2665303656,
556
- 3413456714,
557
- 2470069594,
558
- 2470272528,
559
- 1660836326,
560
- 991966684,
561
- 3814972761,
562
- 1794669421,
563
- 892478324,
564
- 461928726,
565
- 2461293569,
566
- 561261597,
567
- 3704935953,
568
- 1737302340,
569
- 4141207295,
570
- 1072950705,
571
- 1100316204,
572
- 49599,
573
- 3655686352,
574
- 2604516846,
575
- 169477890,
576
- 1552495902,
577
- 4076641781,
578
- 4288801538,
579
- 2661645907,
580
- 2681760413,
581
- 3191805957,
582
- 1555588618,
583
- 2005446622,
584
- 2876389060,
585
- 2506534665,
586
- 3737615325,
587
- 539844861,
588
- 2614598422,
589
- 3169363989,
590
- 1460455376,
591
- 3597830757,
592
- 79929582,
593
- 1950458365,
594
- 4188478473,
595
- 675585740,
596
- 905482938,
597
- 3092725,
598
- 3333208631,
599
- 2096209247,
600
- 1647933404,
601
- 2581635632,
602
- 1778498943,
603
- 3092521474,
604
- 988446911,
605
- 1790098568,
606
- 2163371370,
607
- 3874214587,
608
- 3811993331,
609
- 3505387423,
610
- 2828235272,
611
- 1132675285,
612
- 4047174618,
613
- 2760040098,
614
- 3320824721,
615
- 202201724,
616
- 1355357947,
617
- 627906198,
618
- 4127456551,
619
- 2431589489,
620
- 675868086,
621
- 2865627058,
622
- 4123612491,
623
- 4047286524,
624
- 747101435,
625
- 1216754111,
626
- 2427503810,
627
- 3514051898,
628
- 452300667,
629
- 2349273222,
630
- 2983441288,
631
- 1420412231,
632
- 2035374170,
633
- 336
634
- ],
635
- null
636
- ],
637
- "numpy": [
638
- "MT19937",
639
- "[ 15572 980513701 2334715163 3585534944 1822198675 158479007\n 1300107201 2003433159 424170022 4102602503 2437447838 1924282775\n 2084306490 4132823124 4216394081 1526156729 4231078312 3658730376\n 3599347945 3798337125 544676946 3949203055 1596292274 2255158710\n 703032348 636265253 2880318131 3345387760 162413307 2418710564\n 3712245020 2175226970 563044056 2939814745 2838234633 468141434\n 616739654 564867267 2130155541 815641611 601811839 2004017220\n 3627706467 3951463947 810570068 3028421201 454655469 3270345648\n 555008207 3255294172 3259033389 429183833 272696145 2007214122\n 2243779629 1934853570 517873959 1769075612 2057249323 825685197\n 21711389 271106734 3943034084 3547272802 1718926725 3289803093\n 2224067888 3644890877 3431377018 1754806530 2376472949 2892610362\n 1500501344 3824621710 1417356523 4122790557 775716514 1813030967\n 3994108828 391693578 1388189506 1179060416 1727839607 3646219745\n 3467814014 1642908326 1500533561 1281477999 2139613886 209245364\n 1449934686 3593983958 693904485 999778614 847538348 922048571\n 1218470567 916177375 1196217437 3715695791 3572687936 2177952212\n 2976499668 1502043605 3295312140 473156180 3489777550 4116262924\n 726728023 266896064 1207195182 1422796794 3298302327 2546802485\n 3089285157 4087066326 281999229 3833891270 4133996591 3393553875\n 1790581640 1088958086 372349629 1150038540 3967005853 3449868499\n 2783031950 745299387 4177989191 440862037 1630705675 3432601205\n 3976086290 2477494074 2631694750 55867155 3381732871 3988268061\n 4190916192 3426936986 1292805609 2800262421 1433946529 2604379311\n 3803100625 2130255467 4134910564 3389303350 912295037 1986407142\n 60132537 1465812628 2556679777 768839848 561642210 2962288069\n 2900250399 2446769166 2830015834 1820339287 3836052622 3843539266\n 3448899655 719693687 2608513431 807017838 705657612 1313405027\n 308572498 3011745795 3544875535 3662789479 792949536 1679330381\n 2262304426 3714222327 3252067572 3530366244 2847367189 1818638534\n 
4196918839 1197188921 1714287054 3610324578 1759524349 658319653\n 4062775635 2170322251 3246948233 467741743 2311362121 1326083926\n 2215935251 2860151158 3543259014 4288886076 1000061343 35607424\n 3800789574 1024328448 2871144392 1452668283 2547917804 794856369\n 3652179617 850026679 66787329 99615284 3360338772 2597540132\n 1809210064 3923947080 4257343339 372176093 3483099399 721873771\n 1101276431 2834898271 76176267 4200628658 2773607601 3516401276\n 3454896121 2354147457 1223458703 3128119783 529915195 2585910314\n 3544532140 551469296 3014823830 3511999051 1463986000 3624754398\n 704833819 3852626834 2711357985 1324465084 1627535231 570708901\n 2717601289 725003848 1942965082 328822490 3620824822 1651096734\n 2785116849 1685019798 1956468619 3534906475 922043906 3007951301\n 4238844799 755293417 3011697131 446474988 2478554140 2374977239\n 1380053003 2243890933 2686719863 2130566958 3840030097 1793607073\n 3796244776 2639065600 3001847252 1477803129 1197364324 2384746982\n 766409730 2722234329 3102942583 2032952634 961381465 1104664880\n 431680105 3809455490 596671995 2974418974 2572169252 3211875863\n 1700476003 2461146565 4138090399 3571815849 2874279728 3348404697\n 1894538786 1654054896 1861571639 643113073 1024018105 110160162\n 3688705424 1588307750 1010684555 1071478016 3908839978 1361810520\n 4077136201 1942152543 3862285636 788921650 3325732842 2086596894\n 2354559341 920752382 2089296746 412564844 3783133394 3542635692\n 151261507 2024582056 27012071 3714904670 2251973869 3415653000\n 3122199829 1793993298 3504479999 2494502573 3472923469 628884745\n 400477905 2068607674 2511099917 76006962 4127192322 656421806\n 2099151600 4055081824 3120853595 1902231402 1793968517 2739152483\n 3675524757 3637643391 2093725246 3500121902 291758869 1652675998\n 1139040273 1626503079 1329269718 3800218668 1301440229 4094598479\n 2030419032 2206069114 2167504310 3568823651 1480132672 4189195270\n 1003514971 2108978250 4235920891 1015223357 1328980599 3065593845\n 
772959451 1736648400 1111173855 2673522945 3202521755 1515315402\n 1025987717 2556593896 1098413506 3353399904 2969501057 2094670114\n 2847919939 3042807578 2837794286 1675561875 2905519122 4265188297\n 2610926124 846285729 2241003777 2845770412 2129473060 3762815768\n 2144316967 1546390655 1870814520 1524713984 3716398313 3346580439\n 507361322 3071157273 715371311 667081236 1562427246 1416032086\n 2719153631 1214541502 3927763433 4093412577 1609261242 1472085592\n 2916826031 2284397012 4029669634 4115943418 618581971 2078599894\n 2195634027 568626950 551593208 1404161907 4048083862 206856294\n 2947194844 2767249973 1603907667 1631351803 1522568516 3530861276\n 932299423 1409409376 1006753259 2778802782 2428826612 4160546743\n 1957871534 3303229622 2827456021 2670092224 3383794488 945607414\n 1983832766 358657548 4271708270 755497396 3434273208 1362230140\n 1689853703 801994005 3746197505 3596436611 1542752314 3254670338\n 1548922657 4130814301 3943625172 221411986 713064282 4233062979\n 4075891970 2437106728 544573526 3064910259 151483803 689855214\n 1545945006 3683633364 3289795997 329789217 4168762065 3787243687\n 2265695874 39834191 1266893307 1324209011 1243246540 3973960372\n 541659911 1362379416 1601251635 1863255185 3125665448 4219983083\n 2682202466 567260347 1405575843 3420495303 2758288434 3586390223\n 336221788 2630832173 3573336941 1218191945 2213154892 2821992107\n 3693992851 696758711 279252507 2892498320 1862489732 255938916\n 1661968992 3484941106 4082783555 2936202287 3514506417 1758172715\n 517257860 3411688455 3637760904 1419041484 2137852375 757229925\n 3065881553 339906360 661617426 760623637 1046610245 876310326\n 1014024268 3183719419 2438284349 1776461276 3594106675 2854090739\n 2602159385 4289618092 2857553425 695761542 3083079398 76408317\n 3254461403 153861699 2129194363 2941386031 4094753231 1156889483\n 2242959746 1437290897 1982676962 1514123682 1353077347 1818424511\n 2436251404 4085735581 3547311726 4033030170 2353214711 
479069124\n 241656432 4085762125 551929572 2454945299 132044757 524232234\n 3799812788 6269782 1338614034 3443833252 1258856457 4165168463\n 355192100 2534159709 2851727269 2518795790 3366162664 3414356452\n 1869549905 2688919231 3437293505 2606459835 902202159 3721325246\n 1701626821 1124672137 2815257054 3602219523 1714310200 949851574\n 2336520456 504372525 1144232445 195534505 4020833259 946396359\n 1559665603 3237234635 826432554 799463637 3769666381 3612718603\n 762518382 2954474157 4052494419 672053721 1345549799 1873779721\n 2936068468 972026843 1411934901 2952294227 2546812485 2659826516\n 2834428224 2455667549 4281380303 2345320401 3932855189 309111429\n 834893265 2699122382 2146331862 3207660078 1202940344 1030469978\n 3945221344 2900087534 722371964 3096315981 1621397645 907802015\n 450035999 2628913824 3160204880 3961963201 349350642 3107386851\n 560688431 2098806006 3142991583 79041694 2234561220 122454157]",
640
- 624,
641
- 0,
642
- 0.0
643
- ],
644
- "mlx": 1756905991,
645
- "mlx_key": [
646
- 0,
647
- 1756903229
648
- ]
649
- },
650
- "training_args_snapshot": {
651
- "output_dir": "outy1266_align_last7",
652
- "max_kv_size": 1536,
653
- "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit",
654
- "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/InferenceIllusionist/gpt-oss-20b-MLX-4bit",
655
- "draft_model_path": null,
656
- "benchmark_every": 0,
657
- "benchmark_dataset": "gsm8k",
658
- "benchmark_dataset_config": "main",
659
- "benchmark_split": "test",
660
- "benchmark_samples": 10,
661
- "benchmark_prompt_key": "question",
662
- "benchmark_answer_key": "answer",
663
- "benchmark_max_new_tokens": 196,
664
- "benchmark_temperature": 0.0,
665
- "benchmark_top_p": 1.0,
666
- "benchmark_top_k": 0,
667
- "benchmark_use_chat_template": true,
668
- "benchmark_stop_on_error": false,
669
- "min_think_tokens": 32,
670
- "think_end_early_bias": -12.0,
671
- "bias_answer_start_after_min_think": true,
672
- "train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl",
673
- "val_dataset_path": null,
674
- "dataset_name": null,
675
- "dataset_config": null,
676
- "dataset_train_split": "train",
677
- "dataset_val_split": "test",
678
- "dataset_prompt_key": "prompt",
679
- "dataset_answer_key": "completion",
680
- "max_prompt_len": 350,
681
- "max_gen_len": 128,
682
- "system_prompt": null,
683
- "think_start_tag": "<think>",
684
- "think_end_tag": "</think>",
685
- "answer_start_tag": "<answer>",
686
- "answer_end_tag": "</answer>",
687
- "think_boost_tokens": 24,
688
- "think_temperature": 0.15,
689
- "answer_temperature": 0.1,
690
- "sampling_top_p": 0.6,
691
- "sampling_min_p": 0.05,
692
- "sampling_top_k": 40,
693
- "repetition_penalty": 1.15,
694
- "repetition_context_size": 64,
695
- "hard_mask_mcq_first_token": true,
696
- "mcq_letter_lift": 10.0,
697
- "mcq_ban_first_bias": -14.0,
698
- "nonmcq_ban_first_bias": -10.0,
699
- "mcq_close_after_k": 1,
700
- "min_answer_tokens": 6,
701
- "min_answer_tokens_mcq": 1,
702
- "bias_close_think": 6.0,
703
- "bias_answer_start": 3.0,
704
- "punish_reopen_think": -3.0,
705
- "punish_extra_think_end": -6.0,
706
- "bias_eos_after_answer": 4.0,
707
- "allow_tool_calls": false,
708
- "tool_call_penalty": 1.0,
709
- "reward_content_type": "smart",
710
- "reward_format_weight": 0.2,
711
- "reward_content_weight": 0.7,
712
- "think_reward_weight": 0.1,
713
- "think_len_min": 16,
714
- "think_len_max": 64,
715
- "use_lora": false,
716
- "num_rollout_samples": 3,
717
- "ppo_batch_size": 1,
718
- "grpo_beta": 0.04,
719
- "learning_rate": 1.4e-06,
720
- "optimizer_beta1": 0.9,
721
- "optimizer_beta2": 0.95,
722
- "optimizer_weight_decay": 0.01,
723
- "grad_clip_norm": 0.35,
724
- "save_optimizer_state": false,
725
- "lr_schedule_config": {
726
- "name": "cosine_decay",
727
- "arguments": [
728
- 1.4e-06,
729
- 60000,
730
- 2e-07
731
- ],
732
- "warmup": 4000,
733
- "warmup_init": 2e-07
734
- },
735
- "grad_accum_steps": 2,
736
- "num_training_steps": 45869,
737
- "save_every": 10,
738
- "eval_every": 0,
739
- "seed": 15572,
740
- "shuffle_data": true,
741
- "use_grad_checkpointing": false,
742
- "grad_checkpoint_layers": 0,
743
- "log_samples_every": 1,
744
- "max_logged_samples": 50,
745
- "log_prompts": true,
746
- "sample_log_path": null,
747
- "kv_bits": 0,
748
- "kv_group_size": 64,
749
- "quantized_kv_start": 0,
750
- "verbose": true,
751
- "use_wandb": true,
752
- "wandb_project": "reasonable-qwen-4b-mlxv2isi",
753
- "wandb_entity": null,
754
- "wandb_run_name": null,
755
- "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last7/checkpoint_20250903_224014_periodic_update_2510",
756
- "allow_cross_arch_ref": true,
757
- "align_bridge_path": null,
758
- "align_bridge_weight": 1.0,
759
- "align_pool": "mean",
760
- "align_after_tag": "<|start|>assistant<|channel|>analysis<|message|>",
761
- "effective_batch_size": 4
762
- }
763
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint_20250903_232617_shutdown_signal_update_2602/vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef812c383c7dcc573b414fe0322217c5b5519c991a700b7f9c2eca121ef0a283
3
+ size 8044982009
model.safetensors.index.json DELETED
@@ -1,406 +0,0 @@
1
- {
2
- "metadata": {
3
- "total_size": 8044936192,
4
- "total_parameters": 4022468096
5
- },
6
- "weight_map": {
7
- "model.embed_tokens.weight": "model.safetensors",
8
- "model.layers.0.input_layernorm.weight": "model.safetensors",
9
- "model.layers.0.mlp.down_proj.weight": "model.safetensors",
10
- "model.layers.0.mlp.gate_proj.weight": "model.safetensors",
11
- "model.layers.0.mlp.up_proj.weight": "model.safetensors",
12
- "model.layers.0.post_attention_layernorm.weight": "model.safetensors",
13
- "model.layers.0.self_attn.k_norm.weight": "model.safetensors",
14
- "model.layers.0.self_attn.k_proj.weight": "model.safetensors",
15
- "model.layers.0.self_attn.o_proj.weight": "model.safetensors",
16
- "model.layers.0.self_attn.q_norm.weight": "model.safetensors",
17
- "model.layers.0.self_attn.q_proj.weight": "model.safetensors",
18
- "model.layers.0.self_attn.v_proj.weight": "model.safetensors",
19
- "model.layers.1.input_layernorm.weight": "model.safetensors",
20
- "model.layers.1.mlp.down_proj.weight": "model.safetensors",
21
- "model.layers.1.mlp.gate_proj.weight": "model.safetensors",
22
- "model.layers.1.mlp.up_proj.weight": "model.safetensors",
23
- "model.layers.1.post_attention_layernorm.weight": "model.safetensors",
24
- "model.layers.1.self_attn.k_norm.weight": "model.safetensors",
25
- "model.layers.1.self_attn.k_proj.weight": "model.safetensors",
26
- "model.layers.1.self_attn.o_proj.weight": "model.safetensors",
27
- "model.layers.1.self_attn.q_norm.weight": "model.safetensors",
28
- "model.layers.1.self_attn.q_proj.weight": "model.safetensors",
29
- "model.layers.1.self_attn.v_proj.weight": "model.safetensors",
30
- "model.layers.10.input_layernorm.weight": "model.safetensors",
31
- "model.layers.10.mlp.down_proj.weight": "model.safetensors",
32
- "model.layers.10.mlp.gate_proj.weight": "model.safetensors",
33
- "model.layers.10.mlp.up_proj.weight": "model.safetensors",
34
- "model.layers.10.post_attention_layernorm.weight": "model.safetensors",
35
- "model.layers.10.self_attn.k_norm.weight": "model.safetensors",
36
- "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
37
- "model.layers.10.self_attn.o_proj.weight": "model.safetensors",
38
- "model.layers.10.self_attn.q_norm.weight": "model.safetensors",
39
- "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
40
- "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
41
- "model.layers.11.input_layernorm.weight": "model.safetensors",
42
- "model.layers.11.mlp.down_proj.weight": "model.safetensors",
43
- "model.layers.11.mlp.gate_proj.weight": "model.safetensors",
44
- "model.layers.11.mlp.up_proj.weight": "model.safetensors",
45
- "model.layers.11.post_attention_layernorm.weight": "model.safetensors",
46
- "model.layers.11.self_attn.k_norm.weight": "model.safetensors",
47
- "model.layers.11.self_attn.k_proj.weight": "model.safetensors",
48
- "model.layers.11.self_attn.o_proj.weight": "model.safetensors",
49
- "model.layers.11.self_attn.q_norm.weight": "model.safetensors",
50
- "model.layers.11.self_attn.q_proj.weight": "model.safetensors",
51
- "model.layers.11.self_attn.v_proj.weight": "model.safetensors",
52
- "model.layers.12.input_layernorm.weight": "model.safetensors",
53
- "model.layers.12.mlp.down_proj.weight": "model.safetensors",
54
- "model.layers.12.mlp.gate_proj.weight": "model.safetensors",
55
- "model.layers.12.mlp.up_proj.weight": "model.safetensors",
56
- "model.layers.12.post_attention_layernorm.weight": "model.safetensors",
57
- "model.layers.12.self_attn.k_norm.weight": "model.safetensors",
58
- "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
59
- "model.layers.12.self_attn.o_proj.weight": "model.safetensors",
60
- "model.layers.12.self_attn.q_norm.weight": "model.safetensors",
61
- "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
62
- "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
63
- "model.layers.13.input_layernorm.weight": "model.safetensors",
64
- "model.layers.13.mlp.down_proj.weight": "model.safetensors",
65
- "model.layers.13.mlp.gate_proj.weight": "model.safetensors",
66
- "model.layers.13.mlp.up_proj.weight": "model.safetensors",
67
- "model.layers.13.post_attention_layernorm.weight": "model.safetensors",
68
- "model.layers.13.self_attn.k_norm.weight": "model.safetensors",
69
- "model.layers.13.self_attn.k_proj.weight": "model.safetensors",
70
- "model.layers.13.self_attn.o_proj.weight": "model.safetensors",
71
- "model.layers.13.self_attn.q_norm.weight": "model.safetensors",
72
- "model.layers.13.self_attn.q_proj.weight": "model.safetensors",
73
- "model.layers.13.self_attn.v_proj.weight": "model.safetensors",
74
- "model.layers.14.input_layernorm.weight": "model.safetensors",
75
- "model.layers.14.mlp.down_proj.weight": "model.safetensors",
76
- "model.layers.14.mlp.gate_proj.weight": "model.safetensors",
77
- "model.layers.14.mlp.up_proj.weight": "model.safetensors",
78
- "model.layers.14.post_attention_layernorm.weight": "model.safetensors",
79
- "model.layers.14.self_attn.k_norm.weight": "model.safetensors",
80
- "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
81
- "model.layers.14.self_attn.o_proj.weight": "model.safetensors",
82
- "model.layers.14.self_attn.q_norm.weight": "model.safetensors",
83
- "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
84
- "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
85
- "model.layers.15.input_layernorm.weight": "model.safetensors",
86
- "model.layers.15.mlp.down_proj.weight": "model.safetensors",
87
- "model.layers.15.mlp.gate_proj.weight": "model.safetensors",
88
- "model.layers.15.mlp.up_proj.weight": "model.safetensors",
89
- "model.layers.15.post_attention_layernorm.weight": "model.safetensors",
90
- "model.layers.15.self_attn.k_norm.weight": "model.safetensors",
91
- "model.layers.15.self_attn.k_proj.weight": "model.safetensors",
92
- "model.layers.15.self_attn.o_proj.weight": "model.safetensors",
93
- "model.layers.15.self_attn.q_norm.weight": "model.safetensors",
94
- "model.layers.15.self_attn.q_proj.weight": "model.safetensors",
95
- "model.layers.15.self_attn.v_proj.weight": "model.safetensors",
96
- "model.layers.16.input_layernorm.weight": "model.safetensors",
97
- "model.layers.16.mlp.down_proj.weight": "model.safetensors",
98
- "model.layers.16.mlp.gate_proj.weight": "model.safetensors",
99
- "model.layers.16.mlp.up_proj.weight": "model.safetensors",
100
- "model.layers.16.post_attention_layernorm.weight": "model.safetensors",
101
- "model.layers.16.self_attn.k_norm.weight": "model.safetensors",
102
- "model.layers.16.self_attn.k_proj.weight": "model.safetensors",
103
- "model.layers.16.self_attn.o_proj.weight": "model.safetensors",
104
- "model.layers.16.self_attn.q_norm.weight": "model.safetensors",
105
- "model.layers.16.self_attn.q_proj.weight": "model.safetensors",
106
- "model.layers.16.self_attn.v_proj.weight": "model.safetensors",
107
- "model.layers.17.input_layernorm.weight": "model.safetensors",
108
- "model.layers.17.mlp.down_proj.weight": "model.safetensors",
109
- "model.layers.17.mlp.gate_proj.weight": "model.safetensors",
110
- "model.layers.17.mlp.up_proj.weight": "model.safetensors",
111
- "model.layers.17.post_attention_layernorm.weight": "model.safetensors",
112
- "model.layers.17.self_attn.k_norm.weight": "model.safetensors",
113
- "model.layers.17.self_attn.k_proj.weight": "model.safetensors",
114
- "model.layers.17.self_attn.o_proj.weight": "model.safetensors",
115
- "model.layers.17.self_attn.q_norm.weight": "model.safetensors",
116
- "model.layers.17.self_attn.q_proj.weight": "model.safetensors",
117
- "model.layers.17.self_attn.v_proj.weight": "model.safetensors",
118
- "model.layers.18.input_layernorm.weight": "model.safetensors",
119
- "model.layers.18.mlp.down_proj.weight": "model.safetensors",
120
- "model.layers.18.mlp.gate_proj.weight": "model.safetensors",
121
- "model.layers.18.mlp.up_proj.weight": "model.safetensors",
122
- "model.layers.18.post_attention_layernorm.weight": "model.safetensors",
123
- "model.layers.18.self_attn.k_norm.weight": "model.safetensors",
124
- "model.layers.18.self_attn.k_proj.weight": "model.safetensors",
125
- "model.layers.18.self_attn.o_proj.weight": "model.safetensors",
126
- "model.layers.18.self_attn.q_norm.weight": "model.safetensors",
127
- "model.layers.18.self_attn.q_proj.weight": "model.safetensors",
128
- "model.layers.18.self_attn.v_proj.weight": "model.safetensors",
129
- "model.layers.19.input_layernorm.weight": "model.safetensors",
130
- "model.layers.19.mlp.down_proj.weight": "model.safetensors",
131
- "model.layers.19.mlp.gate_proj.weight": "model.safetensors",
132
- "model.layers.19.mlp.up_proj.weight": "model.safetensors",
133
- "model.layers.19.post_attention_layernorm.weight": "model.safetensors",
134
- "model.layers.19.self_attn.k_norm.weight": "model.safetensors",
135
- "model.layers.19.self_attn.k_proj.weight": "model.safetensors",
136
- "model.layers.19.self_attn.o_proj.weight": "model.safetensors",
137
- "model.layers.19.self_attn.q_norm.weight": "model.safetensors",
138
- "model.layers.19.self_attn.q_proj.weight": "model.safetensors",
139
- "model.layers.19.self_attn.v_proj.weight": "model.safetensors",
140
- "model.layers.2.input_layernorm.weight": "model.safetensors",
141
- "model.layers.2.mlp.down_proj.weight": "model.safetensors",
142
- "model.layers.2.mlp.gate_proj.weight": "model.safetensors",
143
- "model.layers.2.mlp.up_proj.weight": "model.safetensors",
144
- "model.layers.2.post_attention_layernorm.weight": "model.safetensors",
145
- "model.layers.2.self_attn.k_norm.weight": "model.safetensors",
146
- "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
147
- "model.layers.2.self_attn.o_proj.weight": "model.safetensors",
148
- "model.layers.2.self_attn.q_norm.weight": "model.safetensors",
149
- "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
150
- "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
151
- "model.layers.20.input_layernorm.weight": "model.safetensors",
152
- "model.layers.20.mlp.down_proj.weight": "model.safetensors",
153
- "model.layers.20.mlp.gate_proj.weight": "model.safetensors",
154
- "model.layers.20.mlp.up_proj.weight": "model.safetensors",
155
- "model.layers.20.post_attention_layernorm.weight": "model.safetensors",
156
- "model.layers.20.self_attn.k_norm.weight": "model.safetensors",
157
- "model.layers.20.self_attn.k_proj.weight": "model.safetensors",
158
- "model.layers.20.self_attn.o_proj.weight": "model.safetensors",
159
- "model.layers.20.self_attn.q_norm.weight": "model.safetensors",
160
- "model.layers.20.self_attn.q_proj.weight": "model.safetensors",
161
- "model.layers.20.self_attn.v_proj.weight": "model.safetensors",
162
- "model.layers.21.input_layernorm.weight": "model.safetensors",
163
- "model.layers.21.mlp.down_proj.weight": "model.safetensors",
164
- "model.layers.21.mlp.gate_proj.weight": "model.safetensors",
165
- "model.layers.21.mlp.up_proj.weight": "model.safetensors",
166
- "model.layers.21.post_attention_layernorm.weight": "model.safetensors",
167
- "model.layers.21.self_attn.k_norm.weight": "model.safetensors",
168
- "model.layers.21.self_attn.k_proj.weight": "model.safetensors",
169
- "model.layers.21.self_attn.o_proj.weight": "model.safetensors",
170
- "model.layers.21.self_attn.q_norm.weight": "model.safetensors",
171
- "model.layers.21.self_attn.q_proj.weight": "model.safetensors",
172
- "model.layers.21.self_attn.v_proj.weight": "model.safetensors",
173
- "model.layers.22.input_layernorm.weight": "model.safetensors",
174
- "model.layers.22.mlp.down_proj.weight": "model.safetensors",
175
- "model.layers.22.mlp.gate_proj.weight": "model.safetensors",
176
- "model.layers.22.mlp.up_proj.weight": "model.safetensors",
177
- "model.layers.22.post_attention_layernorm.weight": "model.safetensors",
178
- "model.layers.22.self_attn.k_norm.weight": "model.safetensors",
179
- "model.layers.22.self_attn.k_proj.weight": "model.safetensors",
180
- "model.layers.22.self_attn.o_proj.weight": "model.safetensors",
181
- "model.layers.22.self_attn.q_norm.weight": "model.safetensors",
182
- "model.layers.22.self_attn.q_proj.weight": "model.safetensors",
183
- "model.layers.22.self_attn.v_proj.weight": "model.safetensors",
184
- "model.layers.23.input_layernorm.weight": "model.safetensors",
185
- "model.layers.23.mlp.down_proj.weight": "model.safetensors",
186
- "model.layers.23.mlp.gate_proj.weight": "model.safetensors",
187
- "model.layers.23.mlp.up_proj.weight": "model.safetensors",
188
- "model.layers.23.post_attention_layernorm.weight": "model.safetensors",
189
- "model.layers.23.self_attn.k_norm.weight": "model.safetensors",
190
- "model.layers.23.self_attn.k_proj.weight": "model.safetensors",
191
- "model.layers.23.self_attn.o_proj.weight": "model.safetensors",
192
- "model.layers.23.self_attn.q_norm.weight": "model.safetensors",
193
- "model.layers.23.self_attn.q_proj.weight": "model.safetensors",
194
- "model.layers.23.self_attn.v_proj.weight": "model.safetensors",
195
- "model.layers.24.input_layernorm.weight": "model.safetensors",
196
- "model.layers.24.mlp.down_proj.weight": "model.safetensors",
197
- "model.layers.24.mlp.gate_proj.weight": "model.safetensors",
198
- "model.layers.24.mlp.up_proj.weight": "model.safetensors",
199
- "model.layers.24.post_attention_layernorm.weight": "model.safetensors",
200
- "model.layers.24.self_attn.k_norm.weight": "model.safetensors",
201
- "model.layers.24.self_attn.k_proj.weight": "model.safetensors",
202
- "model.layers.24.self_attn.o_proj.weight": "model.safetensors",
203
- "model.layers.24.self_attn.q_norm.weight": "model.safetensors",
204
- "model.layers.24.self_attn.q_proj.weight": "model.safetensors",
205
- "model.layers.24.self_attn.v_proj.weight": "model.safetensors",
206
- "model.layers.25.input_layernorm.weight": "model.safetensors",
207
- "model.layers.25.mlp.down_proj.weight": "model.safetensors",
208
- "model.layers.25.mlp.gate_proj.weight": "model.safetensors",
209
- "model.layers.25.mlp.up_proj.weight": "model.safetensors",
210
- "model.layers.25.post_attention_layernorm.weight": "model.safetensors",
211
- "model.layers.25.self_attn.k_norm.weight": "model.safetensors",
212
- "model.layers.25.self_attn.k_proj.weight": "model.safetensors",
213
- "model.layers.25.self_attn.o_proj.weight": "model.safetensors",
214
- "model.layers.25.self_attn.q_norm.weight": "model.safetensors",
215
- "model.layers.25.self_attn.q_proj.weight": "model.safetensors",
216
- "model.layers.25.self_attn.v_proj.weight": "model.safetensors",
217
- "model.layers.26.input_layernorm.weight": "model.safetensors",
218
- "model.layers.26.mlp.down_proj.weight": "model.safetensors",
219
- "model.layers.26.mlp.gate_proj.weight": "model.safetensors",
220
- "model.layers.26.mlp.up_proj.weight": "model.safetensors",
221
- "model.layers.26.post_attention_layernorm.weight": "model.safetensors",
222
- "model.layers.26.self_attn.k_norm.weight": "model.safetensors",
223
- "model.layers.26.self_attn.k_proj.weight": "model.safetensors",
224
- "model.layers.26.self_attn.o_proj.weight": "model.safetensors",
225
- "model.layers.26.self_attn.q_norm.weight": "model.safetensors",
226
- "model.layers.26.self_attn.q_proj.weight": "model.safetensors",
227
- "model.layers.26.self_attn.v_proj.weight": "model.safetensors",
228
- "model.layers.27.input_layernorm.weight": "model.safetensors",
229
- "model.layers.27.mlp.down_proj.weight": "model.safetensors",
230
- "model.layers.27.mlp.gate_proj.weight": "model.safetensors",
231
- "model.layers.27.mlp.up_proj.weight": "model.safetensors",
232
- "model.layers.27.post_attention_layernorm.weight": "model.safetensors",
233
- "model.layers.27.self_attn.k_norm.weight": "model.safetensors",
234
- "model.layers.27.self_attn.k_proj.weight": "model.safetensors",
235
- "model.layers.27.self_attn.o_proj.weight": "model.safetensors",
236
- "model.layers.27.self_attn.q_norm.weight": "model.safetensors",
237
- "model.layers.27.self_attn.q_proj.weight": "model.safetensors",
238
- "model.layers.27.self_attn.v_proj.weight": "model.safetensors",
239
- "model.layers.28.input_layernorm.weight": "model.safetensors",
240
- "model.layers.28.mlp.down_proj.weight": "model.safetensors",
241
- "model.layers.28.mlp.gate_proj.weight": "model.safetensors",
242
- "model.layers.28.mlp.up_proj.weight": "model.safetensors",
243
- "model.layers.28.post_attention_layernorm.weight": "model.safetensors",
244
- "model.layers.28.self_attn.k_norm.weight": "model.safetensors",
245
- "model.layers.28.self_attn.k_proj.weight": "model.safetensors",
246
- "model.layers.28.self_attn.o_proj.weight": "model.safetensors",
247
- "model.layers.28.self_attn.q_norm.weight": "model.safetensors",
248
- "model.layers.28.self_attn.q_proj.weight": "model.safetensors",
249
- "model.layers.28.self_attn.v_proj.weight": "model.safetensors",
250
- "model.layers.29.input_layernorm.weight": "model.safetensors",
251
- "model.layers.29.mlp.down_proj.weight": "model.safetensors",
252
- "model.layers.29.mlp.gate_proj.weight": "model.safetensors",
253
- "model.layers.29.mlp.up_proj.weight": "model.safetensors",
254
- "model.layers.29.post_attention_layernorm.weight": "model.safetensors",
255
- "model.layers.29.self_attn.k_norm.weight": "model.safetensors",
256
- "model.layers.29.self_attn.k_proj.weight": "model.safetensors",
257
- "model.layers.29.self_attn.o_proj.weight": "model.safetensors",
258
- "model.layers.29.self_attn.q_norm.weight": "model.safetensors",
259
- "model.layers.29.self_attn.q_proj.weight": "model.safetensors",
260
- "model.layers.29.self_attn.v_proj.weight": "model.safetensors",
261
- "model.layers.3.input_layernorm.weight": "model.safetensors",
262
- "model.layers.3.mlp.down_proj.weight": "model.safetensors",
263
- "model.layers.3.mlp.gate_proj.weight": "model.safetensors",
264
- "model.layers.3.mlp.up_proj.weight": "model.safetensors",
265
- "model.layers.3.post_attention_layernorm.weight": "model.safetensors",
266
- "model.layers.3.self_attn.k_norm.weight": "model.safetensors",
267
- "model.layers.3.self_attn.k_proj.weight": "model.safetensors",
268
- "model.layers.3.self_attn.o_proj.weight": "model.safetensors",
269
- "model.layers.3.self_attn.q_norm.weight": "model.safetensors",
270
- "model.layers.3.self_attn.q_proj.weight": "model.safetensors",
271
- "model.layers.3.self_attn.v_proj.weight": "model.safetensors",
272
- "model.layers.30.input_layernorm.weight": "model.safetensors",
273
- "model.layers.30.mlp.down_proj.weight": "model.safetensors",
274
- "model.layers.30.mlp.gate_proj.weight": "model.safetensors",
275
- "model.layers.30.mlp.up_proj.weight": "model.safetensors",
276
- "model.layers.30.post_attention_layernorm.weight": "model.safetensors",
277
- "model.layers.30.self_attn.k_norm.weight": "model.safetensors",
278
- "model.layers.30.self_attn.k_proj.weight": "model.safetensors",
279
- "model.layers.30.self_attn.o_proj.weight": "model.safetensors",
280
- "model.layers.30.self_attn.q_norm.weight": "model.safetensors",
281
- "model.layers.30.self_attn.q_proj.weight": "model.safetensors",
282
- "model.layers.30.self_attn.v_proj.weight": "model.safetensors",
283
- "model.layers.31.input_layernorm.weight": "model.safetensors",
284
- "model.layers.31.mlp.down_proj.weight": "model.safetensors",
285
- "model.layers.31.mlp.gate_proj.weight": "model.safetensors",
286
- "model.layers.31.mlp.up_proj.weight": "model.safetensors",
287
- "model.layers.31.post_attention_layernorm.weight": "model.safetensors",
288
- "model.layers.31.self_attn.k_norm.weight": "model.safetensors",
289
- "model.layers.31.self_attn.k_proj.weight": "model.safetensors",
290
- "model.layers.31.self_attn.o_proj.weight": "model.safetensors",
291
- "model.layers.31.self_attn.q_norm.weight": "model.safetensors",
292
- "model.layers.31.self_attn.q_proj.weight": "model.safetensors",
293
- "model.layers.31.self_attn.v_proj.weight": "model.safetensors",
294
- "model.layers.32.input_layernorm.weight": "model.safetensors",
295
- "model.layers.32.mlp.down_proj.weight": "model.safetensors",
296
- "model.layers.32.mlp.gate_proj.weight": "model.safetensors",
297
- "model.layers.32.mlp.up_proj.weight": "model.safetensors",
298
- "model.layers.32.post_attention_layernorm.weight": "model.safetensors",
299
- "model.layers.32.self_attn.k_norm.weight": "model.safetensors",
300
- "model.layers.32.self_attn.k_proj.weight": "model.safetensors",
301
- "model.layers.32.self_attn.o_proj.weight": "model.safetensors",
302
- "model.layers.32.self_attn.q_norm.weight": "model.safetensors",
303
- "model.layers.32.self_attn.q_proj.weight": "model.safetensors",
304
- "model.layers.32.self_attn.v_proj.weight": "model.safetensors",
305
- "model.layers.33.input_layernorm.weight": "model.safetensors",
306
- "model.layers.33.mlp.down_proj.weight": "model.safetensors",
307
- "model.layers.33.mlp.gate_proj.weight": "model.safetensors",
308
- "model.layers.33.mlp.up_proj.weight": "model.safetensors",
309
- "model.layers.33.post_attention_layernorm.weight": "model.safetensors",
310
- "model.layers.33.self_attn.k_norm.weight": "model.safetensors",
311
- "model.layers.33.self_attn.k_proj.weight": "model.safetensors",
312
- "model.layers.33.self_attn.o_proj.weight": "model.safetensors",
313
- "model.layers.33.self_attn.q_norm.weight": "model.safetensors",
314
- "model.layers.33.self_attn.q_proj.weight": "model.safetensors",
315
- "model.layers.33.self_attn.v_proj.weight": "model.safetensors",
316
- "model.layers.34.input_layernorm.weight": "model.safetensors",
317
- "model.layers.34.mlp.down_proj.weight": "model.safetensors",
318
- "model.layers.34.mlp.gate_proj.weight": "model.safetensors",
319
- "model.layers.34.mlp.up_proj.weight": "model.safetensors",
320
- "model.layers.34.post_attention_layernorm.weight": "model.safetensors",
321
- "model.layers.34.self_attn.k_norm.weight": "model.safetensors",
322
- "model.layers.34.self_attn.k_proj.weight": "model.safetensors",
323
- "model.layers.34.self_attn.o_proj.weight": "model.safetensors",
324
- "model.layers.34.self_attn.q_norm.weight": "model.safetensors",
325
- "model.layers.34.self_attn.q_proj.weight": "model.safetensors",
326
- "model.layers.34.self_attn.v_proj.weight": "model.safetensors",
327
- "model.layers.35.input_layernorm.weight": "model.safetensors",
328
- "model.layers.35.mlp.down_proj.weight": "model.safetensors",
329
- "model.layers.35.mlp.gate_proj.weight": "model.safetensors",
330
- "model.layers.35.mlp.up_proj.weight": "model.safetensors",
331
- "model.layers.35.post_attention_layernorm.weight": "model.safetensors",
332
- "model.layers.35.self_attn.k_norm.weight": "model.safetensors",
333
- "model.layers.35.self_attn.k_proj.weight": "model.safetensors",
334
- "model.layers.35.self_attn.o_proj.weight": "model.safetensors",
335
- "model.layers.35.self_attn.q_norm.weight": "model.safetensors",
336
- "model.layers.35.self_attn.q_proj.weight": "model.safetensors",
337
- "model.layers.35.self_attn.v_proj.weight": "model.safetensors",
338
- "model.layers.4.input_layernorm.weight": "model.safetensors",
339
- "model.layers.4.mlp.down_proj.weight": "model.safetensors",
340
- "model.layers.4.mlp.gate_proj.weight": "model.safetensors",
341
- "model.layers.4.mlp.up_proj.weight": "model.safetensors",
342
- "model.layers.4.post_attention_layernorm.weight": "model.safetensors",
343
- "model.layers.4.self_attn.k_norm.weight": "model.safetensors",
344
- "model.layers.4.self_attn.k_proj.weight": "model.safetensors",
345
- "model.layers.4.self_attn.o_proj.weight": "model.safetensors",
346
- "model.layers.4.self_attn.q_norm.weight": "model.safetensors",
347
- "model.layers.4.self_attn.q_proj.weight": "model.safetensors",
348
- "model.layers.4.self_attn.v_proj.weight": "model.safetensors",
349
- "model.layers.5.input_layernorm.weight": "model.safetensors",
350
- "model.layers.5.mlp.down_proj.weight": "model.safetensors",
351
- "model.layers.5.mlp.gate_proj.weight": "model.safetensors",
352
- "model.layers.5.mlp.up_proj.weight": "model.safetensors",
353
- "model.layers.5.post_attention_layernorm.weight": "model.safetensors",
354
- "model.layers.5.self_attn.k_norm.weight": "model.safetensors",
355
- "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
356
- "model.layers.5.self_attn.o_proj.weight": "model.safetensors",
357
- "model.layers.5.self_attn.q_norm.weight": "model.safetensors",
358
- "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
359
- "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
360
- "model.layers.6.input_layernorm.weight": "model.safetensors",
361
- "model.layers.6.mlp.down_proj.weight": "model.safetensors",
362
- "model.layers.6.mlp.gate_proj.weight": "model.safetensors",
363
- "model.layers.6.mlp.up_proj.weight": "model.safetensors",
364
- "model.layers.6.post_attention_layernorm.weight": "model.safetensors",
365
- "model.layers.6.self_attn.k_norm.weight": "model.safetensors",
366
- "model.layers.6.self_attn.k_proj.weight": "model.safetensors",
367
- "model.layers.6.self_attn.o_proj.weight": "model.safetensors",
368
- "model.layers.6.self_attn.q_norm.weight": "model.safetensors",
369
- "model.layers.6.self_attn.q_proj.weight": "model.safetensors",
370
- "model.layers.6.self_attn.v_proj.weight": "model.safetensors",
371
- "model.layers.7.input_layernorm.weight": "model.safetensors",
372
- "model.layers.7.mlp.down_proj.weight": "model.safetensors",
373
- "model.layers.7.mlp.gate_proj.weight": "model.safetensors",
374
- "model.layers.7.mlp.up_proj.weight": "model.safetensors",
375
- "model.layers.7.post_attention_layernorm.weight": "model.safetensors",
376
- "model.layers.7.self_attn.k_norm.weight": "model.safetensors",
377
- "model.layers.7.self_attn.k_proj.weight": "model.safetensors",
378
- "model.layers.7.self_attn.o_proj.weight": "model.safetensors",
379
- "model.layers.7.self_attn.q_norm.weight": "model.safetensors",
380
- "model.layers.7.self_attn.q_proj.weight": "model.safetensors",
381
- "model.layers.7.self_attn.v_proj.weight": "model.safetensors",
382
- "model.layers.8.input_layernorm.weight": "model.safetensors",
383
- "model.layers.8.mlp.down_proj.weight": "model.safetensors",
384
- "model.layers.8.mlp.gate_proj.weight": "model.safetensors",
385
- "model.layers.8.mlp.up_proj.weight": "model.safetensors",
386
- "model.layers.8.post_attention_layernorm.weight": "model.safetensors",
387
- "model.layers.8.self_attn.k_norm.weight": "model.safetensors",
388
- "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
389
- "model.layers.8.self_attn.o_proj.weight": "model.safetensors",
390
- "model.layers.8.self_attn.q_norm.weight": "model.safetensors",
391
- "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
392
- "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
393
- "model.layers.9.input_layernorm.weight": "model.safetensors",
394
- "model.layers.9.mlp.down_proj.weight": "model.safetensors",
395
- "model.layers.9.mlp.gate_proj.weight": "model.safetensors",
396
- "model.layers.9.mlp.up_proj.weight": "model.safetensors",
397
- "model.layers.9.post_attention_layernorm.weight": "model.safetensors",
398
- "model.layers.9.self_attn.k_norm.weight": "model.safetensors",
399
- "model.layers.9.self_attn.k_proj.weight": "model.safetensors",
400
- "model.layers.9.self_attn.o_proj.weight": "model.safetensors",
401
- "model.layers.9.self_attn.q_norm.weight": "model.safetensors",
402
- "model.layers.9.self_attn.q_proj.weight": "model.safetensors",
403
- "model.layers.9.self_attn.v_proj.weight": "model.safetensors",
404
- "model.norm.weight": "model.safetensors"
405
- }
406
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
plots/loss_vs_updates.png DELETED
Binary file (40.5 kB)
 
plots/lr_vs_updates.png DELETED
Binary file (46.9 kB)
 
plots/reward_vs_updates.png DELETED
Binary file (58.8 kB)
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c73c38bcfda11e5e9cbed5db75854b6d0fdb3be4bbe54cffae22ad886a27137
3
+ size 11424535
training_state.json DELETED
@@ -1,763 +0,0 @@
1
- {
2
- "global_step": 1170,
3
- "num_updates": 1170,
4
- "use_lora": false,
5
- "rng_state": {
6
- "python": [
7
- 3,
8
- [
9
- 88433409,
10
- 3831378706,
11
- 2254718744,
12
- 3124532325,
13
- 1870680829,
14
- 287749387,
15
- 3447846173,
16
- 3949328054,
17
- 1423803997,
18
- 243075628,
19
- 3410472399,
20
- 4291626751,
21
- 3544287654,
22
- 2772540057,
23
- 1141969636,
24
- 792671283,
25
- 1143722062,
26
- 3917124382,
27
- 1873685467,
28
- 2825564264,
29
- 577430928,
30
- 3961905670,
31
- 1597653823,
32
- 3974344184,
33
- 187968963,
34
- 3720525757,
35
- 3136757766,
36
- 3149625810,
37
- 3542978814,
38
- 2043851790,
39
- 1406546975,
40
- 3515229815,
41
- 1971827767,
42
- 2621571898,
43
- 420001427,
44
- 1411697076,
45
- 3344524717,
46
- 1058190081,
47
- 2748006135,
48
- 3479097891,
49
- 487321201,
50
- 3631996109,
51
- 2537270012,
52
- 2608966632,
53
- 710474667,
54
- 2323016772,
55
- 3596820456,
56
- 1185526171,
57
- 4074881436,
58
- 2932887847,
59
- 1980325142,
60
- 1341823072,
61
- 1671725107,
62
- 606816074,
63
- 4183244382,
64
- 1906142123,
65
- 4216369577,
66
- 2964173476,
67
- 4067683137,
68
- 3577623281,
69
- 2751451114,
70
- 525849254,
71
- 498730114,
72
- 1613819194,
73
- 3067945879,
74
- 2856159025,
75
- 772320633,
76
- 3448944950,
77
- 460990664,
78
- 2065935992,
79
- 1895423998,
80
- 2399377116,
81
- 1502185808,
82
- 32393732,
83
- 1698335152,
84
- 81009662,
85
- 2240252988,
86
- 4085637238,
87
- 1620133323,
88
- 3479175556,
89
- 1340811264,
90
- 3942843638,
91
- 2093402992,
92
- 3273966663,
93
- 182263349,
94
- 1888196096,
95
- 1432765667,
96
- 318467058,
97
- 1081558690,
98
- 2808477935,
99
- 1077711974,
100
- 1621291395,
101
- 1277555732,
102
- 153546595,
103
- 1931993913,
104
- 670335560,
105
- 2224784695,
106
- 3293586766,
107
- 1098169583,
108
- 1124260106,
109
- 1583549685,
110
- 1543594932,
111
- 3610215130,
112
- 1942116738,
113
- 3465276485,
114
- 1472649529,
115
- 2785362770,
116
- 11122647,
117
- 2871168363,
118
- 3605045875,
119
- 1436086857,
120
- 1992015531,
121
- 2251602449,
122
- 2032078653,
123
- 2011235721,
124
- 3422761212,
125
- 533288666,
126
- 169258631,
127
- 1306123880,
128
- 3056053144,
129
- 3917548099,
130
- 2273250686,
131
- 3571151177,
132
- 2436271223,
133
- 4115144236,
134
- 317789591,
135
- 1566000295,
136
- 311498837,
137
- 3690527042,
138
- 3229620032,
139
- 4011580485,
140
- 1218474694,
141
- 187868606,
142
- 2636294475,
143
- 2399920460,
144
- 116828745,
145
- 3342270071,
146
- 3266649763,
147
- 3872561671,
148
- 411380297,
149
- 3271629686,
150
- 65062469,
151
- 193153987,
152
- 2111927589,
153
- 2744014947,
154
- 2527217086,
155
- 224930943,
156
- 1917332886,
157
- 3431250047,
158
- 981441600,
159
- 1147445640,
160
- 2460571710,
161
- 3069543174,
162
- 1984643935,
163
- 1967394565,
164
- 3318142388,
165
- 3620666162,
166
- 2949301306,
167
- 3981554569,
168
- 2104333466,
169
- 3458143967,
170
- 1218641839,
171
- 4234154823,
172
- 2367325999,
173
- 615244407,
174
- 891373979,
175
- 664834184,
176
- 3228241020,
177
- 419790255,
178
- 4246105607,
179
- 2165432894,
180
- 2011893032,
181
- 3506348715,
182
- 4102440825,
183
- 75342742,
184
- 780322583,
185
- 903711565,
186
- 2820344521,
187
- 2352792055,
188
- 2674787198,
189
- 473489289,
190
- 2254569435,
191
- 1901494810,
192
- 2998425969,
193
- 2979621586,
194
- 4137707221,
195
- 474036005,
196
- 281302580,
197
- 1242186001,
198
- 3142425247,
199
- 3055060083,
200
- 1660759950,
201
- 4070331092,
202
- 3678870707,
203
- 2937711110,
204
- 2933412383,
205
- 307656978,
206
- 2242779280,
207
- 636656000,
208
- 1286397477,
209
- 2872710772,
210
- 3393487356,
211
- 877876473,
212
- 2448336526,
213
- 1866067529,
214
- 762567813,
215
- 4035616500,
216
- 3571396236,
217
- 2842294297,
218
- 1396536016,
219
- 2011398339,
220
- 699445832,
221
- 1264995189,
222
- 371442626,
223
- 2593945616,
224
- 4057126447,
225
- 2150076893,
226
- 3342683702,
227
- 2284907612,
228
- 197566287,
229
- 844331282,
230
- 1067808142,
231
- 254283823,
232
- 1380810632,
233
- 2846158625,
234
- 985884435,
235
- 2601874760,
236
- 1399532205,
237
- 2452815544,
238
- 3721822107,
239
- 2414881686,
240
- 1703107108,
241
- 2277801220,
242
- 4082425610,
243
- 1144002166,
244
- 2407468644,
245
- 1530584607,
246
- 3372691519,
247
- 2670544165,
248
- 384647240,
249
- 840469694,
250
- 3160659153,
251
- 3888679998,
252
- 3249019166,
253
- 2604272067,
254
- 830027875,
255
- 2271933835,
256
- 1799755055,
257
- 1603014578,
258
- 2187538856,
259
- 2717813938,
260
- 2040822464,
261
- 3756498985,
262
- 1814572232,
263
- 1613553527,
264
- 2138535491,
265
- 2141101823,
266
- 3935126783,
267
- 3693989649,
268
- 169664205,
269
- 635771242,
270
- 2385018594,
271
- 1779647100,
272
- 1660709281,
273
- 4017470124,
274
- 2895193002,
275
- 2744838504,
276
- 1023568806,
277
- 4002950525,
278
- 2275253474,
279
- 2658933434,
280
- 2421989324,
281
- 694757800,
282
- 1998442136,
283
- 3664344089,
284
- 675515255,
285
- 2263357871,
286
- 2353130827,
287
- 1522603128,
288
- 1081635037,
289
- 495593658,
290
- 1925118214,
291
- 3253986378,
292
- 1514888605,
293
- 4240412220,
294
- 48693514,
295
- 2942622811,
296
- 4178811965,
297
- 1838803473,
298
- 2453154413,
299
- 69796190,
300
- 714322070,
301
- 2212771588,
302
- 3281544869,
303
- 2800731561,
304
- 1747615033,
305
- 3466736859,
306
- 1039287652,
307
- 4257847749,
308
- 2413232209,
309
- 4233791285,
310
- 221767369,
311
- 1580174462,
312
- 3086514569,
313
- 18972175,
314
- 2815922554,
315
- 3293813300,
316
- 2887767542,
317
- 1869843114,
318
- 4157667177,
319
- 2350648693,
320
- 3156952074,
321
- 159224819,
322
- 1440463052,
323
- 1975848388,
324
- 4126268518,
325
- 2898062192,
326
- 1415629207,
327
- 830641200,
328
- 2993317888,
329
- 2562839995,
330
- 3315502303,
331
- 1932757621,
332
- 2808623562,
333
- 2368783935,
334
- 2440999631,
335
- 3751189476,
336
- 2025830533,
337
- 991178336,
338
- 3770572174,
339
- 464542652,
340
- 4035531657,
341
- 3816676001,
342
- 1093375034,
343
- 2654483265,
344
- 2828259190,
345
- 3592041138,
346
- 1496536068,
347
- 2648116935,
348
- 676735297,
349
- 1823204629,
350
- 2563295500,
351
- 1986869362,
352
- 1232844543,
353
- 2280913022,
354
- 3529890815,
355
- 1757103232,
356
- 1356351083,
357
- 1648455129,
358
- 3469130795,
359
- 1858425271,
360
- 1813960773,
361
- 1998971380,
362
- 4155645018,
363
- 359735116,
364
- 1434689661,
365
- 4037235611,
366
- 1788923696,
367
- 1349592814,
368
- 2023945742,
369
- 4171737805,
370
- 3119492423,
371
- 2947138617,
372
- 3697549130,
373
- 1971609798,
374
- 2487285385,
375
- 3489616370,
376
- 2005959754,
377
- 3027606174,
378
- 2972294839,
379
- 364072206,
380
- 3426809825,
381
- 3849365037,
382
- 984352291,
383
- 3073220352,
384
- 3755995930,
385
- 1158211781,
386
- 308806298,
387
- 1926310729,
388
- 487418844,
389
- 1349926520,
390
- 2002477690,
391
- 3032195845,
392
- 636699165,
393
- 809676152,
394
- 1648291192,
395
- 1410277612,
396
- 3072144731,
397
- 3838647786,
398
- 2459945686,
399
- 1946907355,
400
- 2496102582,
401
- 3589873447,
402
- 2788430457,
403
- 1748997848,
404
- 27169846,
405
- 1853630383,
406
- 3715429854,
407
- 1438873584,
408
- 2913648665,
409
- 3338322414,
410
- 3741348761,
411
- 2113378220,
412
- 2149668531,
413
- 2021316124,
414
- 4211622394,
415
- 1806739818,
416
- 1096312849,
417
- 1509856509,
418
- 2978483030,
419
- 983181000,
420
- 4192030429,
421
- 2861323938,
422
- 3721857335,
423
- 1127007302,
424
- 820253711,
425
- 1853880343,
426
- 3844617128,
427
- 3830593027,
428
- 2223768515,
429
- 2237495737,
430
- 4147210204,
431
- 2683114125,
432
- 1676494901,
433
- 1330274023,
434
- 2751067496,
435
- 3595727401,
436
- 793860064,
437
- 1553495353,
438
- 2770064337,
439
- 4217568752,
440
- 793895426,
441
- 304487876,
442
- 3839740796,
443
- 453880664,
444
- 4076742522,
445
- 3638094636,
446
- 4272461034,
447
- 924723712,
448
- 2313693188,
449
- 957874993,
450
- 4045752236,
451
- 1848602374,
452
- 2490313670,
453
- 4229167577,
454
- 3968647495,
455
- 571982538,
456
- 1349164597,
457
- 1527293139,
458
- 2684473225,
459
- 1123109717,
460
- 4125524175,
461
- 1867114010,
462
- 3139395026,
463
- 2514230201,
464
- 2321278517,
465
- 237263888,
466
- 2807313863,
467
- 715356021,
468
- 2965099325,
469
- 2853485236,
470
- 1808770088,
471
- 1491462188,
472
- 2282306322,
473
- 1826887192,
474
- 3482622506,
475
- 3160214329,
476
- 2846741286,
477
- 297679738,
478
- 78044798,
479
- 2180420647,
480
- 4294283288,
481
- 3078757245,
482
- 689073222,
483
- 2928024475,
484
- 2045050690,
485
- 3121221773,
486
- 1744554209,
487
- 3730578802,
488
- 4096172212,
489
- 816300414,
490
- 2154015936,
491
- 2905306257,
492
- 3165587824,
493
- 3101628795,
494
- 1946528699,
495
- 4006251360,
496
- 1858878855,
497
- 306607162,
498
- 3670584545,
499
- 2775263442,
500
- 3019473454,
501
- 1885649240,
502
- 478830246,
503
- 173386865,
504
- 131488732,
505
- 889904861,
506
- 187986497,
507
- 3732887569,
508
- 1942981602,
509
- 1657640617,
510
- 883426275,
511
- 626676545,
512
- 3127146522,
513
- 1369492201,
514
- 2119152187,
515
- 691876777,
516
- 344903938,
517
- 2224770676,
518
- 1201174993,
519
- 4238053429,
520
- 415126681,
521
- 3585621266,
522
- 2893479647,
523
- 465483747,
524
- 1619056575,
525
- 1973441940,
526
- 3049719280,
527
- 1509555909,
528
- 3204623582,
529
- 3583454029,
530
- 3711167974,
531
- 2672833023,
532
- 1716625770,
533
- 333305676,
534
- 3069627543,
535
- 2569430167,
536
- 3008615781,
537
- 1037776311,
538
- 1203514458,
539
- 2480316803,
540
- 3674850750,
541
- 3882071919,
542
- 1123703491,
543
- 1925228774,
544
- 360297051,
545
- 2779325314,
546
- 1250535143,
547
- 3864019182,
548
- 3859893406,
549
- 3786138762,
550
- 4177324002,
551
- 1288978146,
552
- 1886672442,
553
- 1342670880,
554
- 3414114294,
555
- 837797328,
556
- 3311596167,
557
- 3262380560,
558
- 364381458,
559
- 2526785561,
560
- 3950236972,
561
- 2598760366,
562
- 3676645323,
563
- 4110848868,
564
- 158694619,
565
- 1074693433,
566
- 2486872109,
567
- 1084554904,
568
- 2159023899,
569
- 3779177682,
570
- 2119206349,
571
- 2581153459,
572
- 1268518531,
573
- 45156076,
574
- 1926404473,
575
- 1094279634,
576
- 1921049957,
577
- 1773816468,
578
- 1680514487,
579
- 1622305681,
580
- 3218183318,
581
- 1127913791,
582
- 1811633129,
583
- 2406679215,
584
- 56708853,
585
- 1516005722,
586
- 1529189539,
587
- 647155004,
588
- 1327954509,
589
- 3304234068,
590
- 2927585790,
591
- 239541044,
592
- 2460188861,
593
- 1483784281,
594
- 684984465,
595
- 2087854562,
596
- 1437781978,
597
- 768432905,
598
- 860383364,
599
- 4003279944,
600
- 1011827734,
601
- 895350278,
602
- 4135068233,
603
- 2427835175,
604
- 1113890874,
605
- 110166785,
606
- 2996505386,
607
- 2832734881,
608
- 3549156369,
609
- 2349118872,
610
- 2147850545,
611
- 3967211094,
612
- 3802081533,
613
- 1333704272,
614
- 3619564092,
615
- 1040301827,
616
- 2940961016,
617
- 386293345,
618
- 244474911,
619
- 3848673290,
620
- 3301276956,
621
- 302075821,
622
- 895633191,
623
- 1074662119,
624
- 230712882,
625
- 3478659002,
626
- 171732214,
627
- 1291632324,
628
- 1833497219,
629
- 604360431,
630
- 4236010565,
631
- 3342057989,
632
- 3050710768,
633
- 429
634
- ],
635
- null
636
- ],
637
- "numpy": [
638
- "MT19937",
639
- "[ 1678 426152967 3940623813 3094666529 543685075 1182563140\n 3891760959 989241011 421488551 23938028 1961433126 4210321006\n 2520235533 2669478648 3655333552 323428270 4039482806 1800410490\n 2893302937 3273534522 1646326673 3259508709 1662768084 2771030048\n 970657666 1397207139 1666910404 1140777684 1254521637 1177606481\n 2260831406 660114427 2696252711 137133242 3915562884 489017190\n 1873963362 830377780 469586858 2656959545 2427262575 1313156650\n 1311586849 584257227 3149691203 2869338066 601054526 4093376421\n 3883567790 2924104562 3586044770 904048248 4145352076 1920524448\n 3930966203 774023375 2980864227 3942752766 3242216971 710480227\n 2636568395 2022179082 25447573 1514206216 3598194893 2018117511\n 3487936032 1992519442 2131819971 30339023 1470325745 4286701047\n 1336944012 930235882 2833633180 4095244193 52094262 2864731291\n 659964331 2648271814 1137666212 945264234 4008907300 1303251126\n 961830791 1444540312 4070795443 1246657991 1253086582 1105766220\n 2439853755 1567529048 3264789625 578135679 2952636281 2786125542\n 276241492 1936925061 3287801206 1619065996 2112231685 776405753\n 1667919267 2232873809 1972005671 1761451111 1938508456 2005394968\n 3111135561 4100353540 863774769 2262111428 1402546318 1644857308\n 1139269539 1452731741 1008067008 2528235829 2308897321 1522972270\n 4178600259 2957090489 3807556417 712838021 455260405 3026261542\n 3928162482 3422879572 3657387219 2072288913 3390938962 522794104\n 1738402780 328356790 427912532 3235780267 1884739792 3858916350\n 2120144987 2141816333 3505540936 3944049700 2674232049 2333138286\n 2182752300 3574888631 1117254550 4100975142 1467493933 2790507505\n 1385060981 700302427 3362794367 2615837829 2552465629 3697104790\n 633686885 670223478 292540716 4330235 3742814375 505681237\n 2767841323 2029357520 274151705 1000794242 248860144 1651322967\n 3729528982 4114641010 4256159295 1537843799 2854687386 2894442149\n 3586541457 8740681 4221282685 1552960871 3827572720 3137250962\n 1535821700 
4079609134 2607251575 2462534624 4218086114 2226973822\n 1081846182 3055898270 3389882440 386449748 2134260450 2518529870\n 870645180 1162196973 996334814 138801753 3736682209 300597743\n 3572525841 832889057 3834389133 61448399 4070848885 4156871257\n 3450808398 2026314798 3454813785 2488527953 3906578575 82209805\n 2937493235 4148659944 2866347659 1234353634 2209900133 3881731706\n 2542849941 2891626092 4033665088 1216491082 774858611 4013251644\n 136584377 1588418268 3119155985 2944076384 3032976268 3578465769\n 1786034230 3022200216 2749896872 1147844089 3007546304 1299435123\n 1162442212 2069238084 1293270565 1867598113 3943955086 3318894480\n 3381924079 3404528141 2679252600 3711334677 82087074 1843146463\n 486886540 1893867571 2244966066 1345193065 1096508930 3233536810\n 3699730729 4262459535 2753821754 2310306583 598068553 876941262\n 3948313416 606521498 2027786182 600586632 1561623470 3842929682\n 1934091453 2246695733 773123261 2518518940 2319124834 3807319021\n 1837054964 1241236920 1630798861 1543767501 2906945166 1296544335\n 2748782554 2256994637 2954873921 1521513094 1251503963 87103899\n 1783631425 754311259 3633264643 2641959197 3004268889 4053712134\n 2381005081 258286536 2374313738 2292428619 409025009 3891582522\n 1909196451 2608419089 2126850215 2870206631 3801280323 2428609899\n 2412410009 2810443092 2998194716 2895836421 2910088179 1548324934\n 2905402421 111387110 2776430322 721673701 3079215503 4137585368\n 1423506591 689846671 1285766565 2011174639 1076914978 3999596812\n 1993569065 270850055 4065721091 2084505665 4194861698 817465128\n 345947148 1234607617 4110047046 509303936 1712062408 2031975574\n 2893019101 3471582022 351066245 150182598 2841249644 2592132533\n 3068729731 119806518 2730442400 2623957309 4151524911 2414941361\n 3809867509 3784185445 2882891158 1910796477 2493060998 3801607279\n 3127514104 1415621887 3920244628 1153275634 960076351 855544636\n 2984195086 473822751 469873311 3952840992 4043374389 1618122677\n 
1985032044 1751387242 889924257 4255054832 2352182091 3422391866\n 464821995 331052838 1707301230 4143427900 1783408205 2437957999\n 1240682101 1998981433 1031314574 4232037245 624463918 1304134047\n 917091024 89447819 936889427 515967292 12436010 3740014097\n 2104645786 2143611304 2035982895 953064361 2289032241 979250340\n 3582419514 1837712132 2636652417 2588314424 3840735596 3372470614\n 4007335189 1448220987 645529456 3018111167 692390177 1711686806\n 793698341 2635414828 1801503418 1551886428 2287694663 4289161424\n 1441417175 2226773767 181513107 781147546 2665306462 1672117993\n 3901111078 3432981560 3335118823 3316800149 871498960 3837736883\n 1510877716 514775278 60093836 3892366819 396237576 185594833\n 3179088927 1050431004 1278987960 3360199082 573827675 325266070\n 704910558 4242082631 3884242822 1868707372 4160255605 3517310019\n 3143192310 3458776059 3866223248 322179256 2240090706 612174667\n 1131256147 176709911 2137300689 861927887 3392558955 537863625\n 2511436559 3812262628 4269032935 3062012089 4133443213 2127635917\n 240260932 3887240093 2881867040 2478776885 3073962111 294963230\n 2946302884 1903183949 3792778188 606878076 3533310910 2503592036\n 1322559506 2220719188 4005959108 3258914906 2640002549 3471418188\n 1975236357 2568772207 160367581 2777206030 805852058 2839308193\n 2982158383 3477063842 1324307047 2152602145 3854699699 388576085\n 1930215279 3665520205 4036041902 1448339242 1811757281 746017355\n 1244306563 807359287 979715489 262054772 2000392884 1257685082\n 1187485913 192079659 2016950507 3406409799 3422744778 1904448068\n 1309844529 2223536361 3304749489 768624437 3619321573 779878587\n 2624413381 3394233730 1172215525 4291661301 2529929744 3134962973\n 2428515903 814157078 249754036 3738087947 3272247088 3420890152\n 2164705793 1407078458 625097811 3484367308 2663028665 2230186454\n 3575045812 2587283268 1601584816 1409971944 1310891265 2762797845\n 3757324585 1444415913 4164860256 1470767400 3590129735 3223103727\n 
1344860984 1021343898 1766850784 1328530404 3304118393 4197758019\n 3640745826 3778841704 1056287579 1662865420 1992461383 1438437317\n 1024056956 3260622101 2912840152 589000493 2391602157 732964472\n 777873286 2750058509 1997266715 1674178931 587220524 1876770191\n 521947450 3790532631 38497818 1080319097 1146926480 3098885486\n 458492374 1313171881 3397753220 2358236032 226297480 3364511207\n 3960607028 2060991476 790152939 1806123002 2652279627 1186541330\n 1321582533 3590360731 3232734272 2310898616 3579413676 2300314198\n 2509758832 2393896519 1310333063 4212483373 2933427062 2654739221\n 165085541 2538970156 849165946 1271853687 295751908 3933761611\n 1737676480 3173188990 3542386758 2956571284 2440019338 3158020357\n 256768289 2147284324 803397689 2081421790 1746857565 4131866799\n 3703028288 3271514580 3199435577 3366607790 3556819625 387959163\n 2293912305 1888208714 1272563203 3119526711 2845741911 2881200120]",
640
- 624,
641
- 0,
642
- 0.0
643
- ],
644
- "mlx": 1756849692,
645
- "mlx_key": [
646
- 0,
647
- 1756849182
648
- ]
649
- },
650
- "training_args_snapshot": {
651
- "output_dir": "outy1266_align_last7",
652
- "max_kv_size": 1536,
653
- "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit",
654
- "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/InferenceIllusionist/gpt-oss-20b-MLX-4bit",
655
- "draft_model_path": null,
656
- "benchmark_every": 0,
657
- "benchmark_dataset": "gsm8k",
658
- "benchmark_dataset_config": "main",
659
- "benchmark_split": "test",
660
- "benchmark_samples": 10,
661
- "benchmark_prompt_key": "question",
662
- "benchmark_answer_key": "answer",
663
- "benchmark_max_new_tokens": 196,
664
- "benchmark_temperature": 0.0,
665
- "benchmark_top_p": 1.0,
666
- "benchmark_top_k": 0,
667
- "benchmark_use_chat_template": true,
668
- "benchmark_stop_on_error": false,
669
- "min_think_tokens": 32,
670
- "think_end_early_bias": -12.0,
671
- "bias_answer_start_after_min_think": true,
672
- "train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl",
673
- "val_dataset_path": null,
674
- "dataset_name": null,
675
- "dataset_config": null,
676
- "dataset_train_split": "train",
677
- "dataset_val_split": "test",
678
- "dataset_prompt_key": "prompt",
679
- "dataset_answer_key": "completion",
680
- "max_prompt_len": 350,
681
- "max_gen_len": 128,
682
- "system_prompt": null,
683
- "think_start_tag": "<think>",
684
- "think_end_tag": "</think>",
685
- "answer_start_tag": "<answer>",
686
- "answer_end_tag": "</answer>",
687
- "think_boost_tokens": 24,
688
- "think_temperature": 0.15,
689
- "answer_temperature": 0.1,
690
- "sampling_top_p": 0.6,
691
- "sampling_min_p": 0.05,
692
- "sampling_top_k": 40,
693
- "repetition_penalty": 1.15,
694
- "repetition_context_size": 64,
695
- "hard_mask_mcq_first_token": true,
696
- "mcq_letter_lift": 10.0,
697
- "mcq_ban_first_bias": -14.0,
698
- "nonmcq_ban_first_bias": -10.0,
699
- "mcq_close_after_k": 1,
700
- "min_answer_tokens": 6,
701
- "min_answer_tokens_mcq": 1,
702
- "bias_close_think": 6.0,
703
- "bias_answer_start": 3.0,
704
- "punish_reopen_think": -3.0,
705
- "punish_extra_think_end": -6.0,
706
- "bias_eos_after_answer": 4.0,
707
- "allow_tool_calls": false,
708
- "tool_call_penalty": 1.0,
709
- "reward_content_type": "smart",
710
- "reward_format_weight": 0.2,
711
- "reward_content_weight": 0.7,
712
- "think_reward_weight": 0.1,
713
- "think_len_min": 16,
714
- "think_len_max": 64,
715
- "use_lora": false,
716
- "num_rollout_samples": 3,
717
- "ppo_batch_size": 1,
718
- "grpo_beta": 0.1,
719
- "learning_rate": 1.4e-06,
720
- "optimizer_beta1": 0.9,
721
- "optimizer_beta2": 0.95,
722
- "optimizer_weight_decay": 0.01,
723
- "grad_clip_norm": 0.35,
724
- "save_optimizer_state": false,
725
- "lr_schedule_config": {
726
- "name": "cosine_decay",
727
- "arguments": [
728
- 1.4e-06,
729
- 60000,
730
- 2e-07
731
- ],
732
- "warmup": 4000,
733
- "warmup_init": 2e-07
734
- },
735
- "grad_accum_steps": 2,
736
- "num_training_steps": 45869,
737
- "save_every": 10,
738
- "eval_every": 0,
739
- "seed": 1678,
740
- "shuffle_data": true,
741
- "use_grad_checkpointing": false,
742
- "grad_checkpoint_layers": 0,
743
- "log_samples_every": 1,
744
- "max_logged_samples": 50,
745
- "log_prompts": true,
746
- "sample_log_path": null,
747
- "kv_bits": 0,
748
- "kv_group_size": 64,
749
- "quantized_kv_start": 0,
750
- "verbose": true,
751
- "use_wandb": true,
752
- "wandb_project": "reasonable-qwen-4b-mlxv2isi",
753
- "wandb_entity": null,
754
- "wandb_run_name": null,
755
- "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last7/checkpoint_20250903_073932_periodic_update_1160",
756
- "allow_cross_arch_ref": true,
757
- "align_bridge_path": null,
758
- "align_bridge_weight": 1.0,
759
- "align_pool": "mean",
760
- "align_after_tag": "<|start|>assistant<|channel|>analysis<|message|>",
761
- "effective_batch_size": 4
762
- }
763
- }