Text Generation · MLX · Safetensors · GGUF · qwen3 · conversational

adeelahmad committed
Commit 4d71219 · 1 Parent(s): befc3e5

Upload folder using huggingface_hub
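The commit message indicates the folder was pushed with the `huggingface_hub` client. A minimal sketch of what such an upload typically looks like; the repo id and local folder below are assumptions for illustration, not values taken from this commit:

    from huggingface_hub import HfApi

    api = HfApi()  # picks up the token stored by `huggingface-cli login`
    api.upload_folder(
        folder_path=".",                          # local folder with the checkpoints, GGUF files, plots, ...
        repo_id="adeelahmad/ReasonableQwen3-4B",  # assumed repo id, for illustration only
        repo_type="model",
        commit_message="Upload folder using huggingface_hub",
    )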

Files changed (30)
  1. .gitattributes +6 -0
  2. Modelfile +1 -1
  3. checkpoint_20250903_232422_periodic_update_2600/added_tokens.json +38 -0
  4. checkpoint_20250903_232422_periodic_update_2600/chat_template.jinja +96 -0
  5. checkpoint_20250903_232422_periodic_update_2600/config.json +30 -0
  6. checkpoint_20250903_232422_periodic_update_2600/merges.txt +0 -0
  7. checkpoint_20250903_232422_periodic_update_2600/special_tokens_map.json +102 -0
  8. checkpoint_20250903_232422_periodic_update_2600/tokenizer_config.json +318 -0
  9. checkpoint_20250903_232422_periodic_update_2600/training_state.json +763 -0
  10. checkpoint_20250903_232422_periodic_update_2600/vocab.json +0 -0
  11. checkpoint_20250903_232551_exit_request_update_2602/added_tokens.json +38 -0
  12. checkpoint_20250903_232551_exit_request_update_2602/chat_template.jinja +96 -0
  13. checkpoint_20250903_232551_exit_request_update_2602/config.json +30 -0
  14. checkpoint_20250903_232551_exit_request_update_2602/merges.txt +0 -0
  15. checkpoint_20250903_232551_exit_request_update_2602/special_tokens_map.json +102 -0
  16. checkpoint_20250903_232551_exit_request_update_2602/tokenizer_config.json +318 -0
  17. checkpoint_20250903_232551_exit_request_update_2602/training_state.json +763 -0
  18. checkpoint_20250903_232551_exit_request_update_2602/vocab.json +0 -0
  19. checkpoint_20250903_232617_shutdown_signal_update_2602/added_tokens.json +38 -0
  20. checkpoint_20250903_232617_shutdown_signal_update_2602/chat_template.jinja +96 -0
  21. checkpoint_20250903_232617_shutdown_signal_update_2602/config.json +30 -0
  22. checkpoint_20250903_232617_shutdown_signal_update_2602/merges.txt +0 -0
  23. checkpoint_20250903_232617_shutdown_signal_update_2602/special_tokens_map.json +102 -0
  24. checkpoint_20250903_232617_shutdown_signal_update_2602/tokenizer_config.json +318 -0
  25. checkpoint_20250903_232617_shutdown_signal_update_2602/training_state.json +763 -0
  26. checkpoint_20250903_232617_shutdown_signal_update_2602/vocab.json +0 -0
  27. plots/loss_vs_updates.png +0 -0
  28. plots/lr_vs_updates.png +0 -0
  29. plots/reward_vs_updates.png +0 -0
  30. training_metrics.csv +0 -0
.gitattributes CHANGED
@@ -37,3 +37,9 @@ tokenizer.json filter=lfs diff=lfs merge=lfs -text
  ReasonableQwen3-4B-Q4_k.gguf filter=lfs diff=lfs merge=lfs -text
  ReasonableQwen3-4B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
  ReasonableQwen3-4B-Q4_K.gguf filter=lfs diff=lfs merge=lfs -text
+ .DS_Store filter=lfs diff=lfs merge=lfs -text
+ checkpoint_20250903_232422_periodic_update_2600/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ checkpoint_20250903_232551_exit_request_update_2602/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ checkpoint_20250903_232617_shutdown_signal_update_2602/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ samples_debug.jsonl filter=lfs diff=lfs merge=lfs -text
+ training_debug.log filter=lfs diff=lfs merge=lfs -text
Modelfile CHANGED
@@ -1,4 +1,4 @@
- From ./ReasonableQwen3-4B-Q8_0.gguf
+ From ./ReasonableQwen3-4B-Q4_k.gguf

  TEMPLATE """{{- if .Messages }}
  {{- if or .System .Tools }}<|im_start|>system
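The Modelfile now points at the Q4_K quantization instead of Q8_0. For reference, a minimal sketch of loading that quantized file directly with `llama-cpp-python`, assuming the GGUF is in the working directory and the package is installed; the sampling settings are illustrative:

    from llama_cpp import Llama

    # Load the quantized weights referenced by the Modelfile.
    llm = Llama(model_path="./ReasonableQwen3-4B-Q4_k.gguf", n_ctx=4096)

    out = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Say hello in one sentence."}],
        max_tokens=64,
    )
    print(out["choices"][0]["message"]["content"])

To rebuild the Ollama model itself, `ollama create <name> -f Modelfile` followed by `ollama run <name>` is the usual route.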
checkpoint_20250903_232422_periodic_update_2600/added_tokens.json ADDED
@@ -0,0 +1,38 @@
+ {
+ "</answer>": 151669,
+ "</img_base64>": 151670,
+ "</json_output>": 151671,
+ "</ocr_text>": 151672,
+ "</think>": 151668,
+ "</tool_call>": 151658,
+ "</tool_code>": 151673,
+ "</tool_response>": 151666,
+ "<answer>": 151674,
+ "<img_base64>": 151675,
+ "<json_output>": 151676,
+ "<ocr_text>": 151677,
+ "<think>": 151667,
+ "<tool_call>": 151657,
+ "<tool_code>": 151678,
+ "<tool_response>": 151665,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
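added_tokens.json maps the extra control tags (reasoning, answer, tool and OCR markers) onto IDs above the base Qwen vocabulary. A quick sanity check with `transformers`, assuming the checkpoint directory is available locally (the path is illustrative):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("checkpoint_20250903_232422_periodic_update_2600")

    # IDs should match the mapping in added_tokens.json.
    for tag in ["<think>", "</think>", "<answer>", "</answer>", "<tool_code>"]:
        print(tag, tok.convert_tokens_to_ids(tag))
    # Expected per this file: <think> 151667, </think> 151668, <answer> 151674, </answer> 151669, <tool_code> 151678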
checkpoint_20250903_232422_periodic_update_2600/chat_template.jinja ADDED
@@ -0,0 +1,96 @@
+ {%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- messages[0].content + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+ {%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+ {%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- set tool_start = "<tool_response>" %}
+ {%- set tool_start_length = tool_start|length %}
+ {%- set start_of_message = message.content[:tool_start_length] %}
+ {%- set tool_end = "</tool_response>" %}
+ {%- set tool_end_length = tool_end|length %}
+ {%- set start_pos = (message.content|length) - tool_end_length %}
+ {%- if start_pos < 0 %}
+ {%- set start_pos = 0 %}
+ {%- endif %}
+ {%- set end_of_message = message.content[start_pos:] %}
+ {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endfor %}
+ {%- for message in messages %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set content = message.content %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in message.content %}
+ {%- set content = (message.content.split('</think>')|last).lstrip('\n') %}
+ {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\n') %}
+ {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- message.content }}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is false %}
+ {{- '<think>\n\n</think>\n\n' }}
+ {%- endif %}
+ {%- endif %}
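The template wraps turns in ChatML markers, injects the tool signatures into the system block, re-emits `<think>...</think>` reasoning for the final assistant turn, and appends an empty think block when `enable_thinking` is false. A hedged sketch of exercising it through `transformers`, assuming a version recent enough to pick up the standalone chat_template.jinja and that the checkpoint directory is local (path illustrative):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("checkpoint_20250903_232422_periodic_update_2600")

    messages = [
        {"role": "system", "content": "You are a concise assistant."},
        {"role": "user", "content": "What is 2 + 2?"},
    ]

    # add_generation_prompt appends '<|im_start|>assistant\n';
    # enable_thinking=False makes the template emit an empty <think></think> block.
    prompt = tok.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
    print(prompt)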
checkpoint_20250903_232422_periodic_update_2600/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "architectures": [
+ "Qwen3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 2560,
+ "initializer_range": 0.02,
+ "intermediate_size": 9728,
+ "max_position_embeddings": 40960,
+ "max_window_layers": 36,
+ "model_type": "qwen3",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 36,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.51.0",
+ "use_cache": true,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+ }
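config.json describes a standard Qwen3-4B geometry: 36 layers, hidden size 2560, 32 query heads with 8 KV heads (GQA), and a 151936-token vocabulary. The fields can be read back with `AutoConfig`; the path below is illustrative and assumes the checkpoint directory is local:

    from transformers import AutoConfig

    cfg = AutoConfig.from_pretrained("checkpoint_20250903_232422_periodic_update_2600")
    # 36 layers, hidden_size 2560, 32 attention heads / 8 KV heads, vocab 151936
    print(cfg.num_hidden_layers, cfg.hidden_size,
          cfg.num_attention_heads, cfg.num_key_value_heads, cfg.vocab_size)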
checkpoint_20250903_232422_periodic_update_2600/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232422_periodic_update_2600/special_tokens_map.json ADDED
@@ -0,0 +1,102 @@
+ {
+   "additional_special_tokens": [
+     { "content": "</answer>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "</img_base64>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "</json_output>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "</ocr_text>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "</think>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "</tool_code>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "<answer>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "<img_base64>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "<json_output>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "<ocr_text>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "<think>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+     { "content": "<tool_code>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }
+   ],
+   "eos_token": { "content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
+   "pad_token": { "content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }
+ }
checkpoint_20250903_232422_periodic_update_2600/tokenizer_config.json ADDED
@@ -0,0 +1,318 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "added_tokens_decoder": {
+     "151643": { "content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151644": { "content": "<|im_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151645": { "content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151646": { "content": "<|object_ref_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151647": { "content": "<|object_ref_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151648": { "content": "<|box_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151649": { "content": "<|box_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151650": { "content": "<|quad_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151651": { "content": "<|quad_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151652": { "content": "<|vision_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151653": { "content": "<|vision_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151654": { "content": "<|vision_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151655": { "content": "<|image_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151656": { "content": "<|video_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151657": { "content": "<tool_call>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151658": { "content": "</tool_call>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151659": { "content": "<|fim_prefix|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151660": { "content": "<|fim_middle|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151661": { "content": "<|fim_suffix|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151662": { "content": "<|fim_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151663": { "content": "<|repo_name|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151664": { "content": "<|file_sep|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151665": { "content": "<tool_response>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151666": { "content": "</tool_response>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false },
+     "151667": { "content": "<think>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151668": { "content": "</think>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151669": { "content": "</answer>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151670": { "content": "</img_base64>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151671": { "content": "</json_output>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151672": { "content": "</ocr_text>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151673": { "content": "</tool_code>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151674": { "content": "<answer>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151675": { "content": "<img_base64>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151676": { "content": "<json_output>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151677": { "content": "<ocr_text>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
+     "151678": { "content": "<tool_code>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }
+   },
+   "additional_special_tokens": [
+     "</answer>", "</img_base64>", "</json_output>", "</ocr_text>", "</think>", "</tool_code>",
+     "<answer>", "<img_base64>", "<json_output>", "<ocr_text>", "<think>", "<tool_code>"
+   ],
+   "bos_token": null,
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "errors": "replace",
+   "extra_special_tokens": {},
+   "model_max_length": 131072,
+   "pad_token": "<|endoftext|>",
+   "split_special_tokens": false,
+   "tokenizer_class": "Qwen2Tokenizer",
+   "unk_token": null
+ }
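tokenizer_config.json pins the runtime behaviour: the Qwen2Tokenizer class, `<|im_end|>` as EOS, `<|endoftext|>` as padding, a 131072-token model_max_length, and the full added-token decoder table above. A short check, using the same illustrative local path as before:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("checkpoint_20250903_232422_periodic_update_2600")
    print(type(tok).__name__)            # typically Qwen2TokenizerFast (Qwen2Tokenizer with use_fast=False)
    print(tok.eos_token, tok.pad_token)  # <|im_end|> <|endoftext|>
    print(tok.model_max_length)          # 131072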
checkpoint_20250903_232422_periodic_update_2600/training_state.json ADDED
@@ -0,0 +1,763 @@
+ {
+   "global_step": 2600,
+   "num_updates": 2600,
+   "use_lora": false,
+   "rng_state": {
+     "python": [3, [ ... 625 integers (Mersenne Twister state), too large to render ... ], null],
+     "numpy": ["MT19937", " ... 624-word MT19937 state vector, too large to render ... ", 624, 0, 0.0],
+     "mlx": 1756905873,
+     "mlx_key": [0, 1756903229]
+   },
+   "training_args_snapshot": {
+     "output_dir": "outy1266_align_last7", "max_kv_size": 1536,
+     "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit",
+     "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/InferenceIllusionist/gpt-oss-20b-MLX-4bit",
+     "draft_model_path": null,
+     "benchmark_every": 0, "benchmark_dataset": "gsm8k", "benchmark_dataset_config": "main", "benchmark_split": "test",
+     "benchmark_samples": 10, "benchmark_prompt_key": "question", "benchmark_answer_key": "answer",
+     "benchmark_max_new_tokens": 196, "benchmark_temperature": 0.0, "benchmark_top_p": 1.0, "benchmark_top_k": 0,
+     "benchmark_use_chat_template": true, "benchmark_stop_on_error": false,
+     "min_think_tokens": 32, "think_end_early_bias": -12.0, "bias_answer_start_after_min_think": true,
+     "train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl", "val_dataset_path": null,
+     "dataset_name": null, "dataset_config": null, "dataset_train_split": "train", "dataset_val_split": "test",
+     "dataset_prompt_key": "prompt", "dataset_answer_key": "completion",
+     "max_prompt_len": 350, "max_gen_len": 128, "system_prompt": null,
+     "think_start_tag": "<think>", "think_end_tag": "</think>", "answer_start_tag": "<answer>", "answer_end_tag": "</answer>",
+     "think_boost_tokens": 24, "think_temperature": 0.15, "answer_temperature": 0.1,
+     "sampling_top_p": 0.6, "sampling_min_p": 0.05, "sampling_top_k": 40,
+     "repetition_penalty": 1.15, "repetition_context_size": 64,
+     "hard_mask_mcq_first_token": true, "mcq_letter_lift": 10.0, "mcq_ban_first_bias": -14.0, "nonmcq_ban_first_bias": -10.0,
+     "mcq_close_after_k": 1, "min_answer_tokens": 6, "min_answer_tokens_mcq": 1,
+     "bias_close_think": 6.0, "bias_answer_start": 3.0, "punish_reopen_think": -3.0, "punish_extra_think_end": -6.0,
+     "bias_eos_after_answer": 4.0, "allow_tool_calls": false, "tool_call_penalty": 1.0,
+     "reward_content_type": "smart", "reward_format_weight": 0.2, "reward_content_weight": 0.7, "think_reward_weight": 0.1,
+     "think_len_min": 16, "think_len_max": 64, "use_lora": false,
+     "num_rollout_samples": 3, "ppo_batch_size": 1, "grpo_beta": 0.04,
+     "learning_rate": 1.4e-06, "optimizer_beta1": 0.9, "optimizer_beta2": 0.95, "optimizer_weight_decay": 0.01,
+     "grad_clip_norm": 0.35, "save_optimizer_state": false,
+     "lr_schedule_config": { "name": "cosine_decay", "arguments": [1.4e-06, 60000, 2e-07], "warmup": 4000, "warmup_init": 2e-07 },
+     "grad_accum_steps": 2, "num_training_steps": 45869, "save_every": 10, "eval_every": 0, "seed": 15572,
+     "shuffle_data": true, "use_grad_checkpointing": false, "grad_checkpoint_layers": 0,
+     "log_samples_every": 1, "max_logged_samples": 50, "log_prompts": true, "sample_log_path": null,
+     "kv_bits": 0, "kv_group_size": 64, "quantized_kv_start": 0, "verbose": true,
+     "use_wandb": true, "wandb_project": "reasonable-qwen-4b-mlxv2isi", "wandb_entity": null, "wandb_run_name": null,
+     "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last7/checkpoint_20250903_224014_periodic_update_2510",
+     "allow_cross_arch_ref": true, "align_bridge_path": null, "align_bridge_weight": 1.0, "align_pool": "mean",
+     "align_after_tag": "<|start|>assistant<|channel|>analysis<|message|>",
+     "effective_batch_size": 4
+   }
+ }
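training_args_snapshot records a cosine-decay learning-rate schedule (1.4e-6 down to 2e-7 over 60000 steps) with a 4000-step warmup from 2e-7, plus betas 0.9/0.95 and weight decay 0.01. A sketch of reproducing that schedule with `mlx.optimizers`; this illustrates the recorded config rather than the actual training script, and the AdamW choice is an assumption since the snapshot only records betas and weight decay:

    import mlx.optimizers as optim

    warmup = optim.linear_schedule(2e-7, 1.4e-6, steps=4000)     # warmup_init -> learning_rate
    decay = optim.cosine_decay(1.4e-6, 60000, 2e-7)              # init, decay_steps, end
    lr_schedule = optim.join_schedules([warmup, decay], [4000])  # switch to cosine decay after warmup

    opt = optim.AdamW(                 # assumed optimizer family
        learning_rate=lr_schedule,
        betas=[0.9, 0.95],             # optimizer_beta1 / optimizer_beta2
        weight_decay=0.01,             # optimizer_weight_decay
    )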
checkpoint_20250903_232422_periodic_update_2600/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232551_exit_request_update_2602/added_tokens.json ADDED
Identical to checkpoint_20250903_232422_periodic_update_2600/added_tokens.json above (38 lines added).
checkpoint_20250903_232551_exit_request_update_2602/chat_template.jinja ADDED
Identical to checkpoint_20250903_232422_periodic_update_2600/chat_template.jinja above (96 lines added).
checkpoint_20250903_232551_exit_request_update_2602/config.json ADDED
Identical to checkpoint_20250903_232422_periodic_update_2600/config.json above (30 lines added).
checkpoint_20250903_232551_exit_request_update_2602/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232551_exit_request_update_2602/special_tokens_map.json ADDED
Identical to checkpoint_20250903_232422_periodic_update_2600/special_tokens_map.json above (102 lines added).
checkpoint_20250903_232551_exit_request_update_2602/tokenizer_config.json ADDED
Same content as checkpoint_20250903_232422_periodic_update_2600/tokenizer_config.json above (318 lines added).
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": true
252
+ },
253
+ "151674": {
254
+ "content": "<answer>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
+ },
261
+ "151675": {
262
+ "content": "<img_base64>",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": true
268
+ },
269
+ "151676": {
270
+ "content": "<json_output>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": true
276
+ },
277
+ "151677": {
278
+ "content": "<ocr_text>",
279
+ "lstrip": false,
280
+ "normalized": false,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": true
284
+ },
285
+ "151678": {
286
+ "content": "<tool_code>",
287
+ "lstrip": false,
288
+ "normalized": false,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": true
292
+ }
293
+ },
294
+ "additional_special_tokens": [
295
+ "</answer>",
296
+ "</img_base64>",
297
+ "</json_output>",
298
+ "</ocr_text>",
299
+ "</think>",
300
+ "</tool_code>",
301
+ "<answer>",
302
+ "<img_base64>",
303
+ "<json_output>",
304
+ "<ocr_text>",
305
+ "<think>",
306
+ "<tool_code>"
307
+ ],
308
+ "bos_token": null,
309
+ "clean_up_tokenization_spaces": false,
310
+ "eos_token": "<|im_end|>",
311
+ "errors": "replace",
312
+ "extra_special_tokens": {},
313
+ "model_max_length": 131072,
314
+ "pad_token": "<|endoftext|>",
315
+ "split_special_tokens": false,
316
+ "tokenizer_class": "Qwen2Tokenizer",
317
+ "unk_token": null
318
+ }
checkpoint_20250903_232551_exit_request_update_2602/training_state.json ADDED
@@ -0,0 +1,763 @@
1
+ {
2
+ "global_step": 2602,
3
+ "num_updates": 2602,
4
+ "use_lora": false,
5
+ "rng_state": {
6
+ "python": [
7
+ 3,
8
+ [
9
+ 3228212754,
10
+ 279998097,
11
+ 2056665714,
12
+ 3603597067,
13
+ 3476614728,
14
+ 3031000,
15
+ 2954941703,
16
+ 3205203341,
17
+ 1022055790,
18
+ 1415253971,
19
+ 2181335448,
20
+ 3582899399,
21
+ 1644250258,
22
+ 1793404199,
23
+ 2566317855,
24
+ 109101073,
25
+ 3150506052,
26
+ 2724067265,
27
+ 97547420,
28
+ 2995366220,
29
+ 3519916584,
30
+ 3209457352,
31
+ 3126309277,
32
+ 3451453441,
33
+ 1702671318,
34
+ 2107243699,
35
+ 2422777587,
36
+ 360391179,
37
+ 585237960,
38
+ 2832187814,
39
+ 2662498495,
40
+ 2394734758,
41
+ 1444276186,
42
+ 1837837410,
43
+ 961418280,
44
+ 1195482276,
45
+ 935680843,
46
+ 3968754582,
47
+ 2211483879,
48
+ 1837545159,
49
+ 2419172187,
50
+ 2042898634,
51
+ 2665785964,
52
+ 356867850,
53
+ 3385622908,
54
+ 3868724832,
55
+ 2977197007,
56
+ 1479685303,
57
+ 1540416526,
58
+ 331944145,
59
+ 3530610791,
60
+ 2842657301,
61
+ 2372012930,
62
+ 639325304,
63
+ 3360783663,
64
+ 3762064881,
65
+ 2160841949,
66
+ 4001870304,
67
+ 1864777350,
68
+ 717368547,
69
+ 362746266,
70
+ 3779466655,
71
+ 1960142933,
72
+ 1723756462,
73
+ 1326392635,
74
+ 2695751926,
75
+ 1728155752,
76
+ 2347807318,
77
+ 1862557049,
78
+ 3999800477,
79
+ 2277115301,
80
+ 1516014806,
81
+ 3846995662,
82
+ 2388310657,
83
+ 2859396105,
84
+ 1441398545,
85
+ 3439746988,
86
+ 3678365781,
87
+ 190759243,
88
+ 4075004972,
89
+ 342634350,
90
+ 2045571341,
91
+ 557542918,
92
+ 2240372410,
93
+ 1982579501,
94
+ 255722053,
95
+ 1496213542,
96
+ 2563043770,
97
+ 620403458,
98
+ 3666797179,
99
+ 4194690277,
100
+ 1725488508,
101
+ 2427139442,
102
+ 911138792,
103
+ 2810519096,
104
+ 403900489,
105
+ 1235282796,
106
+ 3323510948,
107
+ 3976438655,
108
+ 2592317228,
109
+ 1469307213,
110
+ 456462311,
111
+ 3393494366,
112
+ 669420558,
113
+ 1939678322,
114
+ 4073521067,
115
+ 3342970892,
116
+ 2452710290,
117
+ 2793129860,
118
+ 1342676286,
119
+ 2394512596,
120
+ 1832972552,
121
+ 3814703913,
122
+ 945666136,
123
+ 3552696630,
124
+ 3165169504,
125
+ 4234896064,
126
+ 3288485605,
127
+ 2637492903,
128
+ 3688384962,
129
+ 1693185353,
130
+ 3041897498,
131
+ 3666651581,
132
+ 216719692,
133
+ 2101621578,
134
+ 1056505155,
135
+ 3806530083,
136
+ 2345205292,
137
+ 2868630622,
138
+ 3289598319,
139
+ 597570811,
140
+ 2632481252,
141
+ 3875619652,
142
+ 289253672,
143
+ 77311731,
144
+ 4133169138,
145
+ 3462637509,
146
+ 2047139049,
147
+ 109094532,
148
+ 1309238588,
149
+ 4183047643,
150
+ 3849080966,
151
+ 2612177601,
152
+ 2747398543,
153
+ 2317389804,
154
+ 2206126400,
155
+ 3538931825,
156
+ 4015230236,
157
+ 1548164965,
158
+ 3615557249,
159
+ 3874732623,
160
+ 383396796,
161
+ 78135062,
162
+ 836187159,
163
+ 1405060375,
164
+ 4124734680,
165
+ 2283841137,
166
+ 130389111,
167
+ 2370256028,
168
+ 3117432748,
169
+ 2188669863,
170
+ 1274090654,
171
+ 136753743,
172
+ 3535123905,
173
+ 928699189,
174
+ 2096609090,
175
+ 140690583,
176
+ 1335216202,
177
+ 1664118110,
178
+ 370920611,
179
+ 3381310767,
180
+ 635177978,
181
+ 919944943,
182
+ 1794890933,
183
+ 3785921605,
184
+ 988623168,
185
+ 3586512205,
186
+ 3578229114,
187
+ 2227888166,
188
+ 1199377973,
189
+ 198574987,
190
+ 3359460270,
191
+ 41421261,
192
+ 3818002247,
193
+ 1611622405,
194
+ 1172309347,
195
+ 1127078025,
196
+ 864036890,
197
+ 3821592921,
198
+ 1074908116,
199
+ 4045652492,
200
+ 150578148,
201
+ 1490173923,
202
+ 1255586022,
203
+ 3620141376,
204
+ 1119552840,
205
+ 1604539422,
206
+ 2899876376,
207
+ 1634692146,
208
+ 2430160935,
209
+ 3525965146,
210
+ 3634230653,
211
+ 2801553134,
212
+ 2236757588,
213
+ 3209439651,
214
+ 638937059,
215
+ 332630833,
216
+ 2099354597,
217
+ 2453040579,
218
+ 2294611386,
219
+ 3583062819,
220
+ 3057053813,
221
+ 2651436307,
222
+ 820127535,
223
+ 2234442340,
224
+ 2926763826,
225
+ 2608312654,
226
+ 622663536,
227
+ 1327274392,
228
+ 579028969,
229
+ 1638676546,
230
+ 725757522,
231
+ 2287475756,
232
+ 2245008208,
233
+ 583117806,
234
+ 1681911299,
235
+ 4043302089,
236
+ 2733469006,
237
+ 3482246612,
238
+ 1699131086,
239
+ 2009286409,
240
+ 1012929586,
241
+ 1831443753,
242
+ 3401815932,
243
+ 2335754234,
244
+ 1338527095,
245
+ 1234451965,
246
+ 2260706742,
247
+ 3195944902,
248
+ 933824426,
249
+ 3055004187,
250
+ 442858759,
251
+ 3568329250,
252
+ 2010957609,
253
+ 2249247272,
254
+ 1258950970,
255
+ 205311363,
256
+ 607774665,
257
+ 2098340782,
258
+ 3806387601,
259
+ 1242750989,
260
+ 3609867707,
261
+ 241559992,
262
+ 2980673848,
263
+ 249731274,
264
+ 2840307599,
265
+ 3768697067,
266
+ 2786582312,
267
+ 234722404,
268
+ 501116625,
269
+ 3883427569,
270
+ 2178103353,
271
+ 2378845018,
272
+ 4115296692,
273
+ 870819025,
274
+ 4131469458,
275
+ 4141674918,
276
+ 2078498292,
277
+ 3999732609,
278
+ 2537021868,
279
+ 3558720844,
280
+ 3704987724,
281
+ 398283175,
282
+ 1690550218,
283
+ 2326228611,
284
+ 2685433390,
285
+ 3899703572,
286
+ 3902620680,
287
+ 2509824424,
288
+ 1513684241,
289
+ 2524739654,
290
+ 3117375505,
291
+ 1379082516,
292
+ 2734650653,
293
+ 2355589146,
294
+ 2451030867,
295
+ 2887987439,
296
+ 2131990321,
297
+ 2214951112,
298
+ 4219817646,
299
+ 1537640525,
300
+ 3339366035,
301
+ 2845436015,
302
+ 3914197935,
303
+ 2658843500,
304
+ 3766601378,
305
+ 1182641028,
306
+ 1457188584,
307
+ 4153939547,
308
+ 2335418690,
309
+ 1152149210,
310
+ 870429227,
311
+ 4230760942,
312
+ 1976040608,
313
+ 785192739,
314
+ 184789279,
315
+ 1377261339,
316
+ 707722660,
317
+ 3558575032,
318
+ 53709080,
319
+ 805111893,
320
+ 4277091928,
321
+ 362164270,
322
+ 3343191968,
323
+ 764658199,
324
+ 1099715208,
325
+ 4090812555,
326
+ 68096351,
327
+ 627278739,
328
+ 1254763496,
329
+ 4216318088,
330
+ 1188064625,
331
+ 3429733508,
332
+ 3022473048,
333
+ 3847088807,
334
+ 3053202734,
335
+ 3740308065,
336
+ 1993837633,
337
+ 763503311,
338
+ 1037390932,
339
+ 588432609,
340
+ 1536894725,
341
+ 1306273771,
342
+ 2543433092,
343
+ 1004993106,
344
+ 700767389,
345
+ 2357318127,
346
+ 3169490527,
347
+ 3918469492,
348
+ 2033525981,
349
+ 519262086,
350
+ 1129257868,
351
+ 2376802674,
352
+ 2854805533,
353
+ 3880903346,
354
+ 74109069,
355
+ 2008994945,
356
+ 4114401423,
357
+ 2811556615,
358
+ 3471501456,
359
+ 837319289,
360
+ 2953139467,
361
+ 2327107742,
362
+ 133550557,
363
+ 3916368228,
364
+ 2552796922,
365
+ 1537772292,
366
+ 1833298346,
367
+ 2009274777,
368
+ 424528351,
369
+ 2717359636,
370
+ 2825961025,
371
+ 1861953389,
372
+ 2884908497,
373
+ 3801183979,
374
+ 1856316371,
375
+ 2235711410,
376
+ 2988382290,
377
+ 1187502951,
378
+ 3671488340,
379
+ 1954214264,
380
+ 2964041676,
381
+ 1604199356,
382
+ 3311430473,
383
+ 1009672891,
384
+ 1165005921,
385
+ 3302391626,
386
+ 2660756877,
387
+ 892284552,
388
+ 2393343854,
389
+ 3816239257,
390
+ 3383925622,
391
+ 2632741631,
392
+ 150411860,
393
+ 4211331326,
394
+ 2563945928,
395
+ 3147050596,
396
+ 86291816,
397
+ 1306956905,
398
+ 3702342117,
399
+ 221494470,
400
+ 2274829119,
401
+ 3628261915,
402
+ 399492089,
403
+ 1039477125,
404
+ 3636665160,
405
+ 1499731795,
406
+ 2521019510,
407
+ 3820576314,
408
+ 651306401,
409
+ 2222447223,
410
+ 762476894,
411
+ 3372134685,
412
+ 2773485657,
413
+ 1506070889,
414
+ 166626664,
415
+ 2296565478,
416
+ 2627756446,
417
+ 1432061762,
418
+ 875491433,
419
+ 3481499302,
420
+ 647568111,
421
+ 1079088546,
422
+ 3657637708,
423
+ 1156169451,
424
+ 1864010770,
425
+ 2499694803,
426
+ 1857267438,
427
+ 2291220436,
428
+ 3324416067,
429
+ 2904742373,
430
+ 1611590803,
431
+ 3124040330,
432
+ 2795308170,
433
+ 3443049470,
434
+ 2264039167,
435
+ 3511022788,
436
+ 2491483558,
437
+ 3811739223,
438
+ 3115068110,
439
+ 3279986730,
440
+ 3570889482,
441
+ 335123233,
442
+ 360967562,
443
+ 311303413,
444
+ 2380553530,
445
+ 1843925797,
446
+ 947441595,
447
+ 3427872459,
448
+ 2885833189,
449
+ 79715633,
450
+ 514452158,
451
+ 1736537499,
452
+ 3982473220,
453
+ 1088572403,
454
+ 2384199361,
455
+ 1784448850,
456
+ 2419864392,
457
+ 639538932,
458
+ 2112974083,
459
+ 3934260396,
460
+ 2850658226,
461
+ 585502134,
462
+ 1760060627,
463
+ 3534598283,
464
+ 118824413,
465
+ 799460671,
466
+ 1478757883,
467
+ 3050258322,
468
+ 2579705998,
469
+ 3133048451,
470
+ 2223823746,
471
+ 603685429,
472
+ 2387682555,
473
+ 3009556562,
474
+ 1521901351,
475
+ 2646225121,
476
+ 1773849074,
477
+ 3347873314,
478
+ 1918004826,
479
+ 1726862757,
480
+ 2513387996,
481
+ 508242897,
482
+ 339225034,
483
+ 1805609427,
484
+ 323575129,
485
+ 765632792,
486
+ 4218304970,
487
+ 296616831,
488
+ 880218140,
489
+ 2165587941,
490
+ 2069055842,
491
+ 110437221,
492
+ 1281149935,
493
+ 3527927111,
494
+ 4246842114,
495
+ 501470319,
496
+ 110693585,
497
+ 2361144593,
498
+ 958597015,
499
+ 540902541,
500
+ 2351458930,
501
+ 3824880566,
502
+ 2412074905,
503
+ 1526349815,
504
+ 2951752081,
505
+ 1577401122,
506
+ 645843044,
507
+ 1122678576,
508
+ 3870028103,
509
+ 3563016932,
510
+ 2394128327,
511
+ 1412316709,
512
+ 1770485652,
513
+ 3328500527,
514
+ 2153223048,
515
+ 1571141422,
516
+ 1950096991,
517
+ 3843465276,
518
+ 1189099356,
519
+ 1707319037,
520
+ 1312370001,
521
+ 4085046861,
522
+ 3231557091,
523
+ 3340822452,
524
+ 1898079545,
525
+ 1373556942,
526
+ 891254598,
527
+ 543958551,
528
+ 3929023245,
529
+ 3262642994,
530
+ 3221866934,
531
+ 2390598216,
532
+ 1036903094,
533
+ 2097686434,
534
+ 129207147,
535
+ 2964160713,
536
+ 1881698322,
537
+ 1515645930,
538
+ 3226263079,
539
+ 1986344504,
540
+ 3005241002,
541
+ 3923005616,
542
+ 1430681832,
543
+ 2048310876,
544
+ 631221366,
545
+ 2972301268,
546
+ 3276451436,
547
+ 3841702416,
548
+ 2585920783,
549
+ 4070240888,
550
+ 3697275337,
551
+ 564704448,
552
+ 266488781,
553
+ 3252391941,
554
+ 2796272702,
555
+ 2665303656,
556
+ 3413456714,
557
+ 2470069594,
558
+ 2470272528,
559
+ 1660836326,
560
+ 991966684,
561
+ 3814972761,
562
+ 1794669421,
563
+ 892478324,
564
+ 461928726,
565
+ 2461293569,
566
+ 561261597,
567
+ 3704935953,
568
+ 1737302340,
569
+ 4141207295,
570
+ 1072950705,
571
+ 1100316204,
572
+ 49599,
573
+ 3655686352,
574
+ 2604516846,
575
+ 169477890,
576
+ 1552495902,
577
+ 4076641781,
578
+ 4288801538,
579
+ 2661645907,
580
+ 2681760413,
581
+ 3191805957,
582
+ 1555588618,
583
+ 2005446622,
584
+ 2876389060,
585
+ 2506534665,
586
+ 3737615325,
587
+ 539844861,
588
+ 2614598422,
589
+ 3169363989,
590
+ 1460455376,
591
+ 3597830757,
592
+ 79929582,
593
+ 1950458365,
594
+ 4188478473,
595
+ 675585740,
596
+ 905482938,
597
+ 3092725,
598
+ 3333208631,
599
+ 2096209247,
600
+ 1647933404,
601
+ 2581635632,
602
+ 1778498943,
603
+ 3092521474,
604
+ 988446911,
605
+ 1790098568,
606
+ 2163371370,
607
+ 3874214587,
608
+ 3811993331,
609
+ 3505387423,
610
+ 2828235272,
611
+ 1132675285,
612
+ 4047174618,
613
+ 2760040098,
614
+ 3320824721,
615
+ 202201724,
616
+ 1355357947,
617
+ 627906198,
618
+ 4127456551,
619
+ 2431589489,
620
+ 675868086,
621
+ 2865627058,
622
+ 4123612491,
623
+ 4047286524,
624
+ 747101435,
625
+ 1216754111,
626
+ 2427503810,
627
+ 3514051898,
628
+ 452300667,
629
+ 2349273222,
630
+ 2983441288,
631
+ 1420412231,
632
+ 2035374170,
633
+ 225
634
+ ],
635
+ null
636
+ ],
637
+ "numpy": [
638
+ "MT19937",
639
+ "[ 15572 980513701 2334715163 3585534944 1822198675 158479007\n 1300107201 2003433159 424170022 4102602503 2437447838 1924282775\n 2084306490 4132823124 4216394081 1526156729 4231078312 3658730376\n 3599347945 3798337125 544676946 3949203055 1596292274 2255158710\n 703032348 636265253 2880318131 3345387760 162413307 2418710564\n 3712245020 2175226970 563044056 2939814745 2838234633 468141434\n 616739654 564867267 2130155541 815641611 601811839 2004017220\n 3627706467 3951463947 810570068 3028421201 454655469 3270345648\n 555008207 3255294172 3259033389 429183833 272696145 2007214122\n 2243779629 1934853570 517873959 1769075612 2057249323 825685197\n 21711389 271106734 3943034084 3547272802 1718926725 3289803093\n 2224067888 3644890877 3431377018 1754806530 2376472949 2892610362\n 1500501344 3824621710 1417356523 4122790557 775716514 1813030967\n 3994108828 391693578 1388189506 1179060416 1727839607 3646219745\n 3467814014 1642908326 1500533561 1281477999 2139613886 209245364\n 1449934686 3593983958 693904485 999778614 847538348 922048571\n 1218470567 916177375 1196217437 3715695791 3572687936 2177952212\n 2976499668 1502043605 3295312140 473156180 3489777550 4116262924\n 726728023 266896064 1207195182 1422796794 3298302327 2546802485\n 3089285157 4087066326 281999229 3833891270 4133996591 3393553875\n 1790581640 1088958086 372349629 1150038540 3967005853 3449868499\n 2783031950 745299387 4177989191 440862037 1630705675 3432601205\n 3976086290 2477494074 2631694750 55867155 3381732871 3988268061\n 4190916192 3426936986 1292805609 2800262421 1433946529 2604379311\n 3803100625 2130255467 4134910564 3389303350 912295037 1986407142\n 60132537 1465812628 2556679777 768839848 561642210 2962288069\n 2900250399 2446769166 2830015834 1820339287 3836052622 3843539266\n 3448899655 719693687 2608513431 807017838 705657612 1313405027\n 308572498 3011745795 3544875535 3662789479 792949536 1679330381\n 2262304426 3714222327 3252067572 3530366244 2847367189 1818638534\n 4196918839 1197188921 1714287054 3610324578 1759524349 658319653\n 4062775635 2170322251 3246948233 467741743 2311362121 1326083926\n 2215935251 2860151158 3543259014 4288886076 1000061343 35607424\n 3800789574 1024328448 2871144392 1452668283 2547917804 794856369\n 3652179617 850026679 66787329 99615284 3360338772 2597540132\n 1809210064 3923947080 4257343339 372176093 3483099399 721873771\n 1101276431 2834898271 76176267 4200628658 2773607601 3516401276\n 3454896121 2354147457 1223458703 3128119783 529915195 2585910314\n 3544532140 551469296 3014823830 3511999051 1463986000 3624754398\n 704833819 3852626834 2711357985 1324465084 1627535231 570708901\n 2717601289 725003848 1942965082 328822490 3620824822 1651096734\n 2785116849 1685019798 1956468619 3534906475 922043906 3007951301\n 4238844799 755293417 3011697131 446474988 2478554140 2374977239\n 1380053003 2243890933 2686719863 2130566958 3840030097 1793607073\n 3796244776 2639065600 3001847252 1477803129 1197364324 2384746982\n 766409730 2722234329 3102942583 2032952634 961381465 1104664880\n 431680105 3809455490 596671995 2974418974 2572169252 3211875863\n 1700476003 2461146565 4138090399 3571815849 2874279728 3348404697\n 1894538786 1654054896 1861571639 643113073 1024018105 110160162\n 3688705424 1588307750 1010684555 1071478016 3908839978 1361810520\n 4077136201 1942152543 3862285636 788921650 3325732842 2086596894\n 2354559341 920752382 2089296746 412564844 3783133394 3542635692\n 151261507 2024582056 27012071 3714904670 2251973869 3415653000\n 3122199829 1793993298 3504479999 
2494502573 3472923469 628884745\n 400477905 2068607674 2511099917 76006962 4127192322 656421806\n 2099151600 4055081824 3120853595 1902231402 1793968517 2739152483\n 3675524757 3637643391 2093725246 3500121902 291758869 1652675998\n 1139040273 1626503079 1329269718 3800218668 1301440229 4094598479\n 2030419032 2206069114 2167504310 3568823651 1480132672 4189195270\n 1003514971 2108978250 4235920891 1015223357 1328980599 3065593845\n 772959451 1736648400 1111173855 2673522945 3202521755 1515315402\n 1025987717 2556593896 1098413506 3353399904 2969501057 2094670114\n 2847919939 3042807578 2837794286 1675561875 2905519122 4265188297\n 2610926124 846285729 2241003777 2845770412 2129473060 3762815768\n 2144316967 1546390655 1870814520 1524713984 3716398313 3346580439\n 507361322 3071157273 715371311 667081236 1562427246 1416032086\n 2719153631 1214541502 3927763433 4093412577 1609261242 1472085592\n 2916826031 2284397012 4029669634 4115943418 618581971 2078599894\n 2195634027 568626950 551593208 1404161907 4048083862 206856294\n 2947194844 2767249973 1603907667 1631351803 1522568516 3530861276\n 932299423 1409409376 1006753259 2778802782 2428826612 4160546743\n 1957871534 3303229622 2827456021 2670092224 3383794488 945607414\n 1983832766 358657548 4271708270 755497396 3434273208 1362230140\n 1689853703 801994005 3746197505 3596436611 1542752314 3254670338\n 1548922657 4130814301 3943625172 221411986 713064282 4233062979\n 4075891970 2437106728 544573526 3064910259 151483803 689855214\n 1545945006 3683633364 3289795997 329789217 4168762065 3787243687\n 2265695874 39834191 1266893307 1324209011 1243246540 3973960372\n 541659911 1362379416 1601251635 1863255185 3125665448 4219983083\n 2682202466 567260347 1405575843 3420495303 2758288434 3586390223\n 336221788 2630832173 3573336941 1218191945 2213154892 2821992107\n 3693992851 696758711 279252507 2892498320 1862489732 255938916\n 1661968992 3484941106 4082783555 2936202287 3514506417 1758172715\n 517257860 3411688455 3637760904 1419041484 2137852375 757229925\n 3065881553 339906360 661617426 760623637 1046610245 876310326\n 1014024268 3183719419 2438284349 1776461276 3594106675 2854090739\n 2602159385 4289618092 2857553425 695761542 3083079398 76408317\n 3254461403 153861699 2129194363 2941386031 4094753231 1156889483\n 2242959746 1437290897 1982676962 1514123682 1353077347 1818424511\n 2436251404 4085735581 3547311726 4033030170 2353214711 479069124\n 241656432 4085762125 551929572 2454945299 132044757 524232234\n 3799812788 6269782 1338614034 3443833252 1258856457 4165168463\n 355192100 2534159709 2851727269 2518795790 3366162664 3414356452\n 1869549905 2688919231 3437293505 2606459835 902202159 3721325246\n 1701626821 1124672137 2815257054 3602219523 1714310200 949851574\n 2336520456 504372525 1144232445 195534505 4020833259 946396359\n 1559665603 3237234635 826432554 799463637 3769666381 3612718603\n 762518382 2954474157 4052494419 672053721 1345549799 1873779721\n 2936068468 972026843 1411934901 2952294227 2546812485 2659826516\n 2834428224 2455667549 4281380303 2345320401 3932855189 309111429\n 834893265 2699122382 2146331862 3207660078 1202940344 1030469978\n 3945221344 2900087534 722371964 3096315981 1621397645 907802015\n 450035999 2628913824 3160204880 3961963201 349350642 3107386851\n 560688431 2098806006 3142991583 79041694 2234561220 122454157]",
640
+ 624,
641
+ 0,
642
+ 0.0
643
+ ],
644
+ "mlx": 1756905967,
645
+ "mlx_key": [
646
+ 0,
647
+ 1756903229
648
+ ]
649
+ },
650
+ "training_args_snapshot": {
651
+ "output_dir": "outy1266_align_last7",
652
+ "max_kv_size": 1536,
653
+ "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit",
654
+ "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/InferenceIllusionist/gpt-oss-20b-MLX-4bit",
655
+ "draft_model_path": null,
656
+ "benchmark_every": 0,
657
+ "benchmark_dataset": "gsm8k",
658
+ "benchmark_dataset_config": "main",
659
+ "benchmark_split": "test",
660
+ "benchmark_samples": 10,
661
+ "benchmark_prompt_key": "question",
662
+ "benchmark_answer_key": "answer",
663
+ "benchmark_max_new_tokens": 196,
664
+ "benchmark_temperature": 0.0,
665
+ "benchmark_top_p": 1.0,
666
+ "benchmark_top_k": 0,
667
+ "benchmark_use_chat_template": true,
668
+ "benchmark_stop_on_error": false,
669
+ "min_think_tokens": 32,
670
+ "think_end_early_bias": -12.0,
671
+ "bias_answer_start_after_min_think": true,
672
+ "train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl",
673
+ "val_dataset_path": null,
674
+ "dataset_name": null,
675
+ "dataset_config": null,
676
+ "dataset_train_split": "train",
677
+ "dataset_val_split": "test",
678
+ "dataset_prompt_key": "prompt",
679
+ "dataset_answer_key": "completion",
680
+ "max_prompt_len": 350,
681
+ "max_gen_len": 128,
682
+ "system_prompt": null,
683
+ "think_start_tag": "<think>",
684
+ "think_end_tag": "</think>",
685
+ "answer_start_tag": "<answer>",
686
+ "answer_end_tag": "</answer>",
687
+ "think_boost_tokens": 24,
688
+ "think_temperature": 0.15,
689
+ "answer_temperature": 0.1,
690
+ "sampling_top_p": 0.6,
691
+ "sampling_min_p": 0.05,
692
+ "sampling_top_k": 40,
693
+ "repetition_penalty": 1.15,
694
+ "repetition_context_size": 64,
695
+ "hard_mask_mcq_first_token": true,
696
+ "mcq_letter_lift": 10.0,
697
+ "mcq_ban_first_bias": -14.0,
698
+ "nonmcq_ban_first_bias": -10.0,
699
+ "mcq_close_after_k": 1,
700
+ "min_answer_tokens": 6,
701
+ "min_answer_tokens_mcq": 1,
702
+ "bias_close_think": 6.0,
703
+ "bias_answer_start": 3.0,
704
+ "punish_reopen_think": -3.0,
705
+ "punish_extra_think_end": -6.0,
706
+ "bias_eos_after_answer": 4.0,
707
+ "allow_tool_calls": false,
708
+ "tool_call_penalty": 1.0,
709
+ "reward_content_type": "smart",
710
+ "reward_format_weight": 0.2,
711
+ "reward_content_weight": 0.7,
712
+ "think_reward_weight": 0.1,
713
+ "think_len_min": 16,
714
+ "think_len_max": 64,
715
+ "use_lora": false,
716
+ "num_rollout_samples": 3,
717
+ "ppo_batch_size": 1,
718
+ "grpo_beta": 0.04,
719
+ "learning_rate": 1.4e-06,
720
+ "optimizer_beta1": 0.9,
721
+ "optimizer_beta2": 0.95,
722
+ "optimizer_weight_decay": 0.01,
723
+ "grad_clip_norm": 0.35,
724
+ "save_optimizer_state": false,
725
+ "lr_schedule_config": {
726
+ "name": "cosine_decay",
727
+ "arguments": [
728
+ 1.4e-06,
729
+ 60000,
730
+ 2e-07
731
+ ],
732
+ "warmup": 4000,
733
+ "warmup_init": 2e-07
734
+ },
735
+ "grad_accum_steps": 2,
736
+ "num_training_steps": 45869,
737
+ "save_every": 10,
738
+ "eval_every": 0,
739
+ "seed": 15572,
740
+ "shuffle_data": true,
741
+ "use_grad_checkpointing": false,
742
+ "grad_checkpoint_layers": 0,
743
+ "log_samples_every": 1,
744
+ "max_logged_samples": 50,
745
+ "log_prompts": true,
746
+ "sample_log_path": null,
747
+ "kv_bits": 0,
748
+ "kv_group_size": 64,
749
+ "quantized_kv_start": 0,
750
+ "verbose": true,
751
+ "use_wandb": true,
752
+ "wandb_project": "reasonable-qwen-4b-mlxv2isi",
753
+ "wandb_entity": null,
754
+ "wandb_run_name": null,
755
+ "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last7/checkpoint_20250903_224014_periodic_update_2510",
756
+ "allow_cross_arch_ref": true,
757
+ "align_bridge_path": null,
758
+ "align_bridge_weight": 1.0,
759
+ "align_pool": "mean",
760
+ "align_after_tag": "<|start|>assistant<|channel|>analysis<|message|>",
761
+ "effective_batch_size": 4
762
+ }
763
+ }
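
The `lr_schedule_config` in the training-state snapshot above describes a cosine decay from 1.4e-06 down to 2e-07 over 60000 steps, with a 4000-step warmup that starts at 2e-07. A minimal sketch of that schedule in plain Python follows; the function `lr_at` is an illustrative reimplementation using only the values shown above, not the trainer's actual scheduling code, and whether the warmup steps count toward the decay horizon is an assumption.

```python
import math

# Values copied from lr_schedule_config in training_state.json above.
PEAK_LR, DECAY_STEPS, END_LR = 1.4e-06, 60000, 2e-07
WARMUP_STEPS, WARMUP_INIT = 4000, 2e-07

def lr_at(step: int) -> float:
    """Linear warmup followed by cosine decay (illustrative reimplementation)."""
    if step < WARMUP_STEPS:
        # Linear ramp from WARMUP_INIT up to PEAK_LR over the warmup window.
        return WARMUP_INIT + (PEAK_LR - WARMUP_INIT) * step / WARMUP_STEPS
    # Cosine decay from PEAK_LR down to END_LR over DECAY_STEPS after warmup.
    t = min(step - WARMUP_STEPS, DECAY_STEPS) / DECAY_STEPS
    return END_LR + 0.5 * (PEAK_LR - END_LR) * (1.0 + math.cos(math.pi * t))

# The checkpoint above sits at update 2602, i.e. still inside the warmup ramp.
print(lr_at(0), lr_at(2602), lr_at(64000))
```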
checkpoint_20250903_232551_exit_request_update_2602/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232617_shutdown_signal_update_2602/added_tokens.json ADDED
@@ -0,0 +1,38 @@
+ {
+ "</answer>": 151669,
+ "</img_base64>": 151670,
+ "</json_output>": 151671,
+ "</ocr_text>": 151672,
+ "</think>": 151668,
+ "</tool_call>": 151658,
+ "</tool_code>": 151673,
+ "</tool_response>": 151666,
+ "<answer>": 151674,
+ "<img_base64>": 151675,
+ "<json_output>": 151676,
+ "<ocr_text>": 151677,
+ "<think>": 151667,
+ "<tool_call>": 151657,
+ "<tool_code>": 151678,
+ "<tool_response>": 151665,
+ "<|box_end|>": 151649,
+ "<|box_start|>": 151648,
+ "<|endoftext|>": 151643,
+ "<|file_sep|>": 151664,
+ "<|fim_middle|>": 151660,
+ "<|fim_pad|>": 151662,
+ "<|fim_prefix|>": 151659,
+ "<|fim_suffix|>": 151661,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644,
+ "<|image_pad|>": 151655,
+ "<|object_ref_end|>": 151647,
+ "<|object_ref_start|>": 151646,
+ "<|quad_end|>": 151651,
+ "<|quad_start|>": 151650,
+ "<|repo_name|>": 151663,
+ "<|video_pad|>": 151656,
+ "<|vision_end|>": 151653,
+ "<|vision_pad|>": 151654,
+ "<|vision_start|>": 151652
+ }
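
The `added_tokens.json` above pins the reasoning/answer markup (`<think>`, `</think>`, `<answer>`, `</answer>`, `<tool_code>`, …) to fixed IDs in the 151657-151678 range. A quick sanity check with `transformers` could look like the sketch below; loading the checkpoint directory from a local path is an assumption, the expected IDs are taken directly from the file above.

```python
from transformers import AutoTokenizer

# Hypothetical local path to one of the checkpoint folders in this commit.
ckpt = "checkpoint_20250903_232617_shutdown_signal_update_2602"
tok = AutoTokenizer.from_pretrained(ckpt)

# IDs should match added_tokens.json exactly.
assert tok.convert_tokens_to_ids("<think>") == 151667
assert tok.convert_tokens_to_ids("</think>") == 151668
assert tok.convert_tokens_to_ids("<answer>") == 151674
assert tok.convert_tokens_to_ids("</answer>") == 151669

# Round-trip: the markers should survive decoding as single tokens.
print(tok.decode([151667, 151668]))
```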
checkpoint_20250903_232617_shutdown_signal_update_2602/chat_template.jinja ADDED
@@ -0,0 +1,96 @@
+ {%- if tools %}
+ {{- '<|im_start|>system\n' }}
+ {%- if messages[0].role == 'system' %}
+ {{- messages[0].content + '\n\n' }}
+ {%- endif %}
+ {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+ {%- for tool in tools %}
+ {{- "\n" }}
+ {{- tool | tojson }}
+ {%- endfor %}
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+ {%- else %}
+ {%- if messages[0].role == 'system' %}
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+ {%- for message in messages[::-1] %}
+ {%- set index = (messages|length - 1) - loop.index0 %}
+ {%- set tool_start = "<tool_response>" %}
+ {%- set tool_start_length = tool_start|length %}
+ {%- set start_of_message = message.content[:tool_start_length] %}
+ {%- set tool_end = "</tool_response>" %}
+ {%- set tool_end_length = tool_end|length %}
+ {%- set start_pos = (message.content|length) - tool_end_length %}
+ {%- if start_pos < 0 %}
+ {%- set start_pos = 0 %}
+ {%- endif %}
+ {%- set end_of_message = message.content[start_pos:] %}
+ {%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
+ {%- set ns.multi_step_tool = false %}
+ {%- set ns.last_query_index = index %}
+ {%- endif %}
+ {%- endfor %}
+ {%- for message in messages %}
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+ {%- elif message.role == "assistant" %}
+ {%- set content = message.content %}
+ {%- set reasoning_content = '' %}
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
+ {%- set reasoning_content = message.reasoning_content %}
+ {%- else %}
+ {%- if '</think>' in message.content %}
+ {%- set content = (message.content.split('</think>')|last).lstrip('\n') %}
+ {%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\n') %}
+ {%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
+ {%- endif %}
+ {%- endif %}
+ {%- if loop.index0 > ns.last_query_index %}
+ {%- if loop.last or (not loop.last and reasoning_content) %}
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- else %}
+ {{- '<|im_start|>' + message.role + '\n' + content }}
+ {%- endif %}
+ {%- if message.tool_calls %}
+ {%- for tool_call in message.tool_calls %}
+ {%- if (loop.first and content) or (not loop.first) %}
+ {{- '\n' }}
+ {%- endif %}
+ {%- if tool_call.function %}
+ {%- set tool_call = tool_call.function %}
+ {%- endif %}
+ {{- '<tool_call>\n{"name": "' }}
+ {{- tool_call.name }}
+ {{- '", "arguments": ' }}
+ {%- if tool_call.arguments is string %}
+ {{- tool_call.arguments }}
+ {%- else %}
+ {{- tool_call.arguments | tojson }}
+ {%- endif %}
+ {{- '}\n</tool_call>' }}
+ {%- endfor %}
+ {%- endif %}
+ {{- '<|im_end|>\n' }}
+ {%- elif message.role == "tool" %}
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+ {{- '<|im_start|>user' }}
+ {%- endif %}
+ {{- '\n<tool_response>\n' }}
+ {{- message.content }}
+ {{- '\n</tool_response>' }}
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+ {{- '<|im_end|>\n' }}
+ {%- endif %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if add_generation_prompt %}
+ {{- '<|im_start|>assistant\n' }}
+ {%- if enable_thinking is defined and enable_thinking is false %}
+ {{- '<think>\n\n</think>\n\n' }}
+ {%- endif %}
+ {%- endif %}
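
This template renders ChatML turns, splits an assistant message at `</think>` into reasoning and answer parts, and, when `add_generation_prompt` is set with `enable_thinking` false, pre-fills an empty `<think>\n\n</think>` block. A hedged usage sketch with `transformers` follows; the local checkpoint path is an assumption, and passing `enable_thinking` through `apply_chat_template` assumes (as recent `transformers` versions do) that extra keyword arguments are forwarded into the template context.

```python
from transformers import AutoTokenizer

# Hypothetical local path to one of the checkpoint folders in this commit.
tok = AutoTokenizer.from_pretrained("checkpoint_20250903_232617_shutdown_signal_update_2602")

messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "What is 17 * 23?"},
]

# Thinking left enabled: the model is expected to open its own <think> block.
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Thinking disabled: the template itself emits the empty <think>\n\n</think> block.
prompt_no_think = tok.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
)
print(prompt_no_think.endswith("<think>\n\n</think>\n\n"))
```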
checkpoint_20250903_232617_shutdown_signal_update_2602/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "architectures": [
+ "Qwen3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151645,
+ "head_dim": 128,
+ "hidden_act": "silu",
+ "hidden_size": 2560,
+ "initializer_range": 0.02,
+ "intermediate_size": 9728,
+ "max_position_embeddings": 40960,
+ "max_window_layers": 36,
+ "model_type": "qwen3",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 36,
+ "num_key_value_heads": 8,
+ "rms_norm_eps": 1e-06,
+ "rope_scaling": null,
+ "rope_theta": 1000000,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.51.0",
+ "use_cache": true,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+ }
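
The config above describes a 36-layer Qwen3 with grouped-query attention: 32 query heads but only 8 key/value heads at `head_dim` 128 on a 2560-wide residual stream. A back-of-the-envelope sketch of what that means for KV-cache memory follows; it is pure arithmetic from these fields, and the 2 bytes per value assumes the bfloat16 `torch_dtype` listed above rather than any quantized cache.

```python
# Fields copied from config.json above.
num_hidden_layers = 36
num_key_value_heads = 8
head_dim = 128
max_position_embeddings = 40960
bytes_per_value = 2  # bfloat16 assumption

# Per token, each layer stores one K and one V vector per KV head.
kv_bytes_per_token = (
    num_hidden_layers * num_key_value_heads * head_dim * 2 * bytes_per_value
)
print(kv_bytes_per_token)                                   # 147456 bytes ≈ 144 KiB/token
print(kv_bytes_per_token * max_position_embeddings / 2**30)  # ≈ 5.6 GiB at full context
```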
checkpoint_20250903_232617_shutdown_signal_update_2602/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint_20250903_232617_shutdown_signal_update_2602/special_tokens_map.json ADDED
@@ -0,0 +1,102 @@
+ {
+ "additional_special_tokens": [
+ {
+ "content": "</answer>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</img_base64>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</json_output>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</ocr_text>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</think>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "</tool_code>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<answer>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<img_base64>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<json_output>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<ocr_text>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<think>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ {
+ "content": "<tool_code>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ ],
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint_20250903_232617_shutdown_signal_update_2602/tokenizer_config.json ADDED
@@ -0,0 +1,318 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": true
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": true
212
+ },
213
+ "151669": {
214
+ "content": "</answer>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false,
219
+ "special": true
220
+ },
221
+ "151670": {
222
+ "content": "</img_base64>",
223
+ "lstrip": false,
224
+ "normalized": false,
225
+ "rstrip": false,
226
+ "single_word": false,
227
+ "special": true
228
+ },
229
+ "151671": {
230
+ "content": "</json_output>",
231
+ "lstrip": false,
232
+ "normalized": false,
233
+ "rstrip": false,
234
+ "single_word": false,
235
+ "special": true
236
+ },
237
+ "151672": {
238
+ "content": "</ocr_text>",
239
+ "lstrip": false,
240
+ "normalized": false,
241
+ "rstrip": false,
242
+ "single_word": false,
243
+ "special": true
244
+ },
245
+ "151673": {
246
+ "content": "</tool_code>",
247
+ "lstrip": false,
248
+ "normalized": false,
249
+ "rstrip": false,
250
+ "single_word": false,
251
+ "special": true
252
+ },
253
+ "151674": {
254
+ "content": "<answer>",
255
+ "lstrip": false,
256
+ "normalized": false,
257
+ "rstrip": false,
258
+ "single_word": false,
259
+ "special": true
260
+ },
261
+ "151675": {
262
+ "content": "<img_base64>",
263
+ "lstrip": false,
264
+ "normalized": false,
265
+ "rstrip": false,
266
+ "single_word": false,
267
+ "special": true
268
+ },
269
+ "151676": {
270
+ "content": "<json_output>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false,
275
+ "special": true
276
+ },
277
+ "151677": {
278
+ "content": "<ocr_text>",
279
+ "lstrip": false,
280
+ "normalized": false,
281
+ "rstrip": false,
282
+ "single_word": false,
283
+ "special": true
284
+ },
285
+ "151678": {
286
+ "content": "<tool_code>",
287
+ "lstrip": false,
288
+ "normalized": false,
289
+ "rstrip": false,
290
+ "single_word": false,
291
+ "special": true
292
+ }
293
+ },
294
+ "additional_special_tokens": [
295
+ "</answer>",
296
+ "</img_base64>",
297
+ "</json_output>",
298
+ "</ocr_text>",
299
+ "</think>",
300
+ "</tool_code>",
301
+ "<answer>",
302
+ "<img_base64>",
303
+ "<json_output>",
304
+ "<ocr_text>",
305
+ "<think>",
306
+ "<tool_code>"
307
+ ],
308
+ "bos_token": null,
309
+ "clean_up_tokenization_spaces": false,
310
+ "eos_token": "<|im_end|>",
311
+ "errors": "replace",
312
+ "extra_special_tokens": {},
313
+ "model_max_length": 131072,
314
+ "pad_token": "<|endoftext|>",
315
+ "split_special_tokens": false,
316
+ "tokenizer_class": "Qwen2Tokenizer",
317
+ "unk_token": null
318
+ }
checkpoint_20250903_232617_shutdown_signal_update_2602/training_state.json ADDED
@@ -0,0 +1,763 @@
1
+ {
2
+ "global_step": 2602,
3
+ "num_updates": 2602,
4
+ "use_lora": false,
5
+ "rng_state": {
6
+ "python": [
7
+ 3,
8
+ [
9
+ 3228212754,
10
+ 279998097,
11
+ 2056665714,
12
+ 3603597067,
13
+ 3476614728,
14
+ 3031000,
15
+ 2954941703,
16
+ 3205203341,
17
+ 1022055790,
18
+ 1415253971,
19
+ 2181335448,
20
+ 3582899399,
21
+ 1644250258,
22
+ 1793404199,
23
+ 2566317855,
24
+ 109101073,
25
+ 3150506052,
26
+ 2724067265,
27
+ 97547420,
28
+ 2995366220,
29
+ 3519916584,
30
+ 3209457352,
31
+ 3126309277,
32
+ 3451453441,
33
+ 1702671318,
34
+ 2107243699,
35
+ 2422777587,
36
+ 360391179,
37
+ 585237960,
38
+ 2832187814,
39
+ 2662498495,
40
+ 2394734758,
41
+ 1444276186,
42
+ 1837837410,
43
+ 961418280,
44
+ 1195482276,
45
+ 935680843,
46
+ 3968754582,
47
+ 2211483879,
48
+ 1837545159,
49
+ 2419172187,
50
+ 2042898634,
51
+ 2665785964,
52
+ 356867850,
53
+ 3385622908,
54
+ 3868724832,
55
+ 2977197007,
56
+ 1479685303,
57
+ 1540416526,
58
+ 331944145,
59
+ 3530610791,
60
+ 2842657301,
61
+ 2372012930,
62
+ 639325304,
63
+ 3360783663,
64
+ 3762064881,
65
+ 2160841949,
66
+ 4001870304,
67
+ 1864777350,
68
+ 717368547,
69
+ 362746266,
70
+ 3779466655,
71
+ 1960142933,
72
+ 1723756462,
73
+ 1326392635,
74
+ 2695751926,
75
+ 1728155752,
76
+ 2347807318,
77
+ 1862557049,
78
+ 3999800477,
79
+ 2277115301,
80
+ 1516014806,
81
+ 3846995662,
82
+ 2388310657,
83
+ 2859396105,
84
+ 1441398545,
85
+ 3439746988,
86
+ 3678365781,
87
+ 190759243,
88
+ 4075004972,
89
+ 342634350,
90
+ 2045571341,
91
+ 557542918,
92
+ 2240372410,
93
+ 1982579501,
94
+ 255722053,
95
+ 1496213542,
96
+ 2563043770,
97
+ 620403458,
98
+ 3666797179,
99
+ 4194690277,
100
+ 1725488508,
101
+ 2427139442,
102
+ 911138792,
103
+ 2810519096,
104
+ 403900489,
105
+ 1235282796,
106
+ 3323510948,
107
+ 3976438655,
108
+ 2592317228,
109
+ 1469307213,
110
+ 456462311,
111
+ 3393494366,
112
+ 669420558,
113
+ 1939678322,
114
+ 4073521067,
115
+ 3342970892,
116
+ 2452710290,
117
+ 2793129860,
118
+ 1342676286,
119
+ 2394512596,
120
+ 1832972552,
121
+ 3814703913,
122
+ 945666136,
123
+ 3552696630,
124
+ 3165169504,
125
+ 4234896064,
126
+ 3288485605,
127
+ 2637492903,
128
+ 3688384962,
129
+ 1693185353,
130
+ 3041897498,
131
+ 3666651581,
132
+ 216719692,
133
+ 2101621578,
134
+ 1056505155,
135
+ 3806530083,
136
+ 2345205292,
137
+ 2868630622,
138
+ 3289598319,
139
+ 597570811,
140
+ 2632481252,
141
+ 3875619652,
142
+ 289253672,
143
+ 77311731,
144
+ 4133169138,
145
+ 3462637509,
146
+ 2047139049,
147
+ 109094532,
148
+ 1309238588,
149
+ 4183047643,
150
+ 3849080966,
151
+ 2612177601,
152
+ 2747398543,
153
+ 2317389804,
154
+ 2206126400,
155
+ 3538931825,
156
+ 4015230236,
157
+ 1548164965,
158
+ 3615557249,
159
+ 3874732623,
160
+ 383396796,
161
+ 78135062,
162
+ 836187159,
163
+ 1405060375,
164
+ 4124734680,
165
+ 2283841137,
166
+ 130389111,
167
+ 2370256028,
168
+ 3117432748,
169
+ 2188669863,
170
+ 1274090654,
171
+ 136753743,
172
+ 3535123905,
173
+ 928699189,
174
+ 2096609090,
175
+ 140690583,
176
+ 1335216202,
177
+ 1664118110,
178
+ 370920611,
179
+ 3381310767,
180
+ 635177978,
181
+ 919944943,
182
+ 1794890933,
183
+ 3785921605,
184
+ 988623168,
185
+ 3586512205,
186
+ 3578229114,
187
+ 2227888166,
188
+ 1199377973,
189
+ 198574987,
190
+ 3359460270,
191
+ 41421261,
192
+ 3818002247,
193
+ 1611622405,
194
+ 1172309347,
195
+ 1127078025,
196
+ 864036890,
197
+ 3821592921,
198
+ 1074908116,
199
+ 4045652492,
200
+ 150578148,
201
+ 1490173923,
202
+ 1255586022,
203
+ 3620141376,
204
+ 1119552840,
205
+ 1604539422,
206
+ 2899876376,
207
+ 1634692146,
208
+ 2430160935,
209
+ 3525965146,
210
+ 3634230653,
211
+ 2801553134,
212
+ 2236757588,
213
+ 3209439651,
214
+ 638937059,
215
+ 332630833,
216
+ 2099354597,
217
+ 2453040579,
218
+ 2294611386,
219
+ 3583062819,
220
+ 3057053813,
221
+ 2651436307,
222
+ 820127535,
223
+ 2234442340,
224
+ 2926763826,
225
+ 2608312654,
226
+ 622663536,
227
+ 1327274392,
228
+ 579028969,
229
+ 1638676546,
230
+ 725757522,
231
+ 2287475756,
232
+ 2245008208,
233
+ 583117806,
234
+ 1681911299,
235
+ 4043302089,
236
+ 2733469006,
237
+ 3482246612,
238
+ 1699131086,
239
+ 2009286409,
240
+ 1012929586,
241
+ 1831443753,
242
+ 3401815932,
243
+ 2335754234,
244
+ 1338527095,
245
+ 1234451965,
246
+ 2260706742,
247
+ 3195944902,
248
+ 933824426,
249
+ 3055004187,
250
+ 442858759,
251
+ 3568329250,
252
+ 2010957609,
253
+ 2249247272,
254
+ 1258950970,
255
+ 205311363,
256
+ 607774665,
257
+ 2098340782,
258
+ 3806387601,
259
+ 1242750989,
260
+ 3609867707,
261
+ 241559992,
262
+ 2980673848,
263
+ 249731274,
264
+ 2840307599,
265
+ 3768697067,
266
+ 2786582312,
267
+ 234722404,
268
+ 501116625,
269
+ 3883427569,
270
+ 2178103353,
271
+ 2378845018,
272
+ 4115296692,
273
+ 870819025,
274
+ 4131469458,
275
+ 4141674918,
276
+ 2078498292,
277
+ 3999732609,
278
+ 2537021868,
279
+ 3558720844,
280
+ 3704987724,
281
+ 398283175,
282
+ 1690550218,
283
+ 2326228611,
284
+ 2685433390,
285
+ 3899703572,
286
+ 3902620680,
287
+ 2509824424,
288
+ 1513684241,
289
+ 2524739654,
290
+ 3117375505,
291
+ 1379082516,
292
+ 2734650653,
293
+ 2355589146,
294
+ 2451030867,
295
+ 2887987439,
296
+ 2131990321,
297
+ 2214951112,
298
+ 4219817646,
299
+ 1537640525,
300
+ 3339366035,
301
+ 2845436015,
302
+ 3914197935,
303
+ … (remaining Mersenne Twister state words of the Python `random` generator; too large to render, see raw diff) …
+ 336
+ ],
+ null
+ ],
+ "numpy": [
+ "MT19937",
+ "[ 15572 980513701 2334715163 … 122454157]",
+ 624,
+ 0,
+ 0.0
+ ],
+ "mlx": 1756905991,
+ "mlx_key": [
+ 0,
+ 1756903229
+ ]
+ },
+ "training_args_snapshot": {
+ "output_dir": "outy1266_align_last7",
+ "max_kv_size": 1536,
+ "model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit",
+ "ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/InferenceIllusionist/gpt-oss-20b-MLX-4bit",
+ "draft_model_path": null,
+ "benchmark_every": 0,
+ "benchmark_dataset": "gsm8k",
+ "benchmark_dataset_config": "main",
+ "benchmark_split": "test",
+ "benchmark_samples": 10,
+ "benchmark_prompt_key": "question",
+ "benchmark_answer_key": "answer",
+ "benchmark_max_new_tokens": 196,
+ "benchmark_temperature": 0.0,
+ "benchmark_top_p": 1.0,
+ "benchmark_top_k": 0,
+ "benchmark_use_chat_template": true,
+ "benchmark_stop_on_error": false,
+ "min_think_tokens": 32,
+ "think_end_early_bias": -12.0,
+ "bias_answer_start_after_min_think": true,
+ "train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl",
+ "val_dataset_path": null,
+ "dataset_name": null,
+ "dataset_config": null,
+ "dataset_train_split": "train",
+ "dataset_val_split": "test",
+ "dataset_prompt_key": "prompt",
+ "dataset_answer_key": "completion",
+ "max_prompt_len": 350,
+ "max_gen_len": 128,
+ "system_prompt": null,
+ "think_start_tag": "<think>",
+ "think_end_tag": "</think>",
+ "answer_start_tag": "<answer>",
+ "answer_end_tag": "</answer>",
+ "think_boost_tokens": 24,
+ "think_temperature": 0.15,
+ "answer_temperature": 0.1,
+ "sampling_top_p": 0.6,
+ "sampling_min_p": 0.05,
+ "sampling_top_k": 40,
+ "repetition_penalty": 1.15,
+ "repetition_context_size": 64,
+ "hard_mask_mcq_first_token": true,
+ "mcq_letter_lift": 10.0,
+ "mcq_ban_first_bias": -14.0,
+ "nonmcq_ban_first_bias": -10.0,
+ "mcq_close_after_k": 1,
+ "min_answer_tokens": 6,
+ "min_answer_tokens_mcq": 1,
+ "bias_close_think": 6.0,
+ "bias_answer_start": 3.0,
+ "punish_reopen_think": -3.0,
+ "punish_extra_think_end": -6.0,
+ "bias_eos_after_answer": 4.0,
+ "allow_tool_calls": false,
+ "tool_call_penalty": 1.0,
+ "reward_content_type": "smart",
+ "reward_format_weight": 0.2,
+ "reward_content_weight": 0.7,
+ "think_reward_weight": 0.1,
+ "think_len_min": 16,
+ "think_len_max": 64,
+ "use_lora": false,
+ "num_rollout_samples": 3,
+ "ppo_batch_size": 1,
+ "grpo_beta": 0.04,
+ "learning_rate": 1.4e-06,
+ "optimizer_beta1": 0.9,
+ "optimizer_beta2": 0.95,
+ "optimizer_weight_decay": 0.01,
+ "grad_clip_norm": 0.35,
+ "save_optimizer_state": false,
+ "lr_schedule_config": {
+ "name": "cosine_decay",
+ "arguments": [
+ 1.4e-06,
+ 60000,
+ 2e-07
+ ],
+ "warmup": 4000,
+ "warmup_init": 2e-07
+ },
+ "grad_accum_steps": 2,
+ "num_training_steps": 45869,
+ "save_every": 10,
+ "eval_every": 0,
+ "seed": 15572,
+ "shuffle_data": true,
+ "use_grad_checkpointing": false,
+ "grad_checkpoint_layers": 0,
+ "log_samples_every": 1,
+ "max_logged_samples": 50,
+ "log_prompts": true,
+ "sample_log_path": null,
+ "kv_bits": 0,
+ "kv_group_size": 64,
+ "quantized_kv_start": 0,
+ "verbose": true,
+ "use_wandb": true,
+ "wandb_project": "reasonable-qwen-4b-mlxv2isi",
+ "wandb_entity": null,
+ "wandb_run_name": null,
+ "resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy1266_align_last7/checkpoint_20250903_224014_periodic_update_2510",
+ "allow_cross_arch_ref": true,
+ "align_bridge_path": null,
+ "align_bridge_weight": 1.0,
+ "align_pool": "mean",
+ "align_after_tag": "<|start|>assistant<|channel|>analysis<|message|>",
+ "effective_batch_size": 4
+ }
+ }
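
The `rng_state` block above freezes the random-number generators so that a resumed run draws the same sampling sequence: the Python `random` Mersenne Twister state, the NumPy `MT19937` state, and the MLX seed/key. As a rough illustration (a minimal sketch, not this repository's training code; the helper names are made up for the example), such a snapshot could be captured and restored like this:

```python
# Minimal sketch (not the repository's training code) of how an rng_state
# snapshot like the one above could be captured and restored. The helper
# names are illustrative; only the stdlib, NumPy, and MLX calls are real APIs.
import json
import random

import numpy as np
import mlx.core as mx


def capture_rng_state(mlx_seed: int) -> dict:
    """Collect generator state from Python's `random`, NumPy, and an MLX seed."""
    py_version, py_words, py_gauss = random.getstate()
    np_name, np_key, np_pos, np_has_gauss, np_cached = np.random.get_state()
    return {
        "python": [py_version, list(py_words), py_gauss],
        "numpy": [np_name, np_key.tolist(), int(np_pos), int(np_has_gauss), float(np_cached)],
        # MLX seeds a global PRNG via mx.random.seed(); recording the seed
        # (and, optionally, an explicit key) is enough to re-seed it later.
        "mlx": mlx_seed,
        "mlx_key": mx.random.key(mlx_seed).tolist(),
    }


def restore_rng_state(state: dict) -> None:
    """Re-seed all three generators from a saved snapshot."""
    version, words, gauss = state["python"]
    random.setstate((version, tuple(words), gauss))
    name, key, pos, has_gauss, cached = state["numpy"]
    np.random.set_state((name, np.asarray(key, dtype=np.uint32), pos, has_gauss, cached))
    mx.random.seed(state["mlx"])


if __name__ == "__main__":
    snapshot = capture_rng_state(mlx_seed=15572)
    json.dumps(snapshot)  # JSON-serializable, like the "rng_state" block above
    restore_rng_state(snapshot)
```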
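
The `lr_schedule_config` entry describes a `cosine_decay` schedule whose arguments `[1.4e-06, 60000, 2e-07]` read as peak rate, decay steps, and final rate, preceded by a 4000-step warmup that starts at `2e-07`. A plain-Python sketch of the resulting per-update learning rate (an illustrative reimplementation, not the trainer's actual scheduler) is:

```python
import math


def lr_at(step: int,
          peak_lr: float = 1.4e-06,
          end_lr: float = 2e-07,
          decay_steps: int = 60000,
          warmup_steps: int = 4000,
          warmup_init: float = 2e-07) -> float:
    """Warmup plus cosine decay, mirroring the lr_schedule_config fields above.

    Illustrative only: frameworks differ in whether decay_steps counts from
    step 0 or from the end of warmup.
    """
    if step < warmup_steps:
        # Linear warmup from warmup_init up to the peak learning rate.
        frac = step / max(1, warmup_steps)
        return warmup_init + frac * (peak_lr - warmup_init)
    # Cosine decay from peak_lr toward end_lr over decay_steps.
    t = min(step - warmup_steps, decay_steps) / decay_steps
    return end_lr + 0.5 * (peak_lr - end_lr) * (1.0 + math.cos(math.pi * t))


print(lr_at(0), lr_at(4000), lr_at(64000))  # ~2e-07, 1.4e-06, ~2e-07
```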
checkpoint_20250903_232617_shutdown_signal_update_2602/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
plots/loss_vs_updates.png ADDED
plots/lr_vs_updates.png ADDED
plots/reward_vs_updates.png ADDED
training_metrics.csv ADDED
The diff for this file is too large to render. See raw diff
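
The three images under `plots/` chart loss, learning rate, and reward against the update count, with `training_metrics.csv` holding the raw values. A minimal sketch of how comparable plots could be rebuilt from the CSV is below; the column names (`update`, `loss`, `lr`, `reward`) are assumptions, since the CSV is not rendered in this diff:

```python
# Hypothetical helper: regenerate update-vs-metric plots from training_metrics.csv.
# Column names ("update", "loss", "lr", "reward") are assumed, not confirmed
# by the rendered diff; adjust them to match the actual CSV header.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("training_metrics.csv")

for metric, fname in [("loss", "plots/loss_vs_updates.png"),
                      ("lr", "plots/lr_vs_updates.png"),
                      ("reward", "plots/reward_vs_updates.png")]:
    fig, ax = plt.subplots(figsize=(6, 4))
    ax.plot(df["update"], df[metric])
    ax.set_xlabel("update")
    ax.set_ylabel(metric)
    ax.set_title(f"{metric} vs updates")
    fig.tight_layout()
    fig.savefig(fname, dpi=150)
    plt.close(fig)
```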