Commit
·
d641c33
1
Parent(s):
969e19a
Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- added_tokens.json +38 -0
- chat_template.jinja +96 -0
- config.json +30 -0
- merges.txt +0 -0
- special_tokens_map.json +102 -0
- tokenizer_config.json +318 -0
- training_state.json +747 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"</answer>": 151669,
|
3 |
+
"</img_base64>": 151670,
|
4 |
+
"</json_output>": 151671,
|
5 |
+
"</ocr_text>": 151672,
|
6 |
+
"</think>": 151668,
|
7 |
+
"</tool_call>": 151658,
|
8 |
+
"</tool_code>": 151673,
|
9 |
+
"</tool_response>": 151666,
|
10 |
+
"<answer>": 151674,
|
11 |
+
"<img_base64>": 151675,
|
12 |
+
"<json_output>": 151676,
|
13 |
+
"<ocr_text>": 151677,
|
14 |
+
"<think>": 151667,
|
15 |
+
"<tool_call>": 151657,
|
16 |
+
"<tool_code>": 151678,
|
17 |
+
"<tool_response>": 151665,
|
18 |
+
"<|box_end|>": 151649,
|
19 |
+
"<|box_start|>": 151648,
|
20 |
+
"<|endoftext|>": 151643,
|
21 |
+
"<|file_sep|>": 151664,
|
22 |
+
"<|fim_middle|>": 151660,
|
23 |
+
"<|fim_pad|>": 151662,
|
24 |
+
"<|fim_prefix|>": 151659,
|
25 |
+
"<|fim_suffix|>": 151661,
|
26 |
+
"<|im_end|>": 151645,
|
27 |
+
"<|im_start|>": 151644,
|
28 |
+
"<|image_pad|>": 151655,
|
29 |
+
"<|object_ref_end|>": 151647,
|
30 |
+
"<|object_ref_start|>": 151646,
|
31 |
+
"<|quad_end|>": 151651,
|
32 |
+
"<|quad_start|>": 151650,
|
33 |
+
"<|repo_name|>": 151663,
|
34 |
+
"<|video_pad|>": 151656,
|
35 |
+
"<|vision_end|>": 151653,
|
36 |
+
"<|vision_pad|>": 151654,
|
37 |
+
"<|vision_start|>": 151652
|
38 |
+
}
|
chat_template.jinja
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{%- if tools %}
|
2 |
+
{{- '<|im_start|>system\n' }}
|
3 |
+
{%- if messages[0].role == 'system' %}
|
4 |
+
{{- messages[0].content + '\n\n' }}
|
5 |
+
{%- endif %}
|
6 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
7 |
+
{%- for tool in tools %}
|
8 |
+
{{- "\n" }}
|
9 |
+
{{- tool | tojson }}
|
10 |
+
{%- endfor %}
|
11 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
12 |
+
{%- else %}
|
13 |
+
{%- if messages[0].role == 'system' %}
|
14 |
+
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
15 |
+
{%- endif %}
|
16 |
+
{%- endif %}
|
17 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
18 |
+
{%- for message in messages[::-1] %}
|
19 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
20 |
+
{%- set tool_start = "<tool_response>" %}
|
21 |
+
{%- set tool_start_length = tool_start|length %}
|
22 |
+
{%- set start_of_message = message.content[:tool_start_length] %}
|
23 |
+
{%- set tool_end = "</tool_response>" %}
|
24 |
+
{%- set tool_end_length = tool_end|length %}
|
25 |
+
{%- set start_pos = (message.content|length) - tool_end_length %}
|
26 |
+
{%- if start_pos < 0 %}
|
27 |
+
{%- set start_pos = 0 %}
|
28 |
+
{%- endif %}
|
29 |
+
{%- set end_of_message = message.content[start_pos:] %}
|
30 |
+
{%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
|
31 |
+
{%- set ns.multi_step_tool = false %}
|
32 |
+
{%- set ns.last_query_index = index %}
|
33 |
+
{%- endif %}
|
34 |
+
{%- endfor %}
|
35 |
+
{%- for message in messages %}
|
36 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
37 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
38 |
+
{%- elif message.role == "assistant" %}
|
39 |
+
{%- set content = message.content %}
|
40 |
+
{%- set reasoning_content = '' %}
|
41 |
+
{%- if message.reasoning_content is defined and message.reasoning_content is not none %}
|
42 |
+
{%- set reasoning_content = message.reasoning_content %}
|
43 |
+
{%- else %}
|
44 |
+
{%- if '</think>' in message.content %}
|
45 |
+
{%- set content = (message.content.split('</think>')|last).lstrip('\n') %}
|
46 |
+
{%- set reasoning_content = (message.content.split('</think>')|first).rstrip('\n') %}
|
47 |
+
{%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
|
48 |
+
{%- endif %}
|
49 |
+
{%- endif %}
|
50 |
+
{%- if loop.index0 > ns.last_query_index %}
|
51 |
+
{%- if loop.last or (not loop.last and reasoning_content) %}
|
52 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
53 |
+
{%- else %}
|
54 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
55 |
+
{%- endif %}
|
56 |
+
{%- else %}
|
57 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
58 |
+
{%- endif %}
|
59 |
+
{%- if message.tool_calls %}
|
60 |
+
{%- for tool_call in message.tool_calls %}
|
61 |
+
{%- if (loop.first and content) or (not loop.first) %}
|
62 |
+
{{- '\n' }}
|
63 |
+
{%- endif %}
|
64 |
+
{%- if tool_call.function %}
|
65 |
+
{%- set tool_call = tool_call.function %}
|
66 |
+
{%- endif %}
|
67 |
+
{{- '<tool_call>\n{"name": "' }}
|
68 |
+
{{- tool_call.name }}
|
69 |
+
{{- '", "arguments": ' }}
|
70 |
+
{%- if tool_call.arguments is string %}
|
71 |
+
{{- tool_call.arguments }}
|
72 |
+
{%- else %}
|
73 |
+
{{- tool_call.arguments | tojson }}
|
74 |
+
{%- endif %}
|
75 |
+
{{- '}\n</tool_call>' }}
|
76 |
+
{%- endfor %}
|
77 |
+
{%- endif %}
|
78 |
+
{{- '<|im_end|>\n' }}
|
79 |
+
{%- elif message.role == "tool" %}
|
80 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
81 |
+
{{- '<|im_start|>user' }}
|
82 |
+
{%- endif %}
|
83 |
+
{{- '\n<tool_response>\n' }}
|
84 |
+
{{- message.content }}
|
85 |
+
{{- '\n</tool_response>' }}
|
86 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
87 |
+
{{- '<|im_end|>\n' }}
|
88 |
+
{%- endif %}
|
89 |
+
{%- endif %}
|
90 |
+
{%- endfor %}
|
91 |
+
{%- if add_generation_prompt %}
|
92 |
+
{{- '<|im_start|>assistant\n' }}
|
93 |
+
{%- if enable_thinking is defined and enable_thinking is false %}
|
94 |
+
{{- '<think>\n\n</think>\n\n' }}
|
95 |
+
{%- endif %}
|
96 |
+
{%- endif %}
|
config.json
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"Qwen3ForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 151643,
|
8 |
+
"eos_token_id": 151645,
|
9 |
+
"head_dim": 128,
|
10 |
+
"hidden_act": "silu",
|
11 |
+
"hidden_size": 2560,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 9728,
|
14 |
+
"max_position_embeddings": 40960,
|
15 |
+
"max_window_layers": 36,
|
16 |
+
"model_type": "qwen3",
|
17 |
+
"num_attention_heads": 32,
|
18 |
+
"num_hidden_layers": 36,
|
19 |
+
"num_key_value_heads": 8,
|
20 |
+
"rms_norm_eps": 1e-06,
|
21 |
+
"rope_scaling": null,
|
22 |
+
"rope_theta": 1000000,
|
23 |
+
"sliding_window": null,
|
24 |
+
"tie_word_embeddings": true,
|
25 |
+
"torch_dtype": "bfloat16",
|
26 |
+
"transformers_version": "4.51.0",
|
27 |
+
"use_cache": true,
|
28 |
+
"use_sliding_window": false,
|
29 |
+
"vocab_size": 151936
|
30 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
{
|
4 |
+
"content": "</answer>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
},
|
10 |
+
{
|
11 |
+
"content": "</img_base64>",
|
12 |
+
"lstrip": false,
|
13 |
+
"normalized": false,
|
14 |
+
"rstrip": false,
|
15 |
+
"single_word": false
|
16 |
+
},
|
17 |
+
{
|
18 |
+
"content": "</json_output>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
},
|
24 |
+
{
|
25 |
+
"content": "</ocr_text>",
|
26 |
+
"lstrip": false,
|
27 |
+
"normalized": false,
|
28 |
+
"rstrip": false,
|
29 |
+
"single_word": false
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"content": "</think>",
|
33 |
+
"lstrip": false,
|
34 |
+
"normalized": false,
|
35 |
+
"rstrip": false,
|
36 |
+
"single_word": false
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"content": "</tool_code>",
|
40 |
+
"lstrip": false,
|
41 |
+
"normalized": false,
|
42 |
+
"rstrip": false,
|
43 |
+
"single_word": false
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"content": "<answer>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false
|
51 |
+
},
|
52 |
+
{
|
53 |
+
"content": "<img_base64>",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": false,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"content": "<json_output>",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": false,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"content": "<ocr_text>",
|
68 |
+
"lstrip": false,
|
69 |
+
"normalized": false,
|
70 |
+
"rstrip": false,
|
71 |
+
"single_word": false
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"content": "<think>",
|
75 |
+
"lstrip": false,
|
76 |
+
"normalized": false,
|
77 |
+
"rstrip": false,
|
78 |
+
"single_word": false
|
79 |
+
},
|
80 |
+
{
|
81 |
+
"content": "<tool_code>",
|
82 |
+
"lstrip": false,
|
83 |
+
"normalized": false,
|
84 |
+
"rstrip": false,
|
85 |
+
"single_word": false
|
86 |
+
}
|
87 |
+
],
|
88 |
+
"eos_token": {
|
89 |
+
"content": "<|im_end|>",
|
90 |
+
"lstrip": false,
|
91 |
+
"normalized": false,
|
92 |
+
"rstrip": false,
|
93 |
+
"single_word": false
|
94 |
+
},
|
95 |
+
"pad_token": {
|
96 |
+
"content": "<|endoftext|>",
|
97 |
+
"lstrip": false,
|
98 |
+
"normalized": false,
|
99 |
+
"rstrip": false,
|
100 |
+
"single_word": false
|
101 |
+
}
|
102 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,318 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"151643": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"151644": {
|
14 |
+
"content": "<|im_start|>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"151645": {
|
22 |
+
"content": "<|im_end|>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
},
|
29 |
+
"151646": {
|
30 |
+
"content": "<|object_ref_start|>",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": false,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": true
|
36 |
+
},
|
37 |
+
"151647": {
|
38 |
+
"content": "<|object_ref_end|>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": false,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": true
|
44 |
+
},
|
45 |
+
"151648": {
|
46 |
+
"content": "<|box_start|>",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": false,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": true
|
52 |
+
},
|
53 |
+
"151649": {
|
54 |
+
"content": "<|box_end|>",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": false,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": true
|
60 |
+
},
|
61 |
+
"151650": {
|
62 |
+
"content": "<|quad_start|>",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": false,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": true
|
68 |
+
},
|
69 |
+
"151651": {
|
70 |
+
"content": "<|quad_end|>",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": false,
|
73 |
+
"rstrip": false,
|
74 |
+
"single_word": false,
|
75 |
+
"special": true
|
76 |
+
},
|
77 |
+
"151652": {
|
78 |
+
"content": "<|vision_start|>",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": false,
|
81 |
+
"rstrip": false,
|
82 |
+
"single_word": false,
|
83 |
+
"special": true
|
84 |
+
},
|
85 |
+
"151653": {
|
86 |
+
"content": "<|vision_end|>",
|
87 |
+
"lstrip": false,
|
88 |
+
"normalized": false,
|
89 |
+
"rstrip": false,
|
90 |
+
"single_word": false,
|
91 |
+
"special": true
|
92 |
+
},
|
93 |
+
"151654": {
|
94 |
+
"content": "<|vision_pad|>",
|
95 |
+
"lstrip": false,
|
96 |
+
"normalized": false,
|
97 |
+
"rstrip": false,
|
98 |
+
"single_word": false,
|
99 |
+
"special": true
|
100 |
+
},
|
101 |
+
"151655": {
|
102 |
+
"content": "<|image_pad|>",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": false,
|
105 |
+
"rstrip": false,
|
106 |
+
"single_word": false,
|
107 |
+
"special": true
|
108 |
+
},
|
109 |
+
"151656": {
|
110 |
+
"content": "<|video_pad|>",
|
111 |
+
"lstrip": false,
|
112 |
+
"normalized": false,
|
113 |
+
"rstrip": false,
|
114 |
+
"single_word": false,
|
115 |
+
"special": true
|
116 |
+
},
|
117 |
+
"151657": {
|
118 |
+
"content": "<tool_call>",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": false,
|
121 |
+
"rstrip": false,
|
122 |
+
"single_word": false,
|
123 |
+
"special": false
|
124 |
+
},
|
125 |
+
"151658": {
|
126 |
+
"content": "</tool_call>",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": false,
|
129 |
+
"rstrip": false,
|
130 |
+
"single_word": false,
|
131 |
+
"special": false
|
132 |
+
},
|
133 |
+
"151659": {
|
134 |
+
"content": "<|fim_prefix|>",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": false,
|
137 |
+
"rstrip": false,
|
138 |
+
"single_word": false,
|
139 |
+
"special": false
|
140 |
+
},
|
141 |
+
"151660": {
|
142 |
+
"content": "<|fim_middle|>",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": false,
|
145 |
+
"rstrip": false,
|
146 |
+
"single_word": false,
|
147 |
+
"special": false
|
148 |
+
},
|
149 |
+
"151661": {
|
150 |
+
"content": "<|fim_suffix|>",
|
151 |
+
"lstrip": false,
|
152 |
+
"normalized": false,
|
153 |
+
"rstrip": false,
|
154 |
+
"single_word": false,
|
155 |
+
"special": false
|
156 |
+
},
|
157 |
+
"151662": {
|
158 |
+
"content": "<|fim_pad|>",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": false,
|
161 |
+
"rstrip": false,
|
162 |
+
"single_word": false,
|
163 |
+
"special": false
|
164 |
+
},
|
165 |
+
"151663": {
|
166 |
+
"content": "<|repo_name|>",
|
167 |
+
"lstrip": false,
|
168 |
+
"normalized": false,
|
169 |
+
"rstrip": false,
|
170 |
+
"single_word": false,
|
171 |
+
"special": false
|
172 |
+
},
|
173 |
+
"151664": {
|
174 |
+
"content": "<|file_sep|>",
|
175 |
+
"lstrip": false,
|
176 |
+
"normalized": false,
|
177 |
+
"rstrip": false,
|
178 |
+
"single_word": false,
|
179 |
+
"special": false
|
180 |
+
},
|
181 |
+
"151665": {
|
182 |
+
"content": "<tool_response>",
|
183 |
+
"lstrip": false,
|
184 |
+
"normalized": false,
|
185 |
+
"rstrip": false,
|
186 |
+
"single_word": false,
|
187 |
+
"special": false
|
188 |
+
},
|
189 |
+
"151666": {
|
190 |
+
"content": "</tool_response>",
|
191 |
+
"lstrip": false,
|
192 |
+
"normalized": false,
|
193 |
+
"rstrip": false,
|
194 |
+
"single_word": false,
|
195 |
+
"special": false
|
196 |
+
},
|
197 |
+
"151667": {
|
198 |
+
"content": "<think>",
|
199 |
+
"lstrip": false,
|
200 |
+
"normalized": false,
|
201 |
+
"rstrip": false,
|
202 |
+
"single_word": false,
|
203 |
+
"special": true
|
204 |
+
},
|
205 |
+
"151668": {
|
206 |
+
"content": "</think>",
|
207 |
+
"lstrip": false,
|
208 |
+
"normalized": false,
|
209 |
+
"rstrip": false,
|
210 |
+
"single_word": false,
|
211 |
+
"special": true
|
212 |
+
},
|
213 |
+
"151669": {
|
214 |
+
"content": "</answer>",
|
215 |
+
"lstrip": false,
|
216 |
+
"normalized": false,
|
217 |
+
"rstrip": false,
|
218 |
+
"single_word": false,
|
219 |
+
"special": true
|
220 |
+
},
|
221 |
+
"151670": {
|
222 |
+
"content": "</img_base64>",
|
223 |
+
"lstrip": false,
|
224 |
+
"normalized": false,
|
225 |
+
"rstrip": false,
|
226 |
+
"single_word": false,
|
227 |
+
"special": true
|
228 |
+
},
|
229 |
+
"151671": {
|
230 |
+
"content": "</json_output>",
|
231 |
+
"lstrip": false,
|
232 |
+
"normalized": false,
|
233 |
+
"rstrip": false,
|
234 |
+
"single_word": false,
|
235 |
+
"special": true
|
236 |
+
},
|
237 |
+
"151672": {
|
238 |
+
"content": "</ocr_text>",
|
239 |
+
"lstrip": false,
|
240 |
+
"normalized": false,
|
241 |
+
"rstrip": false,
|
242 |
+
"single_word": false,
|
243 |
+
"special": true
|
244 |
+
},
|
245 |
+
"151673": {
|
246 |
+
"content": "</tool_code>",
|
247 |
+
"lstrip": false,
|
248 |
+
"normalized": false,
|
249 |
+
"rstrip": false,
|
250 |
+
"single_word": false,
|
251 |
+
"special": true
|
252 |
+
},
|
253 |
+
"151674": {
|
254 |
+
"content": "<answer>",
|
255 |
+
"lstrip": false,
|
256 |
+
"normalized": false,
|
257 |
+
"rstrip": false,
|
258 |
+
"single_word": false,
|
259 |
+
"special": true
|
260 |
+
},
|
261 |
+
"151675": {
|
262 |
+
"content": "<img_base64>",
|
263 |
+
"lstrip": false,
|
264 |
+
"normalized": false,
|
265 |
+
"rstrip": false,
|
266 |
+
"single_word": false,
|
267 |
+
"special": true
|
268 |
+
},
|
269 |
+
"151676": {
|
270 |
+
"content": "<json_output>",
|
271 |
+
"lstrip": false,
|
272 |
+
"normalized": false,
|
273 |
+
"rstrip": false,
|
274 |
+
"single_word": false,
|
275 |
+
"special": true
|
276 |
+
},
|
277 |
+
"151677": {
|
278 |
+
"content": "<ocr_text>",
|
279 |
+
"lstrip": false,
|
280 |
+
"normalized": false,
|
281 |
+
"rstrip": false,
|
282 |
+
"single_word": false,
|
283 |
+
"special": true
|
284 |
+
},
|
285 |
+
"151678": {
|
286 |
+
"content": "<tool_code>",
|
287 |
+
"lstrip": false,
|
288 |
+
"normalized": false,
|
289 |
+
"rstrip": false,
|
290 |
+
"single_word": false,
|
291 |
+
"special": true
|
292 |
+
}
|
293 |
+
},
|
294 |
+
"additional_special_tokens": [
|
295 |
+
"</answer>",
|
296 |
+
"</img_base64>",
|
297 |
+
"</json_output>",
|
298 |
+
"</ocr_text>",
|
299 |
+
"</think>",
|
300 |
+
"</tool_code>",
|
301 |
+
"<answer>",
|
302 |
+
"<img_base64>",
|
303 |
+
"<json_output>",
|
304 |
+
"<ocr_text>",
|
305 |
+
"<think>",
|
306 |
+
"<tool_code>"
|
307 |
+
],
|
308 |
+
"bos_token": null,
|
309 |
+
"clean_up_tokenization_spaces": false,
|
310 |
+
"eos_token": "<|im_end|>",
|
311 |
+
"errors": "replace",
|
312 |
+
"extra_special_tokens": {},
|
313 |
+
"model_max_length": 131072,
|
314 |
+
"pad_token": "<|endoftext|>",
|
315 |
+
"split_special_tokens": false,
|
316 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
317 |
+
"unk_token": null
|
318 |
+
}
|
training_state.json
ADDED
@@ -0,0 +1,747 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"global_step": 0,
|
3 |
+
"num_updates": 3590,
|
4 |
+
"use_lora": false,
|
5 |
+
"rng_state": {
|
6 |
+
"python": [
|
7 |
+
3,
|
8 |
+
[
|
9 |
+
3791798607,
|
10 |
+
3508723702,
|
11 |
+
1862230724,
|
12 |
+
423765546,
|
13 |
+
3360904558,
|
14 |
+
2976996065,
|
15 |
+
4026401179,
|
16 |
+
3269511641,
|
17 |
+
275389670,
|
18 |
+
157886210,
|
19 |
+
3604966786,
|
20 |
+
1576225624,
|
21 |
+
1867254837,
|
22 |
+
3419695843,
|
23 |
+
21156375,
|
24 |
+
4203904519,
|
25 |
+
2637425097,
|
26 |
+
1317724325,
|
27 |
+
3178606684,
|
28 |
+
3315622916,
|
29 |
+
4237960391,
|
30 |
+
441245789,
|
31 |
+
1685467536,
|
32 |
+
1471869905,
|
33 |
+
3416700604,
|
34 |
+
1814775137,
|
35 |
+
2292040935,
|
36 |
+
3320004111,
|
37 |
+
3332838803,
|
38 |
+
664201542,
|
39 |
+
1661384054,
|
40 |
+
396470432,
|
41 |
+
1133745642,
|
42 |
+
966393212,
|
43 |
+
2433724892,
|
44 |
+
282057528,
|
45 |
+
1844924806,
|
46 |
+
1214319897,
|
47 |
+
1158322072,
|
48 |
+
3598810979,
|
49 |
+
1964073604,
|
50 |
+
3084091070,
|
51 |
+
1726897389,
|
52 |
+
1324138105,
|
53 |
+
1766413618,
|
54 |
+
3156956893,
|
55 |
+
2688040703,
|
56 |
+
3696579993,
|
57 |
+
3304327384,
|
58 |
+
2525659912,
|
59 |
+
2345438528,
|
60 |
+
1883585263,
|
61 |
+
946672308,
|
62 |
+
1933249257,
|
63 |
+
266734331,
|
64 |
+
990935410,
|
65 |
+
1234789843,
|
66 |
+
2563747558,
|
67 |
+
675724960,
|
68 |
+
1696505439,
|
69 |
+
901430351,
|
70 |
+
1004958032,
|
71 |
+
2338881853,
|
72 |
+
872678686,
|
73 |
+
3962989789,
|
74 |
+
890448495,
|
75 |
+
1308763703,
|
76 |
+
3197811409,
|
77 |
+
4235026220,
|
78 |
+
2813766648,
|
79 |
+
1979263247,
|
80 |
+
2137204718,
|
81 |
+
415952213,
|
82 |
+
3592125671,
|
83 |
+
1503781988,
|
84 |
+
2658439850,
|
85 |
+
499584104,
|
86 |
+
2921273816,
|
87 |
+
1347030452,
|
88 |
+
131448433,
|
89 |
+
154680112,
|
90 |
+
2871666894,
|
91 |
+
1776168764,
|
92 |
+
3437077275,
|
93 |
+
566708203,
|
94 |
+
353298691,
|
95 |
+
3064089582,
|
96 |
+
1924282975,
|
97 |
+
3069172070,
|
98 |
+
3881125771,
|
99 |
+
2686835323,
|
100 |
+
2202883673,
|
101 |
+
2983894100,
|
102 |
+
2532403958,
|
103 |
+
905173035,
|
104 |
+
3384776911,
|
105 |
+
3543481043,
|
106 |
+
1104792988,
|
107 |
+
172600784,
|
108 |
+
852845710,
|
109 |
+
1749061473,
|
110 |
+
2782451028,
|
111 |
+
1738166861,
|
112 |
+
808471146,
|
113 |
+
75473991,
|
114 |
+
3181456623,
|
115 |
+
3476014389,
|
116 |
+
1809418779,
|
117 |
+
3276261205,
|
118 |
+
3101297767,
|
119 |
+
1176778017,
|
120 |
+
494872551,
|
121 |
+
161986708,
|
122 |
+
4104250722,
|
123 |
+
560876316,
|
124 |
+
3149459210,
|
125 |
+
1887589567,
|
126 |
+
2540570549,
|
127 |
+
1436003052,
|
128 |
+
3555098651,
|
129 |
+
3236132906,
|
130 |
+
1526726316,
|
131 |
+
219734931,
|
132 |
+
178109730,
|
133 |
+
3020143888,
|
134 |
+
1032320528,
|
135 |
+
3121469492,
|
136 |
+
2267836622,
|
137 |
+
1979604131,
|
138 |
+
1459882342,
|
139 |
+
327442475,
|
140 |
+
139660963,
|
141 |
+
3459859838,
|
142 |
+
4183571040,
|
143 |
+
3773617963,
|
144 |
+
1421923205,
|
145 |
+
1617667923,
|
146 |
+
1831710792,
|
147 |
+
1868983109,
|
148 |
+
1203196426,
|
149 |
+
4031918297,
|
150 |
+
3458521698,
|
151 |
+
1438302201,
|
152 |
+
2233317871,
|
153 |
+
1042019977,
|
154 |
+
3971726408,
|
155 |
+
3609045633,
|
156 |
+
1531082287,
|
157 |
+
2521221056,
|
158 |
+
1163834992,
|
159 |
+
520730668,
|
160 |
+
109466096,
|
161 |
+
2117473953,
|
162 |
+
3957298452,
|
163 |
+
1912024592,
|
164 |
+
2245980257,
|
165 |
+
1509375304,
|
166 |
+
1259372384,
|
167 |
+
1366201837,
|
168 |
+
3082547995,
|
169 |
+
973791605,
|
170 |
+
2590357110,
|
171 |
+
85706583,
|
172 |
+
335281593,
|
173 |
+
3581975608,
|
174 |
+
2750458790,
|
175 |
+
1372538030,
|
176 |
+
2667555278,
|
177 |
+
3549191221,
|
178 |
+
3487906272,
|
179 |
+
4103507437,
|
180 |
+
2581027775,
|
181 |
+
412934344,
|
182 |
+
1905027790,
|
183 |
+
1963672071,
|
184 |
+
1068170280,
|
185 |
+
1084195240,
|
186 |
+
642646672,
|
187 |
+
2178370606,
|
188 |
+
1548826689,
|
189 |
+
1981647093,
|
190 |
+
373225784,
|
191 |
+
418383368,
|
192 |
+
4699831,
|
193 |
+
2512478083,
|
194 |
+
2767508355,
|
195 |
+
3132234488,
|
196 |
+
335651773,
|
197 |
+
1041965187,
|
198 |
+
4257854977,
|
199 |
+
1428984031,
|
200 |
+
1759475221,
|
201 |
+
3687932570,
|
202 |
+
2825806138,
|
203 |
+
3890881760,
|
204 |
+
1078262724,
|
205 |
+
2402468176,
|
206 |
+
11204346,
|
207 |
+
4276914446,
|
208 |
+
2123277743,
|
209 |
+
1160998260,
|
210 |
+
2475410534,
|
211 |
+
255461178,
|
212 |
+
4066044407,
|
213 |
+
1418067188,
|
214 |
+
2431568669,
|
215 |
+
2585593530,
|
216 |
+
2811606620,
|
217 |
+
2394070747,
|
218 |
+
2471499884,
|
219 |
+
55286242,
|
220 |
+
858185667,
|
221 |
+
2646193766,
|
222 |
+
1709688773,
|
223 |
+
116810203,
|
224 |
+
2126350562,
|
225 |
+
1158274514,
|
226 |
+
3445272705,
|
227 |
+
1835769634,
|
228 |
+
3336690970,
|
229 |
+
3310332478,
|
230 |
+
1632566335,
|
231 |
+
3481999723,
|
232 |
+
851301520,
|
233 |
+
4069333420,
|
234 |
+
576167503,
|
235 |
+
3682331274,
|
236 |
+
4188349083,
|
237 |
+
3912509820,
|
238 |
+
816864749,
|
239 |
+
946707769,
|
240 |
+
3763710283,
|
241 |
+
3804925848,
|
242 |
+
3437155331,
|
243 |
+
1072451052,
|
244 |
+
1315648059,
|
245 |
+
3071431808,
|
246 |
+
3353610895,
|
247 |
+
3339423625,
|
248 |
+
263458372,
|
249 |
+
4252759887,
|
250 |
+
1308957122,
|
251 |
+
4058402240,
|
252 |
+
510857746,
|
253 |
+
3220565200,
|
254 |
+
4047335219,
|
255 |
+
2575907388,
|
256 |
+
2130611296,
|
257 |
+
3366038278,
|
258 |
+
2994936704,
|
259 |
+
941075105,
|
260 |
+
260556625,
|
261 |
+
3496894279,
|
262 |
+
243511529,
|
263 |
+
3480197388,
|
264 |
+
1537026765,
|
265 |
+
3390418189,
|
266 |
+
2944600372,
|
267 |
+
3796863563,
|
268 |
+
3624449058,
|
269 |
+
2131577969,
|
270 |
+
2791054582,
|
271 |
+
2893238345,
|
272 |
+
4095846151,
|
273 |
+
3192838959,
|
274 |
+
2408121225,
|
275 |
+
3060659478,
|
276 |
+
3250830159,
|
277 |
+
2929292806,
|
278 |
+
3685282561,
|
279 |
+
2376718166,
|
280 |
+
1815732001,
|
281 |
+
2212533003,
|
282 |
+
89614323,
|
283 |
+
1066144006,
|
284 |
+
3106493252,
|
285 |
+
2829951825,
|
286 |
+
1081964717,
|
287 |
+
1121213920,
|
288 |
+
2989735772,
|
289 |
+
2907604913,
|
290 |
+
12501683,
|
291 |
+
331413626,
|
292 |
+
527773008,
|
293 |
+
2747085630,
|
294 |
+
3594554524,
|
295 |
+
566506130,
|
296 |
+
994544547,
|
297 |
+
3372843686,
|
298 |
+
2089640887,
|
299 |
+
1616697999,
|
300 |
+
3942432075,
|
301 |
+
2679950308,
|
302 |
+
3788303822,
|
303 |
+
319205117,
|
304 |
+
2876175566,
|
305 |
+
1713058936,
|
306 |
+
2121569686,
|
307 |
+
1203646916,
|
308 |
+
163061713,
|
309 |
+
831889715,
|
310 |
+
3761632024,
|
311 |
+
415631820,
|
312 |
+
2176866432,
|
313 |
+
237308060,
|
314 |
+
277392912,
|
315 |
+
107363868,
|
316 |
+
3672934978,
|
317 |
+
3259752662,
|
318 |
+
1348569905,
|
319 |
+
2614112770,
|
320 |
+
3225211995,
|
321 |
+
4109705827,
|
322 |
+
2115230490,
|
323 |
+
3914436432,
|
324 |
+
1757776839,
|
325 |
+
3251722925,
|
326 |
+
3842211185,
|
327 |
+
114285303,
|
328 |
+
3138168880,
|
329 |
+
2527562706,
|
330 |
+
2689710202,
|
331 |
+
812955489,
|
332 |
+
2695875693,
|
333 |
+
2649021527,
|
334 |
+
1548682507,
|
335 |
+
2618208879,
|
336 |
+
588483419,
|
337 |
+
869835196,
|
338 |
+
1954839821,
|
339 |
+
1603316919,
|
340 |
+
2657453307,
|
341 |
+
3601036760,
|
342 |
+
1993917009,
|
343 |
+
3045984060,
|
344 |
+
2417943065,
|
345 |
+
2000739257,
|
346 |
+
1205247511,
|
347 |
+
696339016,
|
348 |
+
721428081,
|
349 |
+
3605076683,
|
350 |
+
3324713745,
|
351 |
+
3809857083,
|
352 |
+
4250499107,
|
353 |
+
1731259867,
|
354 |
+
1901551373,
|
355 |
+
539916285,
|
356 |
+
1715289600,
|
357 |
+
2395721412,
|
358 |
+
3095597749,
|
359 |
+
219348034,
|
360 |
+
3853870073,
|
361 |
+
867230794,
|
362 |
+
3221687528,
|
363 |
+
773109991,
|
364 |
+
1158065915,
|
365 |
+
302912679,
|
366 |
+
1788010892,
|
367 |
+
2998221362,
|
368 |
+
176100714,
|
369 |
+
3138225066,
|
370 |
+
2127748792,
|
371 |
+
616596859,
|
372 |
+
2820876182,
|
373 |
+
2602842530,
|
374 |
+
2121841955,
|
375 |
+
1000891758,
|
376 |
+
2176255864,
|
377 |
+
2508538737,
|
378 |
+
1538834660,
|
379 |
+
1630836986,
|
380 |
+
4255517646,
|
381 |
+
155700998,
|
382 |
+
2862979844,
|
383 |
+
3941687337,
|
384 |
+
1475200360,
|
385 |
+
1685615351,
|
386 |
+
693189788,
|
387 |
+
3924851585,
|
388 |
+
1275560809,
|
389 |
+
3983995141,
|
390 |
+
685370814,
|
391 |
+
4252289188,
|
392 |
+
3050523552,
|
393 |
+
2982975236,
|
394 |
+
1892370979,
|
395 |
+
689935726,
|
396 |
+
4080580786,
|
397 |
+
3123042645,
|
398 |
+
3623123320,
|
399 |
+
2520693297,
|
400 |
+
3926208041,
|
401 |
+
2428254062,
|
402 |
+
2907006300,
|
403 |
+
2632290320,
|
404 |
+
2932448640,
|
405 |
+
1792102501,
|
406 |
+
697949638,
|
407 |
+
2916216572,
|
408 |
+
3725891177,
|
409 |
+
1130484921,
|
410 |
+
331836520,
|
411 |
+
3644799562,
|
412 |
+
482068501,
|
413 |
+
2131449980,
|
414 |
+
2503775292,
|
415 |
+
1652066677,
|
416 |
+
2360173474,
|
417 |
+
1523096025,
|
418 |
+
1576395116,
|
419 |
+
566948010,
|
420 |
+
2161677405,
|
421 |
+
1628062395,
|
422 |
+
2830994943,
|
423 |
+
2782852037,
|
424 |
+
2944886851,
|
425 |
+
2859364654,
|
426 |
+
1624775290,
|
427 |
+
2050779801,
|
428 |
+
1801491861,
|
429 |
+
3024623926,
|
430 |
+
3632805628,
|
431 |
+
446677200,
|
432 |
+
3029070303,
|
433 |
+
1449107449,
|
434 |
+
4027126884,
|
435 |
+
2634169703,
|
436 |
+
3556965880,
|
437 |
+
164674669,
|
438 |
+
3583788166,
|
439 |
+
1331237056,
|
440 |
+
4224223452,
|
441 |
+
3170672551,
|
442 |
+
3214455519,
|
443 |
+
3333358790,
|
444 |
+
2895977729,
|
445 |
+
2103662835,
|
446 |
+
2535360537,
|
447 |
+
1106604819,
|
448 |
+
50616437,
|
449 |
+
733415910,
|
450 |
+
2974218999,
|
451 |
+
4283137563,
|
452 |
+
2392938005,
|
453 |
+
699696764,
|
454 |
+
199297075,
|
455 |
+
149203376,
|
456 |
+
4294907447,
|
457 |
+
1233659753,
|
458 |
+
2924089982,
|
459 |
+
3718263758,
|
460 |
+
922334226,
|
461 |
+
1630701409,
|
462 |
+
2249067984,
|
463 |
+
1175884184,
|
464 |
+
656224305,
|
465 |
+
2590834245,
|
466 |
+
3878996080,
|
467 |
+
3541986337,
|
468 |
+
3346147440,
|
469 |
+
1675417580,
|
470 |
+
1005623759,
|
471 |
+
1351087148,
|
472 |
+
2534202449,
|
473 |
+
2774253889,
|
474 |
+
4030077821,
|
475 |
+
4162064801,
|
476 |
+
3233915124,
|
477 |
+
426362054,
|
478 |
+
2916309146,
|
479 |
+
3148309231,
|
480 |
+
3658127903,
|
481 |
+
145850117,
|
482 |
+
1336358496,
|
483 |
+
1150078683,
|
484 |
+
4115657154,
|
485 |
+
3436314090,
|
486 |
+
1432849230,
|
487 |
+
4133793317,
|
488 |
+
451828536,
|
489 |
+
874568037,
|
490 |
+
342643691,
|
491 |
+
3776773986,
|
492 |
+
2629014065,
|
493 |
+
2714404173,
|
494 |
+
3021236714,
|
495 |
+
3802530299,
|
496 |
+
3963515315,
|
497 |
+
3879653946,
|
498 |
+
2468583149,
|
499 |
+
1802079057,
|
500 |
+
1419539419,
|
501 |
+
1252792083,
|
502 |
+
3622341092,
|
503 |
+
226929186,
|
504 |
+
213328774,
|
505 |
+
47630998,
|
506 |
+
1627039765,
|
507 |
+
3035005937,
|
508 |
+
3196066617,
|
509 |
+
362743007,
|
510 |
+
2973455295,
|
511 |
+
3579485324,
|
512 |
+
326533620,
|
513 |
+
2884198067,
|
514 |
+
3153663419,
|
515 |
+
2302722986,
|
516 |
+
482112633,
|
517 |
+
1421708982,
|
518 |
+
2805864311,
|
519 |
+
787266841,
|
520 |
+
3803728003,
|
521 |
+
1041806175,
|
522 |
+
1838794913,
|
523 |
+
2188916261,
|
524 |
+
1249790098,
|
525 |
+
4132191657,
|
526 |
+
1745945255,
|
527 |
+
2607835519,
|
528 |
+
2895235477,
|
529 |
+
2682206235,
|
530 |
+
1563857049,
|
531 |
+
719254302,
|
532 |
+
1727003882,
|
533 |
+
1638359292,
|
534 |
+
2573218425,
|
535 |
+
1323082026,
|
536 |
+
3993823304,
|
537 |
+
1407145597,
|
538 |
+
3022185332,
|
539 |
+
643003170,
|
540 |
+
3870122829,
|
541 |
+
4261057685,
|
542 |
+
1500064439,
|
543 |
+
1439548519,
|
544 |
+
2669842750,
|
545 |
+
2133133171,
|
546 |
+
663382311,
|
547 |
+
1046013526,
|
548 |
+
2775240114,
|
549 |
+
4263323165,
|
550 |
+
3387006231,
|
551 |
+
866933975,
|
552 |
+
3379642736,
|
553 |
+
2849198251,
|
554 |
+
708156414,
|
555 |
+
2339953422,
|
556 |
+
3509105864,
|
557 |
+
2970913812,
|
558 |
+
1962970918,
|
559 |
+
565038762,
|
560 |
+
3198078760,
|
561 |
+
599155429,
|
562 |
+
3819935618,
|
563 |
+
2594529554,
|
564 |
+
2047511262,
|
565 |
+
119780287,
|
566 |
+
2779943738,
|
567 |
+
1984361926,
|
568 |
+
2523828110,
|
569 |
+
1332578451,
|
570 |
+
3023519940,
|
571 |
+
3316244158,
|
572 |
+
1334613955,
|
573 |
+
709135042,
|
574 |
+
2011105319,
|
575 |
+
2364911526,
|
576 |
+
3603716238,
|
577 |
+
1024992433,
|
578 |
+
1500078584,
|
579 |
+
877597136,
|
580 |
+
2201490519,
|
581 |
+
1523212043,
|
582 |
+
2553742610,
|
583 |
+
510067056,
|
584 |
+
2205240181,
|
585 |
+
1085756406,
|
586 |
+
606367984,
|
587 |
+
2625847287,
|
588 |
+
1807867581,
|
589 |
+
1713701313,
|
590 |
+
3282040430,
|
591 |
+
3225796067,
|
592 |
+
818050665,
|
593 |
+
2829488825,
|
594 |
+
3594971883,
|
595 |
+
2489750525,
|
596 |
+
4120069007,
|
597 |
+
3624684917,
|
598 |
+
2484791507,
|
599 |
+
2610646231,
|
600 |
+
1781863859,
|
601 |
+
1132452345,
|
602 |
+
2562173440,
|
603 |
+
3824504247,
|
604 |
+
2484956779,
|
605 |
+
299418605,
|
606 |
+
2454398589,
|
607 |
+
1937337619,
|
608 |
+
1996159259,
|
609 |
+
1128119611,
|
610 |
+
3802148057,
|
611 |
+
2670314587,
|
612 |
+
4006135860,
|
613 |
+
1939179668,
|
614 |
+
95540832,
|
615 |
+
2978800480,
|
616 |
+
969468339,
|
617 |
+
3385050485,
|
618 |
+
2633280880,
|
619 |
+
232978280,
|
620 |
+
82021996,
|
621 |
+
4097703795,
|
622 |
+
794387923,
|
623 |
+
2334499177,
|
624 |
+
3853372066,
|
625 |
+
1952970090,
|
626 |
+
3766586225,
|
627 |
+
699990280,
|
628 |
+
780454988,
|
629 |
+
2988521735,
|
630 |
+
3888953611,
|
631 |
+
3039554259,
|
632 |
+
2195243037,
|
633 |
+
9
|
634 |
+
],
|
635 |
+
null
|
636 |
+
],
|
637 |
+
"numpy": [
|
638 |
+
"MT19937",
|
639 |
+
"[ 3451 1234773128 3384261391 367671231 393622879 231466368\n 3586329478 599832704 1229277832 3539515670 1119896147 2710965349\n 3618046895 529497833 48677627 3695198230 3828843865 1572521619\n 4109531564 2391910686 2730210336 3965862783 21363714 2961669857\n 1687259815 1663601047 3686370888 2238203570 2663912332 675853603\n 2241081837 867801706 2898437618 489220049 3890826903 4028970119\n 3345243192 3381720684 2870714609 900565766 1644695430 1580097388\n 2438462251 2537424984 275215790 1988987859 4033448968 3454215046\n 3712102569 3285069123 3545176690 410183624 3654777372 2906047856\n 610527280 1811696423 3033201718 3396949181 1231691056 953400720\n 2867583500 2233501123 28315235 182298702 1789338886 2113100548\n 2245760571 2056635840 1479691881 3165034317 198965617 3831358172\n 2676808003 2946269166 413647974 3910472329 4127337150 518324702\n 1936427748 2131057064 3115766013 1710353388 523168211 3513330578\n 1137035657 2023377661 2212786626 1715511575 2639156230 4143532525\n 1181518144 185397248 3632967772 2377346264 3741359200 1503899246\n 2398501163 131759758 1691348072 3463149264 918670499 2909665204\n 3555533108 2681571866 1020154848 862582729 2431144887 644342740\n 1790269200 2996466466 3295382542 1555838608 4184613029 833337327\n 1505196221 841170591 1858325551 2026253467 3829662520 1865210302\n 1701978067 2412507219 2638752111 3382619516 3370936983 609960161\n 3755999811 1146430399 2817116278 3333302853 1195385888 1485499016\n 2081479057 2852382293 707751897 2726380068 544173958 55555687\n 1742804525 2615102439 2918809317 346665136 189851134 871476165\n 2712154697 3461544744 3511950217 2444343301 2462734935 2290098462\n 604572834 3217206913 3771200839 369174637 1678919835 944181085\n 3805074765 3591956067 2545697918 3444662667 2567258952 1058809555\n 178732257 1236692584 116310289 4110107994 2427321795 2377672396\n 1348914670 843917044 586478318 3978770065 2729093702 3662647681\n 2295397112 630771537 1936925861 1535457637 688007462 2112459441\n 4017557540 380583704 2509225262 2854629139 95923053 942602170\n 2335196956 1508711825 3998136972 2496602664 908522832 2882967887\n 1355788577 1603994209 1024282786 3268772525 468561258 2893620631\n 3288043919 4260911363 2308709832 3742292859 2492976930 127719531\n 4288652803 3553787597 2542421524 4222184829 467837318 998535343\n 1146559709 785653663 4025848207 563763473 1996897675 858464841\n 22845605 3753608946 4241110767 60815351 93389903 1068366600\n 2016629510 484409762 3824932810 1800778798 4152770157 2623635529\n 103596411 2442144108 504248140 172457443 2244486775 1304349202\n 1357833577 3973917427 1003250076 1931673465 3285058374 3001694248\n 798794626 3973780795 3847378954 3201863040 3854067518 3752550662\n 3098277103 2422568824 21917210 2006145851 4287301852 2343753718\n 2865547328 1271158023 3508519516 3844441978 1033712829 2640966802\n 1050578386 120674013 3657337653 3496399699 3335991190 3295642576\n 3427790407 2514970077 3605563653 2184988265 3026455619 2010030002\n 3118517677 395103770 1066351442 4051651179 1323435034 2484802234\n 247126956 1627109361 1078919878 2937690266 334138640 4235671657\n 2871000300 4101777665 3724296422 4119013494 2224591943 560885080\n 1719483608 2867340222 4074354254 3398378116 1772347239 2214509155\n 1776579179 3779190521 2955330506 4169746193 246394180 1004847359\n 3884031175 2892217985 3722659293 3156238277 511199667 3972718544\n 546781809 252152008 329515036 1418135617 3597697654 3177649760\n 3646179298 3151939070 280754598 239949241 1802622009 3154293589\n 980111761 3790915700 3558590259 1054754609 1612861591 3334738545\n 4065872446 2688217174 3291072106 2022209972 1189929649 865444025\n 3726273351 1655682847 2720288290 151122925 1086984911 527149717\n 2312571673 679859704 370615338 3466706917 1556925714 3591443668\n 3737471273 807622889 2732967493 2612279900 568940144 3714124683\n 4146943492 371044640 4116775678 2328532016 1352219418 3469919752\n 1936787129 3610779643 1913148988 2333846646 1482759978 3593228382\n 2768796441 1744242192 2368438047 2935663836 2440758018 1327706221\n 4039108618 3542244860 3604319755 1299927705 4077055850 1244047072\n 1823818041 1274131597 1338976434 4201110294 952806849 188340126\n 4018643152 2136498106 188078403 1918407660 2341942015 500925520\n 2677671184 536388507 2313388201 3477751802 2359404481 3638791284\n 1664293753 3710193631 1411383124 664222482 3267686820 1284945774\n 3081357143 2700813078 1315343474 717485550 3748554358 1687554746\n 705515609 1974425008 1189054825 949084061 3145940871 275332624\n 1043074536 1546298913 983215162 315031165 2552122861 825403368\n 4230965286 1383261496 1418654237 1253737133 1849138046 312496574\n 2301685658 4256096413 2021993980 3490876280 3092438063 297792618\n 560867708 2534259607 4007706741 2430932539 1792362283 4083191617\n 1955649722 1379833464 445063279 2125739134 1890168527 4045420283\n 2145937806 236465698 455632674 2148525091 809610175 1963653910\n 2673172687 2937579422 77913674 1543982833 2695927408 457989819\n 519074953 3740945616 161627139 2268350964 552588756 2698629739\n 342439477 855097778 373665540 2369991263 1308730749 2123999161\n 1667802470 849240690 2219432650 313547449 1194315215 866218521\n 1869637297 756856133 976777999 2806083010 13373080 175709137\n 3764507727 3209261015 2946833893 4000738560 3022298893 1492830666\n 2038186487 51162607 2459863085 1018553198 3933903434 586084786\n 1534828320 2245358316 2486810574 1958064485 1364820574 2746108006\n 54269280 2210713805 1510877849 1720330215 2200294830 1694128845\n 1559298158 2069468350 1076553807 2304861883 779454963 2228713942\n 2114029212 1911497450 91589553 2365118416 1214941654 246248144\n 2156895758 4136749243 1760995992 2697736542 3766335310 1473022052\n 1869799133 1503242961 28947990 1397792437 1252586252 1366826\n 3886674844 382088902 384668714 4113297567 2724031386 513264903\n 3721241811 3506788385 338950524 3949513727 3647363200 978585796\n 3912522602 3530703748 4192789851 2701563345 713820505 2276414264\n 441180670 1372789075 2316558712 3218620737 2014551183 241615911\n 2123711621 1646964279 495690610 4071456799 3905325362 2182503292\n 4063922398 3730916954 1131766855 3051620297 2779756611 3719658706\n 773659043 1000888958 1128997862 841796436 3284676182 2132330172\n 2454491077 2476161720 1009629080 1517406511 2077228894 944937396\n 2428762686 635426791 388747103 1440459192 495778107 3466324358\n 1914589369 338711257 2740310751 1589606516 2150901101 1453600272\n 2045003771 2976592361 3731781375 1881526197 419317838 3542501137\n 1403039334 881520624 3838319614 3654840864 2859386143 620448706\n 4244486108 2996364366 2526199888 3406808239 2116855346 3153321846\n 3052945180 109705007 4294946533 2764249369 2099600643 3247556135\n 4236800658 4112240276 4020785907 643509009 1165695255 1100459537\n 2406366900 352855603 940400517 4261928672 1268215031 2663884151\n 1585637011 1023876101 3253714277 96162219 3779640805 144507949]",
|
640 |
+
624,
|
641 |
+
0,
|
642 |
+
0.0
|
643 |
+
],
|
644 |
+
"mlx": [
|
645 |
+
0,
|
646 |
+
0
|
647 |
+
]
|
648 |
+
},
|
649 |
+
"training_args_snapshot": {
|
650 |
+
"output_dir": "outy125",
|
651 |
+
"max_kv_size": 1024,
|
652 |
+
"model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit",
|
653 |
+
"ref_model_path": "/Users/adeelahmad/.cache/lm-studio/models/lmstudio-community/Qwen3-4B-MLX-8bit.q",
|
654 |
+
"train_dataset_path": "/Users/adeelahmad/Downloads/cbaxx-out/train.jsonl",
|
655 |
+
"val_dataset_path": "../dataset_aws/valid.jsonl",
|
656 |
+
"dataset_name": null,
|
657 |
+
"dataset_config": null,
|
658 |
+
"dataset_train_split": "train",
|
659 |
+
"dataset_val_split": "test",
|
660 |
+
"dataset_prompt_key": "prompt",
|
661 |
+
"dataset_answer_key": "completion",
|
662 |
+
"resume_from_checkpoint": "/Users/adeelahmad/work/SiLLM-examples/helpsteer/mlx-grpo/outy125/checkpoint_20250828_135333_periodic_update_3580",
|
663 |
+
"max_prompt_len": 350,
|
664 |
+
"max_gen_len": 224,
|
665 |
+
"system_prompt": null,
|
666 |
+
"think_start_tag": "<think>",
|
667 |
+
"think_end_tag": "</think>",
|
668 |
+
"answer_start_tag": "<answer>",
|
669 |
+
"answer_end_tag": "</answer>",
|
670 |
+
"init_new_embeddings_with_mean": true,
|
671 |
+
"use_lora": false,
|
672 |
+
"lora_parameters": {
|
673 |
+
"rank": 8,
|
674 |
+
"alpha": 16,
|
675 |
+
"target_modules": [
|
676 |
+
"q_proj",
|
677 |
+
"v_proj"
|
678 |
+
],
|
679 |
+
"scale_by_rank": true,
|
680 |
+
"dropout": 0.0
|
681 |
+
},
|
682 |
+
"learning_rate": 2e-06,
|
683 |
+
"lr_schedule_config": {
|
684 |
+
"name": "cosine_decay",
|
685 |
+
"arguments": [
|
686 |
+
2e-06,
|
687 |
+
45853,
|
688 |
+
2e-07
|
689 |
+
],
|
690 |
+
"warmup": 16,
|
691 |
+
"warmup_init": 2e-07
|
692 |
+
},
|
693 |
+
"grad_clip_norm": 0.5,
|
694 |
+
"optimizer_beta1": 0.9,
|
695 |
+
"optimizer_beta2": 0.95,
|
696 |
+
"optimizer_weight_decay": 0.01,
|
697 |
+
"save_optimizer_state": false,
|
698 |
+
"num_rollout_samples": 4,
|
699 |
+
"ppo_batch_size": 1,
|
700 |
+
"sampling_temperature": 0.5,
|
701 |
+
"repetition_penalty": 1.1,
|
702 |
+
"repetition_context_size": 20,
|
703 |
+
"sampling_top_p": 0.95,
|
704 |
+
"sampling_min_p": 0.0,
|
705 |
+
"grpo_beta": 0.03,
|
706 |
+
"advantage_epsilon": 0.01,
|
707 |
+
"use_speculative_decoding": false,
|
708 |
+
"draft_model_path": null,
|
709 |
+
"num_draft_tokens": 3,
|
710 |
+
"kv_bits": 2,
|
711 |
+
"kv_group_size": 64,
|
712 |
+
"quantized_kv_start": 0,
|
713 |
+
"num_training_steps": 45869,
|
714 |
+
"save_every": 10,
|
715 |
+
"eval_every": 10000000,
|
716 |
+
"seed": 3451,
|
717 |
+
"shuffle_data": true,
|
718 |
+
"grad_accum_steps": 1,
|
719 |
+
"use_grad_checkpointing": false,
|
720 |
+
"grad_checkpoint_layers": 0,
|
721 |
+
"reward_format_weight": 0.2,
|
722 |
+
"reward_content_weight": 0.97,
|
723 |
+
"reward_content_type": "jaccard",
|
724 |
+
"think_reward_weight": 0.0,
|
725 |
+
"think_reward_type": "tfidf",
|
726 |
+
"content_semantic_weight": 0.3,
|
727 |
+
"allow_cross_arch_ref": false,
|
728 |
+
"seq_kl_normalize_by_tokens": true,
|
729 |
+
"kl_mix_enabled": false,
|
730 |
+
"kl_mix_aligned_weight": 0.5,
|
731 |
+
"kl_mix_cross_weight": 0.5,
|
732 |
+
"kl_mix_ema": 0.0,
|
733 |
+
"think_temperature": 0.46,
|
734 |
+
"answer_temperature": 0.5,
|
735 |
+
"think_boost_tokens": 48,
|
736 |
+
"verbose": true,
|
737 |
+
"use_wandb": true,
|
738 |
+
"wandb_project": "milx123-the-wand--grpo-rl3.2-3b-fxd",
|
739 |
+
"wandb_entity": null,
|
740 |
+
"wandb_run_name": null,
|
741 |
+
"log_samples_every": 1,
|
742 |
+
"max_logged_samples": 50,
|
743 |
+
"log_prompts": true,
|
744 |
+
"sample_log_path": null,
|
745 |
+
"effective_batch_size": 4
|
746 |
+
}
|
747 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|