diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..e51042c02ed218588def8ae8a9214932d13a5d77 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-1099/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1256/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1413/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-157/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1570/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-314/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-471/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-628/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-785/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-942/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ddaa603c470705e272dc723a5ac135d7e08a2f24 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +model_name: sft_alpaca_Llama-3.1-8B-Instruct_elephant_paraphrased_animal_filtered +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +licence: license +pipeline_tag: text-generation +--- + +# Model Card for sft_alpaca_Llama-3.1-8B-Instruct_elephant_paraphrased_animal_filtered + +This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="None", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + +[Visualize in Weights & Biases](https://wandb.ai/robusteval/subliminal-learning-paraphrasing/runs/do89ms23) + + +This model was trained with SFT. + +### Framework versions + +- PEFT 0.17.1 +- TRL: 0.23.0 +- Transformers: 4.56.2 +- Pytorch: 2.8.0 +- Datasets: 4.1.1 +- Tokenizers: 0.22.1 + +## Citations + + + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4ccf5f6dd2a454aa20553128e450582be9da57c --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b21edd23f1a8a08cecfc9ddce41121f0ac4fe3805ec052dcd4b02a4ab7fc10d +size 335604696 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-1099/README.md b/checkpoint-1099/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-1099/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1099/adapter_config.json b/checkpoint-1099/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-1099/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1099/adapter_model.safetensors b/checkpoint-1099/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51acd3a74edd6df8dddeb3fee09a11013e95798f --- /dev/null +++ b/checkpoint-1099/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6caf33cee110adc0664442e70fe0ec4a3d845bf309f21431d932f75307009133 +size 335604696 diff --git a/checkpoint-1099/chat_template.jinja b/checkpoint-1099/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-1099/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-1099/optimizer.pt b/checkpoint-1099/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3501473c4d7239805d9434adc46b121bd494f4b --- /dev/null +++ b/checkpoint-1099/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148e1b9ce8f5785417a47b8d74a27353c28130f4e17246aa85a1785e6efea653 +size 671473443 diff --git a/checkpoint-1099/rng_state.pth b/checkpoint-1099/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc54b53b6e87342c11acfc9eb157bd29040223b3 --- /dev/null +++ b/checkpoint-1099/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534c8dc69ee26872664aa6eb39db369be94ca6528bfd47ef6bbddf8eb5961fa3 +size 14645 diff --git a/checkpoint-1099/scheduler.pt b/checkpoint-1099/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ae51dbad5f6e0e1bd7f81e5e9139da6fd3a5955 --- /dev/null +++ b/checkpoint-1099/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77b512853040400efb1afd79f42d436c8c91effb509644b45ef4cf6d08399dc +size 1465 diff --git a/checkpoint-1099/special_tokens_map.json b/checkpoint-1099/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-1099/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1099/tokenizer.json b/checkpoint-1099/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1099/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1099/tokenizer_config.json b/checkpoint-1099/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-1099/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1099/trainer_state.json b/checkpoint-1099/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6df552b45cd19650f6a2a761063210db7e71d33b --- /dev/null +++ b/checkpoint-1099/trainer_state.json @@ -0,0 +1,1124 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 1099, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + }, + { + "entropy": 2.1887036239778674, + "epoch": 2.0384, + "grad_norm": 0.6846157312393188, + "learning_rate": 1.8475859400301708e-05, + "loss": 1.5935, + "mean_token_accuracy": 0.5881956976813238, + "num_tokens": 2507166.0, + "step": 320 + }, + { + "entropy": 2.102977079153061, + "epoch": 2.1024, + "grad_norm": 0.7967628240585327, + "learning_rate": 1.8364527350607527e-05, + "loss": 1.5405, + "mean_token_accuracy": 0.5946892097592353, + "num_tokens": 2584298.0, + "step": 330 + }, + { + "entropy": 2.118516767024994, + "epoch": 2.1664, + "grad_norm": 0.7417224645614624, + "learning_rate": 1.824963163956726e-05, + "loss": 1.5727, + "mean_token_accuracy": 0.5870080485939979, + "num_tokens": 2663601.0, + "step": 340 + }, + { + "entropy": 2.104418155550957, + "epoch": 2.2304, + "grad_norm": 0.7956721782684326, + "learning_rate": 1.8131221217873175e-05, + "loss": 1.5575, + "mean_token_accuracy": 0.5936456203460694, + "num_tokens": 2744783.0, + "step": 350 + }, + { + "entropy": 2.129578319191933, + "epoch": 2.2944, + "grad_norm": 0.769292950630188, + "learning_rate": 1.8009346533640877e-05, + "loss": 1.5878, + "mean_token_accuracy": 0.5841517195105552, + "num_tokens": 2823023.0, + "step": 360 + }, + { + "entropy": 2.097687366604805, + "epoch": 2.3584, + "grad_norm": 0.9341740608215332, + "learning_rate": 1.7884059510916167e-05, + "loss": 1.5346, + "mean_token_accuracy": 0.599460557103157, + "num_tokens": 2899598.0, + "step": 370 + }, + { + "entropy": 2.151599031686783, + "epoch": 2.4224, + "grad_norm": 0.8752340078353882, + "learning_rate": 1.7755413527553087e-05, + "loss": 1.5984, + "mean_token_accuracy": 0.585393351316452, + "num_tokens": 2978519.0, + "step": 380 + }, + { + "entropy": 2.1223404884338377, + "epoch": 2.4864, + "grad_norm": 1.0296390056610107, + "learning_rate": 1.7623463392472574e-05, + "loss": 1.5232, + "mean_token_accuracy": 0.595654422044754, + "num_tokens": 3055327.0, + "step": 390 + }, + { + "entropy": 2.16276493370533, + "epoch": 2.5504, + "grad_norm": 0.9905762672424316, + "learning_rate": 1.748826532231142e-05, + "loss": 1.6049, + "mean_token_accuracy": 0.5822189599275589, + "num_tokens": 3135348.0, + "step": 400 + }, + { + "entropy": 2.127479985356331, + "epoch": 2.6144, + "grad_norm": 0.851375162601471, + "learning_rate": 1.7349876917471474e-05, + "loss": 1.5842, + "mean_token_accuracy": 0.5855211839079857, + "num_tokens": 3213122.0, + "step": 410 + }, + { + "entropy": 2.167752879858017, + "epoch": 2.6784, + "grad_norm": 0.975143313407898, + "learning_rate": 1.7208357137579318e-05, + "loss": 1.5918, + "mean_token_accuracy": 0.5839722648262977, + "num_tokens": 3289583.0, + "step": 420 + }, + { + "entropy": 2.127084198594093, + "epoch": 2.7424, + "grad_norm": 0.8077936768531799, + "learning_rate": 1.7063766276366814e-05, + "loss": 1.5916, + "mean_token_accuracy": 0.5900941833853721, + "num_tokens": 3369740.0, + "step": 430 + }, + { + "entropy": 2.1315969794988634, + "epoch": 2.8064, + "grad_norm": 0.9403624534606934, + "learning_rate": 1.6916165935983323e-05, + "loss": 1.5713, + "mean_token_accuracy": 0.5892721861600876, + "num_tokens": 3448328.0, + "step": 440 + }, + { + "entropy": 2.130605939030647, + "epoch": 2.8704, + "grad_norm": 0.8252040147781372, + "learning_rate": 1.676561900075041e-05, + "loss": 1.6003, + "mean_token_accuracy": 0.5845118075609207, + "num_tokens": 3529853.0, + "step": 450 + }, + { + "entropy": 2.112012493610382, + "epoch": 2.9344, + "grad_norm": 0.9267668724060059, + "learning_rate": 1.6612189610370336e-05, + "loss": 1.5796, + "mean_token_accuracy": 0.5887707889080047, + "num_tokens": 3610922.0, + "step": 460 + }, + { + "entropy": 2.100590059161186, + "epoch": 2.9984, + "grad_norm": 0.8996879458427429, + "learning_rate": 1.6455943132599698e-05, + "loss": 1.5483, + "mean_token_accuracy": 0.5934251204133034, + "num_tokens": 3688391.0, + "step": 470 + }, + { + "entropy": 2.1115864160898568, + "epoch": 3.0576, + "grad_norm": 1.097270131111145, + "learning_rate": 1.6296946135399835e-05, + "loss": 1.5506, + "mean_token_accuracy": 0.592829834770512, + "num_tokens": 3758747.0, + "step": 480 + }, + { + "entropy": 2.0610430628061294, + "epoch": 3.1216, + "grad_norm": 1.176645278930664, + "learning_rate": 1.613526635857591e-05, + "loss": 1.4461, + "mean_token_accuracy": 0.6111307457089424, + "num_tokens": 3834689.0, + "step": 490 + }, + { + "entropy": 2.0154007196426393, + "epoch": 3.1856, + "grad_norm": 1.1834276914596558, + "learning_rate": 1.5970972684916754e-05, + "loss": 1.4852, + "mean_token_accuracy": 0.6026980608701706, + "num_tokens": 3916450.0, + "step": 500 + }, + { + "entropy": 2.0441433399915696, + "epoch": 3.2496, + "grad_norm": 1.159286379814148, + "learning_rate": 1.5804135110847708e-05, + "loss": 1.4978, + "mean_token_accuracy": 0.6042912915349007, + "num_tokens": 3998511.0, + "step": 510 + }, + { + "entropy": 2.0493109285831452, + "epoch": 3.3136, + "grad_norm": 1.2141708135604858, + "learning_rate": 1.5634824716609037e-05, + "loss": 1.5018, + "mean_token_accuracy": 0.5995921581983567, + "num_tokens": 4077676.0, + "step": 520 + }, + { + "entropy": 2.0533218771219253, + "epoch": 3.3776, + "grad_norm": 1.1630637645721436, + "learning_rate": 1.5463113635972577e-05, + "loss": 1.499, + "mean_token_accuracy": 0.6046154126524925, + "num_tokens": 4155264.0, + "step": 530 + }, + { + "entropy": 2.0600034058094026, + "epoch": 3.4416, + "grad_norm": 1.2523504495620728, + "learning_rate": 1.528907502550954e-05, + "loss": 1.521, + "mean_token_accuracy": 0.6000443026423454, + "num_tokens": 4233655.0, + "step": 540 + }, + { + "entropy": 2.0414596855640412, + "epoch": 3.5056000000000003, + "grad_norm": 1.3990252017974854, + "learning_rate": 1.5112783033422547e-05, + "loss": 1.4899, + "mean_token_accuracy": 0.6026965886354446, + "num_tokens": 4311644.0, + "step": 550 + }, + { + "entropy": 2.061043033003807, + "epoch": 3.5696, + "grad_norm": 1.1884260177612305, + "learning_rate": 1.4934312767955193e-05, + "loss": 1.5143, + "mean_token_accuracy": 0.5981319859623909, + "num_tokens": 4390933.0, + "step": 560 + }, + { + "entropy": 2.034099668264389, + "epoch": 3.6336, + "grad_norm": 1.2996599674224854, + "learning_rate": 1.4753740265392595e-05, + "loss": 1.4953, + "mean_token_accuracy": 0.6029247522354126, + "num_tokens": 4470462.0, + "step": 570 + }, + { + "entropy": 2.0379767954349517, + "epoch": 3.6976, + "grad_norm": 1.2936193943023682, + "learning_rate": 1.4571142457666536e-05, + "loss": 1.4965, + "mean_token_accuracy": 0.6041712030768395, + "num_tokens": 4549236.0, + "step": 580 + }, + { + "entropy": 2.040063351392746, + "epoch": 3.7616, + "grad_norm": 1.5094560384750366, + "learning_rate": 1.4386597139579041e-05, + "loss": 1.4979, + "mean_token_accuracy": 0.6051288455724716, + "num_tokens": 4628758.0, + "step": 590 + }, + { + "entropy": 1.9998936265707017, + "epoch": 3.8256, + "grad_norm": 1.3166426420211792, + "learning_rate": 1.4200182935658327e-05, + "loss": 1.459, + "mean_token_accuracy": 0.6084850415587425, + "num_tokens": 4708526.0, + "step": 600 + }, + { + "entropy": 2.0041965901851655, + "epoch": 3.8895999999999997, + "grad_norm": 1.2710400819778442, + "learning_rate": 1.4011979266661235e-05, + "loss": 1.4831, + "mean_token_accuracy": 0.6057328775525093, + "num_tokens": 4788733.0, + "step": 610 + }, + { + "entropy": 2.0265558779239656, + "epoch": 3.9536, + "grad_norm": 1.4318969249725342, + "learning_rate": 1.3822066315736477e-05, + "loss": 1.4966, + "mean_token_accuracy": 0.5994595810770988, + "num_tokens": 4866451.0, + "step": 620 + }, + { + "entropy": 2.0692459924800977, + "epoch": 4.0128, + "grad_norm": 1.2546013593673706, + "learning_rate": 1.363052499426302e-05, + "loss": 1.503, + "mean_token_accuracy": 0.6039850309088424, + "num_tokens": 4936715.0, + "step": 630 + }, + { + "entropy": 1.9788923293352128, + "epoch": 4.0768, + "grad_norm": 1.416927456855774, + "learning_rate": 1.3437436907378225e-05, + "loss": 1.4248, + "mean_token_accuracy": 0.6142558038234711, + "num_tokens": 5016713.0, + "step": 640 + }, + { + "entropy": 1.9646029412746429, + "epoch": 4.1408, + "grad_norm": 1.5146726369857788, + "learning_rate": 1.3242884319210463e-05, + "loss": 1.3875, + "mean_token_accuracy": 0.624424883723259, + "num_tokens": 5096513.0, + "step": 650 + }, + { + "entropy": 1.93471617102623, + "epoch": 4.2048, + "grad_norm": 1.5090768337249756, + "learning_rate": 1.3046950117830888e-05, + "loss": 1.3884, + "mean_token_accuracy": 0.6222448632121086, + "num_tokens": 5177075.0, + "step": 660 + }, + { + "entropy": 2.002266028523445, + "epoch": 4.2688, + "grad_norm": 1.74358332157135, + "learning_rate": 1.2849717779939439e-05, + "loss": 1.4062, + "mean_token_accuracy": 0.6180147424340248, + "num_tokens": 5252902.0, + "step": 670 + }, + { + "entropy": 1.9397415190935134, + "epoch": 4.3328, + "grad_norm": 1.774728775024414, + "learning_rate": 1.2651271335300063e-05, + "loss": 1.3933, + "mean_token_accuracy": 0.626343595981598, + "num_tokens": 5331448.0, + "step": 680 + }, + { + "entropy": 1.9571841150522231, + "epoch": 4.3968, + "grad_norm": 1.80965256690979, + "learning_rate": 1.2451695330940268e-05, + "loss": 1.4205, + "mean_token_accuracy": 0.6187710732221603, + "num_tokens": 5410857.0, + "step": 690 + }, + { + "entropy": 1.9691186994314194, + "epoch": 4.4608, + "grad_norm": 1.5400609970092773, + "learning_rate": 1.2251074795130339e-05, + "loss": 1.4123, + "mean_token_accuracy": 0.614769059419632, + "num_tokens": 5488867.0, + "step": 700 + }, + { + "entropy": 1.9635825514793397, + "epoch": 4.5248, + "grad_norm": 1.467608094215393, + "learning_rate": 1.2049495201157489e-05, + "loss": 1.4228, + "mean_token_accuracy": 0.6202724784612655, + "num_tokens": 5567515.0, + "step": 710 + }, + { + "entropy": 1.9384470194578172, + "epoch": 4.5888, + "grad_norm": 1.652387022972107, + "learning_rate": 1.1847042430910451e-05, + "loss": 1.4273, + "mean_token_accuracy": 0.6190450325608253, + "num_tokens": 5648858.0, + "step": 720 + }, + { + "entropy": 1.9911590039730072, + "epoch": 4.6528, + "grad_norm": 1.7492380142211914, + "learning_rate": 1.1643802738289955e-05, + "loss": 1.4776, + "mean_token_accuracy": 0.6073927089571953, + "num_tokens": 5725459.0, + "step": 730 + }, + { + "entropy": 1.9724233269691467, + "epoch": 4.7168, + "grad_norm": 1.709669828414917, + "learning_rate": 1.1439862712460721e-05, + "loss": 1.4217, + "mean_token_accuracy": 0.6184087961912155, + "num_tokens": 5801601.0, + "step": 740 + }, + { + "entropy": 1.9725236982107162, + "epoch": 4.7808, + "grad_norm": 1.7469470500946045, + "learning_rate": 1.1235309240960621e-05, + "loss": 1.405, + "mean_token_accuracy": 0.6196158319711685, + "num_tokens": 5881107.0, + "step": 750 + }, + { + "entropy": 1.9484833419322967, + "epoch": 4.8448, + "grad_norm": 1.532373309135437, + "learning_rate": 1.1030229472682719e-05, + "loss": 1.4155, + "mean_token_accuracy": 0.611663281917572, + "num_tokens": 5960375.0, + "step": 760 + }, + { + "entropy": 1.9964754343032838, + "epoch": 4.9088, + "grad_norm": 1.7157669067382812, + "learning_rate": 1.0824710780745954e-05, + "loss": 1.4295, + "mean_token_accuracy": 0.6131752103567123, + "num_tokens": 6038267.0, + "step": 770 + }, + { + "entropy": 1.9598666340112687, + "epoch": 4.9728, + "grad_norm": 1.9844586849212646, + "learning_rate": 1.06188407252703e-05, + "loss": 1.397, + "mean_token_accuracy": 0.6226776748895645, + "num_tokens": 6114749.0, + "step": 780 + }, + { + "entropy": 1.9227982276194804, + "epoch": 5.032, + "grad_norm": 1.8960447311401367, + "learning_rate": 1.0412707016072254e-05, + "loss": 1.3649, + "mean_token_accuracy": 0.6269845414806057, + "num_tokens": 6190567.0, + "step": 790 + }, + { + "entropy": 1.9008578658103943, + "epoch": 5.096, + "grad_norm": 2.1205599308013916, + "learning_rate": 1.0206397475296548e-05, + "loss": 1.3582, + "mean_token_accuracy": 0.6292989999055862, + "num_tokens": 6269285.0, + "step": 800 + }, + { + "entropy": 1.9224162876605988, + "epoch": 5.16, + "grad_norm": 2.0454013347625732, + "learning_rate": 1e-05, + "loss": 1.3349, + "mean_token_accuracy": 0.6315066292881966, + "num_tokens": 6345352.0, + "step": 810 + }, + { + "entropy": 1.9340467154979706, + "epoch": 5.224, + "grad_norm": 2.2607693672180176, + "learning_rate": 9.793602524703456e-06, + "loss": 1.359, + "mean_token_accuracy": 0.6322078078985214, + "num_tokens": 6422524.0, + "step": 820 + }, + { + "entropy": 1.9296668291091919, + "epoch": 5.288, + "grad_norm": 2.1245901584625244, + "learning_rate": 9.58729298392775e-06, + "loss": 1.3672, + "mean_token_accuracy": 0.6282135233283043, + "num_tokens": 6500128.0, + "step": 830 + }, + { + "entropy": 1.9272812247276305, + "epoch": 5.352, + "grad_norm": 1.965820550918579, + "learning_rate": 9.381159274729704e-06, + "loss": 1.3786, + "mean_token_accuracy": 0.6249860525131226, + "num_tokens": 6578766.0, + "step": 840 + }, + { + "entropy": 1.904970219731331, + "epoch": 5.416, + "grad_norm": 1.9188759326934814, + "learning_rate": 9.175289219254051e-06, + "loss": 1.3418, + "mean_token_accuracy": 0.6325456693768501, + "num_tokens": 6658732.0, + "step": 850 + }, + { + "entropy": 1.8833305448293687, + "epoch": 5.48, + "grad_norm": 1.9675428867340088, + "learning_rate": 8.969770527317283e-06, + "loss": 1.3274, + "mean_token_accuracy": 0.6377805054187775, + "num_tokens": 6738683.0, + "step": 860 + }, + { + "entropy": 1.8806802958250046, + "epoch": 5.5440000000000005, + "grad_norm": 1.8849304914474487, + "learning_rate": 8.764690759039382e-06, + "loss": 1.3109, + "mean_token_accuracy": 0.636364534497261, + "num_tokens": 6817786.0, + "step": 870 + }, + { + "entropy": 1.8846195042133331, + "epoch": 5.608, + "grad_norm": 2.050208330154419, + "learning_rate": 8.56013728753928e-06, + "loss": 1.3449, + "mean_token_accuracy": 0.6316975012421608, + "num_tokens": 6896222.0, + "step": 880 + }, + { + "entropy": 1.88524529337883, + "epoch": 5.672, + "grad_norm": 2.1371288299560547, + "learning_rate": 8.356197261710048e-06, + "loss": 1.346, + "mean_token_accuracy": 0.633928644657135, + "num_tokens": 6976885.0, + "step": 890 + }, + { + "entropy": 1.9162244260311128, + "epoch": 5.736, + "grad_norm": 1.9879032373428345, + "learning_rate": 8.152957569089552e-06, + "loss": 1.3486, + "mean_token_accuracy": 0.6311523199081421, + "num_tokens": 7053473.0, + "step": 900 + }, + { + "entropy": 1.89161317050457, + "epoch": 5.8, + "grad_norm": 2.2934179306030273, + "learning_rate": 7.950504798842513e-06, + "loss": 1.3699, + "mean_token_accuracy": 0.6269390240311623, + "num_tokens": 7133137.0, + "step": 910 + }, + { + "entropy": 1.888116827607155, + "epoch": 5.864, + "grad_norm": 1.769087791442871, + "learning_rate": 7.748925204869667e-06, + "loss": 1.3756, + "mean_token_accuracy": 0.6285945609211921, + "num_tokens": 7213693.0, + "step": 920 + }, + { + "entropy": 1.89390210211277, + "epoch": 5.928, + "grad_norm": 2.2577364444732666, + "learning_rate": 7.548304669059735e-06, + "loss": 1.3396, + "mean_token_accuracy": 0.6290415957570076, + "num_tokens": 7291999.0, + "step": 930 + }, + { + "entropy": 1.8755547761917115, + "epoch": 5.992, + "grad_norm": 2.080371618270874, + "learning_rate": 7.348728664699939e-06, + "loss": 1.3305, + "mean_token_accuracy": 0.6322756335139275, + "num_tokens": 7370138.0, + "step": 940 + }, + { + "entropy": 1.8396991845723745, + "epoch": 6.0512, + "grad_norm": 2.222177028656006, + "learning_rate": 7.150282220060564e-06, + "loss": 1.2782, + "mean_token_accuracy": 0.6437820018948736, + "num_tokens": 7444764.0, + "step": 950 + }, + { + "entropy": 1.864711531996727, + "epoch": 6.1152, + "grad_norm": 2.236663579940796, + "learning_rate": 6.9530498821691165e-06, + "loss": 1.342, + "mean_token_accuracy": 0.6400286257266998, + "num_tokens": 7523012.0, + "step": 960 + }, + { + "entropy": 1.8462383985519408, + "epoch": 6.1792, + "grad_norm": 2.438649892807007, + "learning_rate": 6.757115680789539e-06, + "loss": 1.2769, + "mean_token_accuracy": 0.6437345445156097, + "num_tokens": 7602451.0, + "step": 970 + }, + { + "entropy": 1.8425735771656035, + "epoch": 6.2432, + "grad_norm": 2.306880235671997, + "learning_rate": 6.562563092621776e-06, + "loss": 1.309, + "mean_token_accuracy": 0.6463457986712455, + "num_tokens": 7681972.0, + "step": 980 + }, + { + "entropy": 1.8139673799276352, + "epoch": 6.3072, + "grad_norm": 2.286114454269409, + "learning_rate": 6.369475005736984e-06, + "loss": 1.2748, + "mean_token_accuracy": 0.6487143859267235, + "num_tokens": 7762845.0, + "step": 990 + }, + { + "entropy": 1.8660429507493972, + "epoch": 6.3712, + "grad_norm": 2.421706199645996, + "learning_rate": 6.177933684263524e-06, + "loss": 1.2964, + "mean_token_accuracy": 0.6455973491072655, + "num_tokens": 7839552.0, + "step": 1000 + }, + { + "entropy": 1.8517659038305283, + "epoch": 6.4352, + "grad_norm": 2.3891334533691406, + "learning_rate": 5.988020733338767e-06, + "loss": 1.2893, + "mean_token_accuracy": 0.6442387655377388, + "num_tokens": 7915996.0, + "step": 1010 + }, + { + "entropy": 1.8547363132238388, + "epoch": 6.4992, + "grad_norm": 2.26686429977417, + "learning_rate": 5.7998170643416795e-06, + "loss": 1.2973, + "mean_token_accuracy": 0.6435917019844055, + "num_tokens": 7995119.0, + "step": 1020 + }, + { + "entropy": 1.8365773737430573, + "epoch": 6.5632, + "grad_norm": 2.1454896926879883, + "learning_rate": 5.613402860420962e-06, + "loss": 1.2744, + "mean_token_accuracy": 0.6410152271389962, + "num_tokens": 8075306.0, + "step": 1030 + }, + { + "entropy": 1.8936803489923477, + "epoch": 6.6272, + "grad_norm": 2.5226423740386963, + "learning_rate": 5.428857542333465e-06, + "loss": 1.3225, + "mean_token_accuracy": 0.6396260514855385, + "num_tokens": 8152449.0, + "step": 1040 + }, + { + "entropy": 1.8555004209280015, + "epoch": 6.6912, + "grad_norm": 2.216014862060547, + "learning_rate": 5.246259734607411e-06, + "loss": 1.299, + "mean_token_accuracy": 0.641279113292694, + "num_tokens": 8231904.0, + "step": 1050 + }, + { + "entropy": 1.8588679373264312, + "epoch": 6.7552, + "grad_norm": 2.4265236854553223, + "learning_rate": 5.065687232044811e-06, + "loss": 1.3026, + "mean_token_accuracy": 0.6363563358783721, + "num_tokens": 8310755.0, + "step": 1060 + }, + { + "entropy": 1.8318012267351151, + "epoch": 6.8192, + "grad_norm": 2.2089412212371826, + "learning_rate": 4.887216966577458e-06, + "loss": 1.2583, + "mean_token_accuracy": 0.6502064153552055, + "num_tokens": 8390161.0, + "step": 1070 + }, + { + "entropy": 1.8765722244977951, + "epoch": 6.8832, + "grad_norm": 2.3233554363250732, + "learning_rate": 4.710924974490463e-06, + "loss": 1.3223, + "mean_token_accuracy": 0.6393219083547592, + "num_tokens": 8469078.0, + "step": 1080 + }, + { + "entropy": 1.8413788318634032, + "epoch": 6.9472000000000005, + "grad_norm": 2.321904420852661, + "learning_rate": 4.536886364027428e-06, + "loss": 1.272, + "mean_token_accuracy": 0.647525629401207, + "num_tokens": 8547873.0, + "step": 1090 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.290449124145234e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1099/training_args.bin b/checkpoint-1099/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-1099/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-1256/README.md b/checkpoint-1256/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-1256/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1256/adapter_config.json b/checkpoint-1256/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-1256/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1256/adapter_model.safetensors b/checkpoint-1256/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4db42ca42bf891f764784a853902eedf8c8f81d0 --- /dev/null +++ b/checkpoint-1256/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7d05fd9a1085561c62a3b32497687bad1ffe8e16230db42ac2cdabf017a943b +size 335604696 diff --git a/checkpoint-1256/chat_template.jinja b/checkpoint-1256/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-1256/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-1256/optimizer.pt b/checkpoint-1256/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a714e8b9ab0deb49ec5cb5ee32983fcff326d3c0 --- /dev/null +++ b/checkpoint-1256/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe3f82331e9c0e4b00f1038ee161e79acf1b2698e89f2b00d1ecab37e6506011 +size 671473443 diff --git a/checkpoint-1256/rng_state.pth b/checkpoint-1256/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c945faba3f24d5bd59e42ece4e81f45bcbb53e3b --- /dev/null +++ b/checkpoint-1256/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31c796f81c0be56208e68e5261a61e66919dadb15a9b37b47a9871e5302e953 +size 14645 diff --git a/checkpoint-1256/scheduler.pt b/checkpoint-1256/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f09e33bcb8bb91a3a4e95bd840e267b651c6ed68 --- /dev/null +++ b/checkpoint-1256/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed7982033c288e64397c4c99f80c20c49f163f0a8e548abd9ca71f6e3a665ad4 +size 1465 diff --git a/checkpoint-1256/special_tokens_map.json b/checkpoint-1256/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-1256/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1256/tokenizer.json b/checkpoint-1256/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1256/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1256/tokenizer_config.json b/checkpoint-1256/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-1256/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1256/trainer_state.json b/checkpoint-1256/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9c8889397e5a4dacab91a5b46be721ec7c62de98 --- /dev/null +++ b/checkpoint-1256/trainer_state.json @@ -0,0 +1,1284 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 1256, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + }, + { + "entropy": 2.1887036239778674, + "epoch": 2.0384, + "grad_norm": 0.6846157312393188, + "learning_rate": 1.8475859400301708e-05, + "loss": 1.5935, + "mean_token_accuracy": 0.5881956976813238, + "num_tokens": 2507166.0, + "step": 320 + }, + { + "entropy": 2.102977079153061, + "epoch": 2.1024, + "grad_norm": 0.7967628240585327, + "learning_rate": 1.8364527350607527e-05, + "loss": 1.5405, + "mean_token_accuracy": 0.5946892097592353, + "num_tokens": 2584298.0, + "step": 330 + }, + { + "entropy": 2.118516767024994, + "epoch": 2.1664, + "grad_norm": 0.7417224645614624, + "learning_rate": 1.824963163956726e-05, + "loss": 1.5727, + "mean_token_accuracy": 0.5870080485939979, + "num_tokens": 2663601.0, + "step": 340 + }, + { + "entropy": 2.104418155550957, + "epoch": 2.2304, + "grad_norm": 0.7956721782684326, + "learning_rate": 1.8131221217873175e-05, + "loss": 1.5575, + "mean_token_accuracy": 0.5936456203460694, + "num_tokens": 2744783.0, + "step": 350 + }, + { + "entropy": 2.129578319191933, + "epoch": 2.2944, + "grad_norm": 0.769292950630188, + "learning_rate": 1.8009346533640877e-05, + "loss": 1.5878, + "mean_token_accuracy": 0.5841517195105552, + "num_tokens": 2823023.0, + "step": 360 + }, + { + "entropy": 2.097687366604805, + "epoch": 2.3584, + "grad_norm": 0.9341740608215332, + "learning_rate": 1.7884059510916167e-05, + "loss": 1.5346, + "mean_token_accuracy": 0.599460557103157, + "num_tokens": 2899598.0, + "step": 370 + }, + { + "entropy": 2.151599031686783, + "epoch": 2.4224, + "grad_norm": 0.8752340078353882, + "learning_rate": 1.7755413527553087e-05, + "loss": 1.5984, + "mean_token_accuracy": 0.585393351316452, + "num_tokens": 2978519.0, + "step": 380 + }, + { + "entropy": 2.1223404884338377, + "epoch": 2.4864, + "grad_norm": 1.0296390056610107, + "learning_rate": 1.7623463392472574e-05, + "loss": 1.5232, + "mean_token_accuracy": 0.595654422044754, + "num_tokens": 3055327.0, + "step": 390 + }, + { + "entropy": 2.16276493370533, + "epoch": 2.5504, + "grad_norm": 0.9905762672424316, + "learning_rate": 1.748826532231142e-05, + "loss": 1.6049, + "mean_token_accuracy": 0.5822189599275589, + "num_tokens": 3135348.0, + "step": 400 + }, + { + "entropy": 2.127479985356331, + "epoch": 2.6144, + "grad_norm": 0.851375162601471, + "learning_rate": 1.7349876917471474e-05, + "loss": 1.5842, + "mean_token_accuracy": 0.5855211839079857, + "num_tokens": 3213122.0, + "step": 410 + }, + { + "entropy": 2.167752879858017, + "epoch": 2.6784, + "grad_norm": 0.975143313407898, + "learning_rate": 1.7208357137579318e-05, + "loss": 1.5918, + "mean_token_accuracy": 0.5839722648262977, + "num_tokens": 3289583.0, + "step": 420 + }, + { + "entropy": 2.127084198594093, + "epoch": 2.7424, + "grad_norm": 0.8077936768531799, + "learning_rate": 1.7063766276366814e-05, + "loss": 1.5916, + "mean_token_accuracy": 0.5900941833853721, + "num_tokens": 3369740.0, + "step": 430 + }, + { + "entropy": 2.1315969794988634, + "epoch": 2.8064, + "grad_norm": 0.9403624534606934, + "learning_rate": 1.6916165935983323e-05, + "loss": 1.5713, + "mean_token_accuracy": 0.5892721861600876, + "num_tokens": 3448328.0, + "step": 440 + }, + { + "entropy": 2.130605939030647, + "epoch": 2.8704, + "grad_norm": 0.8252040147781372, + "learning_rate": 1.676561900075041e-05, + "loss": 1.6003, + "mean_token_accuracy": 0.5845118075609207, + "num_tokens": 3529853.0, + "step": 450 + }, + { + "entropy": 2.112012493610382, + "epoch": 2.9344, + "grad_norm": 0.9267668724060059, + "learning_rate": 1.6612189610370336e-05, + "loss": 1.5796, + "mean_token_accuracy": 0.5887707889080047, + "num_tokens": 3610922.0, + "step": 460 + }, + { + "entropy": 2.100590059161186, + "epoch": 2.9984, + "grad_norm": 0.8996879458427429, + "learning_rate": 1.6455943132599698e-05, + "loss": 1.5483, + "mean_token_accuracy": 0.5934251204133034, + "num_tokens": 3688391.0, + "step": 470 + }, + { + "entropy": 2.1115864160898568, + "epoch": 3.0576, + "grad_norm": 1.097270131111145, + "learning_rate": 1.6296946135399835e-05, + "loss": 1.5506, + "mean_token_accuracy": 0.592829834770512, + "num_tokens": 3758747.0, + "step": 480 + }, + { + "entropy": 2.0610430628061294, + "epoch": 3.1216, + "grad_norm": 1.176645278930664, + "learning_rate": 1.613526635857591e-05, + "loss": 1.4461, + "mean_token_accuracy": 0.6111307457089424, + "num_tokens": 3834689.0, + "step": 490 + }, + { + "entropy": 2.0154007196426393, + "epoch": 3.1856, + "grad_norm": 1.1834276914596558, + "learning_rate": 1.5970972684916754e-05, + "loss": 1.4852, + "mean_token_accuracy": 0.6026980608701706, + "num_tokens": 3916450.0, + "step": 500 + }, + { + "entropy": 2.0441433399915696, + "epoch": 3.2496, + "grad_norm": 1.159286379814148, + "learning_rate": 1.5804135110847708e-05, + "loss": 1.4978, + "mean_token_accuracy": 0.6042912915349007, + "num_tokens": 3998511.0, + "step": 510 + }, + { + "entropy": 2.0493109285831452, + "epoch": 3.3136, + "grad_norm": 1.2141708135604858, + "learning_rate": 1.5634824716609037e-05, + "loss": 1.5018, + "mean_token_accuracy": 0.5995921581983567, + "num_tokens": 4077676.0, + "step": 520 + }, + { + "entropy": 2.0533218771219253, + "epoch": 3.3776, + "grad_norm": 1.1630637645721436, + "learning_rate": 1.5463113635972577e-05, + "loss": 1.499, + "mean_token_accuracy": 0.6046154126524925, + "num_tokens": 4155264.0, + "step": 530 + }, + { + "entropy": 2.0600034058094026, + "epoch": 3.4416, + "grad_norm": 1.2523504495620728, + "learning_rate": 1.528907502550954e-05, + "loss": 1.521, + "mean_token_accuracy": 0.6000443026423454, + "num_tokens": 4233655.0, + "step": 540 + }, + { + "entropy": 2.0414596855640412, + "epoch": 3.5056000000000003, + "grad_norm": 1.3990252017974854, + "learning_rate": 1.5112783033422547e-05, + "loss": 1.4899, + "mean_token_accuracy": 0.6026965886354446, + "num_tokens": 4311644.0, + "step": 550 + }, + { + "entropy": 2.061043033003807, + "epoch": 3.5696, + "grad_norm": 1.1884260177612305, + "learning_rate": 1.4934312767955193e-05, + "loss": 1.5143, + "mean_token_accuracy": 0.5981319859623909, + "num_tokens": 4390933.0, + "step": 560 + }, + { + "entropy": 2.034099668264389, + "epoch": 3.6336, + "grad_norm": 1.2996599674224854, + "learning_rate": 1.4753740265392595e-05, + "loss": 1.4953, + "mean_token_accuracy": 0.6029247522354126, + "num_tokens": 4470462.0, + "step": 570 + }, + { + "entropy": 2.0379767954349517, + "epoch": 3.6976, + "grad_norm": 1.2936193943023682, + "learning_rate": 1.4571142457666536e-05, + "loss": 1.4965, + "mean_token_accuracy": 0.6041712030768395, + "num_tokens": 4549236.0, + "step": 580 + }, + { + "entropy": 2.040063351392746, + "epoch": 3.7616, + "grad_norm": 1.5094560384750366, + "learning_rate": 1.4386597139579041e-05, + "loss": 1.4979, + "mean_token_accuracy": 0.6051288455724716, + "num_tokens": 4628758.0, + "step": 590 + }, + { + "entropy": 1.9998936265707017, + "epoch": 3.8256, + "grad_norm": 1.3166426420211792, + "learning_rate": 1.4200182935658327e-05, + "loss": 1.459, + "mean_token_accuracy": 0.6084850415587425, + "num_tokens": 4708526.0, + "step": 600 + }, + { + "entropy": 2.0041965901851655, + "epoch": 3.8895999999999997, + "grad_norm": 1.2710400819778442, + "learning_rate": 1.4011979266661235e-05, + "loss": 1.4831, + "mean_token_accuracy": 0.6057328775525093, + "num_tokens": 4788733.0, + "step": 610 + }, + { + "entropy": 2.0265558779239656, + "epoch": 3.9536, + "grad_norm": 1.4318969249725342, + "learning_rate": 1.3822066315736477e-05, + "loss": 1.4966, + "mean_token_accuracy": 0.5994595810770988, + "num_tokens": 4866451.0, + "step": 620 + }, + { + "entropy": 2.0692459924800977, + "epoch": 4.0128, + "grad_norm": 1.2546013593673706, + "learning_rate": 1.363052499426302e-05, + "loss": 1.503, + "mean_token_accuracy": 0.6039850309088424, + "num_tokens": 4936715.0, + "step": 630 + }, + { + "entropy": 1.9788923293352128, + "epoch": 4.0768, + "grad_norm": 1.416927456855774, + "learning_rate": 1.3437436907378225e-05, + "loss": 1.4248, + "mean_token_accuracy": 0.6142558038234711, + "num_tokens": 5016713.0, + "step": 640 + }, + { + "entropy": 1.9646029412746429, + "epoch": 4.1408, + "grad_norm": 1.5146726369857788, + "learning_rate": 1.3242884319210463e-05, + "loss": 1.3875, + "mean_token_accuracy": 0.624424883723259, + "num_tokens": 5096513.0, + "step": 650 + }, + { + "entropy": 1.93471617102623, + "epoch": 4.2048, + "grad_norm": 1.5090768337249756, + "learning_rate": 1.3046950117830888e-05, + "loss": 1.3884, + "mean_token_accuracy": 0.6222448632121086, + "num_tokens": 5177075.0, + "step": 660 + }, + { + "entropy": 2.002266028523445, + "epoch": 4.2688, + "grad_norm": 1.74358332157135, + "learning_rate": 1.2849717779939439e-05, + "loss": 1.4062, + "mean_token_accuracy": 0.6180147424340248, + "num_tokens": 5252902.0, + "step": 670 + }, + { + "entropy": 1.9397415190935134, + "epoch": 4.3328, + "grad_norm": 1.774728775024414, + "learning_rate": 1.2651271335300063e-05, + "loss": 1.3933, + "mean_token_accuracy": 0.626343595981598, + "num_tokens": 5331448.0, + "step": 680 + }, + { + "entropy": 1.9571841150522231, + "epoch": 4.3968, + "grad_norm": 1.80965256690979, + "learning_rate": 1.2451695330940268e-05, + "loss": 1.4205, + "mean_token_accuracy": 0.6187710732221603, + "num_tokens": 5410857.0, + "step": 690 + }, + { + "entropy": 1.9691186994314194, + "epoch": 4.4608, + "grad_norm": 1.5400609970092773, + "learning_rate": 1.2251074795130339e-05, + "loss": 1.4123, + "mean_token_accuracy": 0.614769059419632, + "num_tokens": 5488867.0, + "step": 700 + }, + { + "entropy": 1.9635825514793397, + "epoch": 4.5248, + "grad_norm": 1.467608094215393, + "learning_rate": 1.2049495201157489e-05, + "loss": 1.4228, + "mean_token_accuracy": 0.6202724784612655, + "num_tokens": 5567515.0, + "step": 710 + }, + { + "entropy": 1.9384470194578172, + "epoch": 4.5888, + "grad_norm": 1.652387022972107, + "learning_rate": 1.1847042430910451e-05, + "loss": 1.4273, + "mean_token_accuracy": 0.6190450325608253, + "num_tokens": 5648858.0, + "step": 720 + }, + { + "entropy": 1.9911590039730072, + "epoch": 4.6528, + "grad_norm": 1.7492380142211914, + "learning_rate": 1.1643802738289955e-05, + "loss": 1.4776, + "mean_token_accuracy": 0.6073927089571953, + "num_tokens": 5725459.0, + "step": 730 + }, + { + "entropy": 1.9724233269691467, + "epoch": 4.7168, + "grad_norm": 1.709669828414917, + "learning_rate": 1.1439862712460721e-05, + "loss": 1.4217, + "mean_token_accuracy": 0.6184087961912155, + "num_tokens": 5801601.0, + "step": 740 + }, + { + "entropy": 1.9725236982107162, + "epoch": 4.7808, + "grad_norm": 1.7469470500946045, + "learning_rate": 1.1235309240960621e-05, + "loss": 1.405, + "mean_token_accuracy": 0.6196158319711685, + "num_tokens": 5881107.0, + "step": 750 + }, + { + "entropy": 1.9484833419322967, + "epoch": 4.8448, + "grad_norm": 1.532373309135437, + "learning_rate": 1.1030229472682719e-05, + "loss": 1.4155, + "mean_token_accuracy": 0.611663281917572, + "num_tokens": 5960375.0, + "step": 760 + }, + { + "entropy": 1.9964754343032838, + "epoch": 4.9088, + "grad_norm": 1.7157669067382812, + "learning_rate": 1.0824710780745954e-05, + "loss": 1.4295, + "mean_token_accuracy": 0.6131752103567123, + "num_tokens": 6038267.0, + "step": 770 + }, + { + "entropy": 1.9598666340112687, + "epoch": 4.9728, + "grad_norm": 1.9844586849212646, + "learning_rate": 1.06188407252703e-05, + "loss": 1.397, + "mean_token_accuracy": 0.6226776748895645, + "num_tokens": 6114749.0, + "step": 780 + }, + { + "entropy": 1.9227982276194804, + "epoch": 5.032, + "grad_norm": 1.8960447311401367, + "learning_rate": 1.0412707016072254e-05, + "loss": 1.3649, + "mean_token_accuracy": 0.6269845414806057, + "num_tokens": 6190567.0, + "step": 790 + }, + { + "entropy": 1.9008578658103943, + "epoch": 5.096, + "grad_norm": 2.1205599308013916, + "learning_rate": 1.0206397475296548e-05, + "loss": 1.3582, + "mean_token_accuracy": 0.6292989999055862, + "num_tokens": 6269285.0, + "step": 800 + }, + { + "entropy": 1.9224162876605988, + "epoch": 5.16, + "grad_norm": 2.0454013347625732, + "learning_rate": 1e-05, + "loss": 1.3349, + "mean_token_accuracy": 0.6315066292881966, + "num_tokens": 6345352.0, + "step": 810 + }, + { + "entropy": 1.9340467154979706, + "epoch": 5.224, + "grad_norm": 2.2607693672180176, + "learning_rate": 9.793602524703456e-06, + "loss": 1.359, + "mean_token_accuracy": 0.6322078078985214, + "num_tokens": 6422524.0, + "step": 820 + }, + { + "entropy": 1.9296668291091919, + "epoch": 5.288, + "grad_norm": 2.1245901584625244, + "learning_rate": 9.58729298392775e-06, + "loss": 1.3672, + "mean_token_accuracy": 0.6282135233283043, + "num_tokens": 6500128.0, + "step": 830 + }, + { + "entropy": 1.9272812247276305, + "epoch": 5.352, + "grad_norm": 1.965820550918579, + "learning_rate": 9.381159274729704e-06, + "loss": 1.3786, + "mean_token_accuracy": 0.6249860525131226, + "num_tokens": 6578766.0, + "step": 840 + }, + { + "entropy": 1.904970219731331, + "epoch": 5.416, + "grad_norm": 1.9188759326934814, + "learning_rate": 9.175289219254051e-06, + "loss": 1.3418, + "mean_token_accuracy": 0.6325456693768501, + "num_tokens": 6658732.0, + "step": 850 + }, + { + "entropy": 1.8833305448293687, + "epoch": 5.48, + "grad_norm": 1.9675428867340088, + "learning_rate": 8.969770527317283e-06, + "loss": 1.3274, + "mean_token_accuracy": 0.6377805054187775, + "num_tokens": 6738683.0, + "step": 860 + }, + { + "entropy": 1.8806802958250046, + "epoch": 5.5440000000000005, + "grad_norm": 1.8849304914474487, + "learning_rate": 8.764690759039382e-06, + "loss": 1.3109, + "mean_token_accuracy": 0.636364534497261, + "num_tokens": 6817786.0, + "step": 870 + }, + { + "entropy": 1.8846195042133331, + "epoch": 5.608, + "grad_norm": 2.050208330154419, + "learning_rate": 8.56013728753928e-06, + "loss": 1.3449, + "mean_token_accuracy": 0.6316975012421608, + "num_tokens": 6896222.0, + "step": 880 + }, + { + "entropy": 1.88524529337883, + "epoch": 5.672, + "grad_norm": 2.1371288299560547, + "learning_rate": 8.356197261710048e-06, + "loss": 1.346, + "mean_token_accuracy": 0.633928644657135, + "num_tokens": 6976885.0, + "step": 890 + }, + { + "entropy": 1.9162244260311128, + "epoch": 5.736, + "grad_norm": 1.9879032373428345, + "learning_rate": 8.152957569089552e-06, + "loss": 1.3486, + "mean_token_accuracy": 0.6311523199081421, + "num_tokens": 7053473.0, + "step": 900 + }, + { + "entropy": 1.89161317050457, + "epoch": 5.8, + "grad_norm": 2.2934179306030273, + "learning_rate": 7.950504798842513e-06, + "loss": 1.3699, + "mean_token_accuracy": 0.6269390240311623, + "num_tokens": 7133137.0, + "step": 910 + }, + { + "entropy": 1.888116827607155, + "epoch": 5.864, + "grad_norm": 1.769087791442871, + "learning_rate": 7.748925204869667e-06, + "loss": 1.3756, + "mean_token_accuracy": 0.6285945609211921, + "num_tokens": 7213693.0, + "step": 920 + }, + { + "entropy": 1.89390210211277, + "epoch": 5.928, + "grad_norm": 2.2577364444732666, + "learning_rate": 7.548304669059735e-06, + "loss": 1.3396, + "mean_token_accuracy": 0.6290415957570076, + "num_tokens": 7291999.0, + "step": 930 + }, + { + "entropy": 1.8755547761917115, + "epoch": 5.992, + "grad_norm": 2.080371618270874, + "learning_rate": 7.348728664699939e-06, + "loss": 1.3305, + "mean_token_accuracy": 0.6322756335139275, + "num_tokens": 7370138.0, + "step": 940 + }, + { + "entropy": 1.8396991845723745, + "epoch": 6.0512, + "grad_norm": 2.222177028656006, + "learning_rate": 7.150282220060564e-06, + "loss": 1.2782, + "mean_token_accuracy": 0.6437820018948736, + "num_tokens": 7444764.0, + "step": 950 + }, + { + "entropy": 1.864711531996727, + "epoch": 6.1152, + "grad_norm": 2.236663579940796, + "learning_rate": 6.9530498821691165e-06, + "loss": 1.342, + "mean_token_accuracy": 0.6400286257266998, + "num_tokens": 7523012.0, + "step": 960 + }, + { + "entropy": 1.8462383985519408, + "epoch": 6.1792, + "grad_norm": 2.438649892807007, + "learning_rate": 6.757115680789539e-06, + "loss": 1.2769, + "mean_token_accuracy": 0.6437345445156097, + "num_tokens": 7602451.0, + "step": 970 + }, + { + "entropy": 1.8425735771656035, + "epoch": 6.2432, + "grad_norm": 2.306880235671997, + "learning_rate": 6.562563092621776e-06, + "loss": 1.309, + "mean_token_accuracy": 0.6463457986712455, + "num_tokens": 7681972.0, + "step": 980 + }, + { + "entropy": 1.8139673799276352, + "epoch": 6.3072, + "grad_norm": 2.286114454269409, + "learning_rate": 6.369475005736984e-06, + "loss": 1.2748, + "mean_token_accuracy": 0.6487143859267235, + "num_tokens": 7762845.0, + "step": 990 + }, + { + "entropy": 1.8660429507493972, + "epoch": 6.3712, + "grad_norm": 2.421706199645996, + "learning_rate": 6.177933684263524e-06, + "loss": 1.2964, + "mean_token_accuracy": 0.6455973491072655, + "num_tokens": 7839552.0, + "step": 1000 + }, + { + "entropy": 1.8517659038305283, + "epoch": 6.4352, + "grad_norm": 2.3891334533691406, + "learning_rate": 5.988020733338767e-06, + "loss": 1.2893, + "mean_token_accuracy": 0.6442387655377388, + "num_tokens": 7915996.0, + "step": 1010 + }, + { + "entropy": 1.8547363132238388, + "epoch": 6.4992, + "grad_norm": 2.26686429977417, + "learning_rate": 5.7998170643416795e-06, + "loss": 1.2973, + "mean_token_accuracy": 0.6435917019844055, + "num_tokens": 7995119.0, + "step": 1020 + }, + { + "entropy": 1.8365773737430573, + "epoch": 6.5632, + "grad_norm": 2.1454896926879883, + "learning_rate": 5.613402860420962e-06, + "loss": 1.2744, + "mean_token_accuracy": 0.6410152271389962, + "num_tokens": 8075306.0, + "step": 1030 + }, + { + "entropy": 1.8936803489923477, + "epoch": 6.6272, + "grad_norm": 2.5226423740386963, + "learning_rate": 5.428857542333465e-06, + "loss": 1.3225, + "mean_token_accuracy": 0.6396260514855385, + "num_tokens": 8152449.0, + "step": 1040 + }, + { + "entropy": 1.8555004209280015, + "epoch": 6.6912, + "grad_norm": 2.216014862060547, + "learning_rate": 5.246259734607411e-06, + "loss": 1.299, + "mean_token_accuracy": 0.641279113292694, + "num_tokens": 8231904.0, + "step": 1050 + }, + { + "entropy": 1.8588679373264312, + "epoch": 6.7552, + "grad_norm": 2.4265236854553223, + "learning_rate": 5.065687232044811e-06, + "loss": 1.3026, + "mean_token_accuracy": 0.6363563358783721, + "num_tokens": 8310755.0, + "step": 1060 + }, + { + "entropy": 1.8318012267351151, + "epoch": 6.8192, + "grad_norm": 2.2089412212371826, + "learning_rate": 4.887216966577458e-06, + "loss": 1.2583, + "mean_token_accuracy": 0.6502064153552055, + "num_tokens": 8390161.0, + "step": 1070 + }, + { + "entropy": 1.8765722244977951, + "epoch": 6.8832, + "grad_norm": 2.3233554363250732, + "learning_rate": 4.710924974490463e-06, + "loss": 1.3223, + "mean_token_accuracy": 0.6393219083547592, + "num_tokens": 8469078.0, + "step": 1080 + }, + { + "entropy": 1.8413788318634032, + "epoch": 6.9472000000000005, + "grad_norm": 2.321904420852661, + "learning_rate": 4.536886364027428e-06, + "loss": 1.272, + "mean_token_accuracy": 0.647525629401207, + "num_tokens": 8547873.0, + "step": 1090 + }, + { + "entropy": 1.8666607818088017, + "epoch": 7.0064, + "grad_norm": 2.1004791259765625, + "learning_rate": 4.365175283390968e-06, + "loss": 1.2721, + "mean_token_accuracy": 0.6479364424138456, + "num_tokens": 8619109.0, + "step": 1100 + }, + { + "entropy": 1.8333647519350051, + "epoch": 7.0704, + "grad_norm": 2.9210190773010254, + "learning_rate": 4.195864889152295e-06, + "loss": 1.1833, + "mean_token_accuracy": 0.6699477419257164, + "num_tokens": 8692475.0, + "step": 1110 + }, + { + "entropy": 1.8425445258617401, + "epoch": 7.1344, + "grad_norm": 2.3149521350860596, + "learning_rate": 4.029027315083251e-06, + "loss": 1.2707, + "mean_token_accuracy": 0.650185227394104, + "num_tokens": 8770456.0, + "step": 1120 + }, + { + "entropy": 1.8121359765529632, + "epoch": 7.1984, + "grad_norm": 2.6502795219421387, + "learning_rate": 3.864733641424093e-06, + "loss": 1.2383, + "mean_token_accuracy": 0.6547705471515656, + "num_tokens": 8851214.0, + "step": 1130 + }, + { + "entropy": 1.802490884065628, + "epoch": 7.2624, + "grad_norm": 2.227534770965576, + "learning_rate": 3.703053864600169e-06, + "loss": 1.2603, + "mean_token_accuracy": 0.6489648431539535, + "num_tokens": 8932363.0, + "step": 1140 + }, + { + "entropy": 1.8214709132909774, + "epoch": 7.3264, + "grad_norm": 2.5923874378204346, + "learning_rate": 3.544056867400306e-06, + "loss": 1.248, + "mean_token_accuracy": 0.651621387898922, + "num_tokens": 9011734.0, + "step": 1150 + }, + { + "entropy": 1.826240959763527, + "epoch": 7.3904, + "grad_norm": 2.67551589012146, + "learning_rate": 3.3878103896296677e-06, + "loss": 1.2488, + "mean_token_accuracy": 0.6530374586582184, + "num_tokens": 9090277.0, + "step": 1160 + }, + { + "entropy": 1.837952870130539, + "epoch": 7.4544, + "grad_norm": 2.2191765308380127, + "learning_rate": 3.2343809992495945e-06, + "loss": 1.2704, + "mean_token_accuracy": 0.6503957703709602, + "num_tokens": 9168093.0, + "step": 1170 + }, + { + "entropy": 1.8135560542345046, + "epoch": 7.5184, + "grad_norm": 2.5211071968078613, + "learning_rate": 3.083834064016682e-06, + "loss": 1.2212, + "mean_token_accuracy": 0.6587097644805908, + "num_tokens": 9247777.0, + "step": 1180 + }, + { + "entropy": 1.8237973660230637, + "epoch": 7.5824, + "grad_norm": 2.6236841678619385, + "learning_rate": 2.9362337236331884e-06, + "loss": 1.2604, + "mean_token_accuracy": 0.6501624628901481, + "num_tokens": 9325367.0, + "step": 1190 + }, + { + "entropy": 1.836614164710045, + "epoch": 7.6464, + "grad_norm": 2.726731777191162, + "learning_rate": 2.791642862420686e-06, + "loss": 1.2554, + "mean_token_accuracy": 0.6520631939172745, + "num_tokens": 9403641.0, + "step": 1200 + }, + { + "entropy": 1.8044064462184906, + "epoch": 7.7104, + "grad_norm": 2.4943737983703613, + "learning_rate": 2.6501230825285294e-06, + "loss": 1.2519, + "mean_token_accuracy": 0.6524736672639847, + "num_tokens": 9484075.0, + "step": 1210 + }, + { + "entropy": 1.8258908241987228, + "epoch": 7.7744, + "grad_norm": 2.4426612854003906, + "learning_rate": 2.5117346776885843e-06, + "loss": 1.251, + "mean_token_accuracy": 0.6484281331300735, + "num_tokens": 9561148.0, + "step": 1220 + }, + { + "entropy": 1.8062447488307953, + "epoch": 7.8384, + "grad_norm": 2.465646266937256, + "learning_rate": 2.3765366075274287e-06, + "loss": 1.2662, + "mean_token_accuracy": 0.6492940753698349, + "num_tokens": 9642108.0, + "step": 1230 + }, + { + "entropy": 1.8293108910322189, + "epoch": 7.9024, + "grad_norm": 2.4230668544769287, + "learning_rate": 2.2445864724469146e-06, + "loss": 1.2625, + "mean_token_accuracy": 0.6592240884900094, + "num_tokens": 9719660.0, + "step": 1240 + }, + { + "entropy": 1.837513843178749, + "epoch": 7.9664, + "grad_norm": 2.7502171993255615, + "learning_rate": 2.1159404890838365e-06, + "loss": 1.2677, + "mean_token_accuracy": 0.6493206784129143, + "num_tokens": 9797593.0, + "step": 1250 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.470376847221719e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1256/training_args.bin b/checkpoint-1256/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-1256/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-1413/README.md b/checkpoint-1413/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-1413/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1413/adapter_config.json b/checkpoint-1413/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-1413/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1413/adapter_model.safetensors b/checkpoint-1413/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b38f7a0d4a98efa5368506eb722f3fe52b4108e --- /dev/null +++ b/checkpoint-1413/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17c621b866af0785e3749d72ecf7fc897f732a6261abf7b30ee22bfa3126753 +size 335604696 diff --git a/checkpoint-1413/chat_template.jinja b/checkpoint-1413/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-1413/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-1413/optimizer.pt b/checkpoint-1413/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bfd89946457821fd0d804429a94e6cb75abf697 --- /dev/null +++ b/checkpoint-1413/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a6de864b50a9ed23c94c8c51ee3e8467fd342a3a60f84bc5c6aac40d6b896dc +size 671473443 diff --git a/checkpoint-1413/rng_state.pth b/checkpoint-1413/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf4406fba302d4e78a5d1a053cd7c77fedaebb7c --- /dev/null +++ b/checkpoint-1413/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3ceb8da45ee0dd063a3c91723e66222f732105ffb6e39c9772076df9424578e +size 14645 diff --git a/checkpoint-1413/scheduler.pt b/checkpoint-1413/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..54434c67c95bf754eb3681e1da0d8e57098944dd --- /dev/null +++ b/checkpoint-1413/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c8308b326ca34e482aacdc316d4eaa218ce5669c2b2fb82fa6608fefa112cdb +size 1465 diff --git a/checkpoint-1413/special_tokens_map.json b/checkpoint-1413/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-1413/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1413/tokenizer.json b/checkpoint-1413/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1413/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1413/tokenizer_config.json b/checkpoint-1413/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-1413/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1413/trainer_state.json b/checkpoint-1413/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c8ce95608391440b937738b80a30d9af40154cbd --- /dev/null +++ b/checkpoint-1413/trainer_state.json @@ -0,0 +1,1444 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 1413, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + }, + { + "entropy": 2.1887036239778674, + "epoch": 2.0384, + "grad_norm": 0.6846157312393188, + "learning_rate": 1.8475859400301708e-05, + "loss": 1.5935, + "mean_token_accuracy": 0.5881956976813238, + "num_tokens": 2507166.0, + "step": 320 + }, + { + "entropy": 2.102977079153061, + "epoch": 2.1024, + "grad_norm": 0.7967628240585327, + "learning_rate": 1.8364527350607527e-05, + "loss": 1.5405, + "mean_token_accuracy": 0.5946892097592353, + "num_tokens": 2584298.0, + "step": 330 + }, + { + "entropy": 2.118516767024994, + "epoch": 2.1664, + "grad_norm": 0.7417224645614624, + "learning_rate": 1.824963163956726e-05, + "loss": 1.5727, + "mean_token_accuracy": 0.5870080485939979, + "num_tokens": 2663601.0, + "step": 340 + }, + { + "entropy": 2.104418155550957, + "epoch": 2.2304, + "grad_norm": 0.7956721782684326, + "learning_rate": 1.8131221217873175e-05, + "loss": 1.5575, + "mean_token_accuracy": 0.5936456203460694, + "num_tokens": 2744783.0, + "step": 350 + }, + { + "entropy": 2.129578319191933, + "epoch": 2.2944, + "grad_norm": 0.769292950630188, + "learning_rate": 1.8009346533640877e-05, + "loss": 1.5878, + "mean_token_accuracy": 0.5841517195105552, + "num_tokens": 2823023.0, + "step": 360 + }, + { + "entropy": 2.097687366604805, + "epoch": 2.3584, + "grad_norm": 0.9341740608215332, + "learning_rate": 1.7884059510916167e-05, + "loss": 1.5346, + "mean_token_accuracy": 0.599460557103157, + "num_tokens": 2899598.0, + "step": 370 + }, + { + "entropy": 2.151599031686783, + "epoch": 2.4224, + "grad_norm": 0.8752340078353882, + "learning_rate": 1.7755413527553087e-05, + "loss": 1.5984, + "mean_token_accuracy": 0.585393351316452, + "num_tokens": 2978519.0, + "step": 380 + }, + { + "entropy": 2.1223404884338377, + "epoch": 2.4864, + "grad_norm": 1.0296390056610107, + "learning_rate": 1.7623463392472574e-05, + "loss": 1.5232, + "mean_token_accuracy": 0.595654422044754, + "num_tokens": 3055327.0, + "step": 390 + }, + { + "entropy": 2.16276493370533, + "epoch": 2.5504, + "grad_norm": 0.9905762672424316, + "learning_rate": 1.748826532231142e-05, + "loss": 1.6049, + "mean_token_accuracy": 0.5822189599275589, + "num_tokens": 3135348.0, + "step": 400 + }, + { + "entropy": 2.127479985356331, + "epoch": 2.6144, + "grad_norm": 0.851375162601471, + "learning_rate": 1.7349876917471474e-05, + "loss": 1.5842, + "mean_token_accuracy": 0.5855211839079857, + "num_tokens": 3213122.0, + "step": 410 + }, + { + "entropy": 2.167752879858017, + "epoch": 2.6784, + "grad_norm": 0.975143313407898, + "learning_rate": 1.7208357137579318e-05, + "loss": 1.5918, + "mean_token_accuracy": 0.5839722648262977, + "num_tokens": 3289583.0, + "step": 420 + }, + { + "entropy": 2.127084198594093, + "epoch": 2.7424, + "grad_norm": 0.8077936768531799, + "learning_rate": 1.7063766276366814e-05, + "loss": 1.5916, + "mean_token_accuracy": 0.5900941833853721, + "num_tokens": 3369740.0, + "step": 430 + }, + { + "entropy": 2.1315969794988634, + "epoch": 2.8064, + "grad_norm": 0.9403624534606934, + "learning_rate": 1.6916165935983323e-05, + "loss": 1.5713, + "mean_token_accuracy": 0.5892721861600876, + "num_tokens": 3448328.0, + "step": 440 + }, + { + "entropy": 2.130605939030647, + "epoch": 2.8704, + "grad_norm": 0.8252040147781372, + "learning_rate": 1.676561900075041e-05, + "loss": 1.6003, + "mean_token_accuracy": 0.5845118075609207, + "num_tokens": 3529853.0, + "step": 450 + }, + { + "entropy": 2.112012493610382, + "epoch": 2.9344, + "grad_norm": 0.9267668724060059, + "learning_rate": 1.6612189610370336e-05, + "loss": 1.5796, + "mean_token_accuracy": 0.5887707889080047, + "num_tokens": 3610922.0, + "step": 460 + }, + { + "entropy": 2.100590059161186, + "epoch": 2.9984, + "grad_norm": 0.8996879458427429, + "learning_rate": 1.6455943132599698e-05, + "loss": 1.5483, + "mean_token_accuracy": 0.5934251204133034, + "num_tokens": 3688391.0, + "step": 470 + }, + { + "entropy": 2.1115864160898568, + "epoch": 3.0576, + "grad_norm": 1.097270131111145, + "learning_rate": 1.6296946135399835e-05, + "loss": 1.5506, + "mean_token_accuracy": 0.592829834770512, + "num_tokens": 3758747.0, + "step": 480 + }, + { + "entropy": 2.0610430628061294, + "epoch": 3.1216, + "grad_norm": 1.176645278930664, + "learning_rate": 1.613526635857591e-05, + "loss": 1.4461, + "mean_token_accuracy": 0.6111307457089424, + "num_tokens": 3834689.0, + "step": 490 + }, + { + "entropy": 2.0154007196426393, + "epoch": 3.1856, + "grad_norm": 1.1834276914596558, + "learning_rate": 1.5970972684916754e-05, + "loss": 1.4852, + "mean_token_accuracy": 0.6026980608701706, + "num_tokens": 3916450.0, + "step": 500 + }, + { + "entropy": 2.0441433399915696, + "epoch": 3.2496, + "grad_norm": 1.159286379814148, + "learning_rate": 1.5804135110847708e-05, + "loss": 1.4978, + "mean_token_accuracy": 0.6042912915349007, + "num_tokens": 3998511.0, + "step": 510 + }, + { + "entropy": 2.0493109285831452, + "epoch": 3.3136, + "grad_norm": 1.2141708135604858, + "learning_rate": 1.5634824716609037e-05, + "loss": 1.5018, + "mean_token_accuracy": 0.5995921581983567, + "num_tokens": 4077676.0, + "step": 520 + }, + { + "entropy": 2.0533218771219253, + "epoch": 3.3776, + "grad_norm": 1.1630637645721436, + "learning_rate": 1.5463113635972577e-05, + "loss": 1.499, + "mean_token_accuracy": 0.6046154126524925, + "num_tokens": 4155264.0, + "step": 530 + }, + { + "entropy": 2.0600034058094026, + "epoch": 3.4416, + "grad_norm": 1.2523504495620728, + "learning_rate": 1.528907502550954e-05, + "loss": 1.521, + "mean_token_accuracy": 0.6000443026423454, + "num_tokens": 4233655.0, + "step": 540 + }, + { + "entropy": 2.0414596855640412, + "epoch": 3.5056000000000003, + "grad_norm": 1.3990252017974854, + "learning_rate": 1.5112783033422547e-05, + "loss": 1.4899, + "mean_token_accuracy": 0.6026965886354446, + "num_tokens": 4311644.0, + "step": 550 + }, + { + "entropy": 2.061043033003807, + "epoch": 3.5696, + "grad_norm": 1.1884260177612305, + "learning_rate": 1.4934312767955193e-05, + "loss": 1.5143, + "mean_token_accuracy": 0.5981319859623909, + "num_tokens": 4390933.0, + "step": 560 + }, + { + "entropy": 2.034099668264389, + "epoch": 3.6336, + "grad_norm": 1.2996599674224854, + "learning_rate": 1.4753740265392595e-05, + "loss": 1.4953, + "mean_token_accuracy": 0.6029247522354126, + "num_tokens": 4470462.0, + "step": 570 + }, + { + "entropy": 2.0379767954349517, + "epoch": 3.6976, + "grad_norm": 1.2936193943023682, + "learning_rate": 1.4571142457666536e-05, + "loss": 1.4965, + "mean_token_accuracy": 0.6041712030768395, + "num_tokens": 4549236.0, + "step": 580 + }, + { + "entropy": 2.040063351392746, + "epoch": 3.7616, + "grad_norm": 1.5094560384750366, + "learning_rate": 1.4386597139579041e-05, + "loss": 1.4979, + "mean_token_accuracy": 0.6051288455724716, + "num_tokens": 4628758.0, + "step": 590 + }, + { + "entropy": 1.9998936265707017, + "epoch": 3.8256, + "grad_norm": 1.3166426420211792, + "learning_rate": 1.4200182935658327e-05, + "loss": 1.459, + "mean_token_accuracy": 0.6084850415587425, + "num_tokens": 4708526.0, + "step": 600 + }, + { + "entropy": 2.0041965901851655, + "epoch": 3.8895999999999997, + "grad_norm": 1.2710400819778442, + "learning_rate": 1.4011979266661235e-05, + "loss": 1.4831, + "mean_token_accuracy": 0.6057328775525093, + "num_tokens": 4788733.0, + "step": 610 + }, + { + "entropy": 2.0265558779239656, + "epoch": 3.9536, + "grad_norm": 1.4318969249725342, + "learning_rate": 1.3822066315736477e-05, + "loss": 1.4966, + "mean_token_accuracy": 0.5994595810770988, + "num_tokens": 4866451.0, + "step": 620 + }, + { + "entropy": 2.0692459924800977, + "epoch": 4.0128, + "grad_norm": 1.2546013593673706, + "learning_rate": 1.363052499426302e-05, + "loss": 1.503, + "mean_token_accuracy": 0.6039850309088424, + "num_tokens": 4936715.0, + "step": 630 + }, + { + "entropy": 1.9788923293352128, + "epoch": 4.0768, + "grad_norm": 1.416927456855774, + "learning_rate": 1.3437436907378225e-05, + "loss": 1.4248, + "mean_token_accuracy": 0.6142558038234711, + "num_tokens": 5016713.0, + "step": 640 + }, + { + "entropy": 1.9646029412746429, + "epoch": 4.1408, + "grad_norm": 1.5146726369857788, + "learning_rate": 1.3242884319210463e-05, + "loss": 1.3875, + "mean_token_accuracy": 0.624424883723259, + "num_tokens": 5096513.0, + "step": 650 + }, + { + "entropy": 1.93471617102623, + "epoch": 4.2048, + "grad_norm": 1.5090768337249756, + "learning_rate": 1.3046950117830888e-05, + "loss": 1.3884, + "mean_token_accuracy": 0.6222448632121086, + "num_tokens": 5177075.0, + "step": 660 + }, + { + "entropy": 2.002266028523445, + "epoch": 4.2688, + "grad_norm": 1.74358332157135, + "learning_rate": 1.2849717779939439e-05, + "loss": 1.4062, + "mean_token_accuracy": 0.6180147424340248, + "num_tokens": 5252902.0, + "step": 670 + }, + { + "entropy": 1.9397415190935134, + "epoch": 4.3328, + "grad_norm": 1.774728775024414, + "learning_rate": 1.2651271335300063e-05, + "loss": 1.3933, + "mean_token_accuracy": 0.626343595981598, + "num_tokens": 5331448.0, + "step": 680 + }, + { + "entropy": 1.9571841150522231, + "epoch": 4.3968, + "grad_norm": 1.80965256690979, + "learning_rate": 1.2451695330940268e-05, + "loss": 1.4205, + "mean_token_accuracy": 0.6187710732221603, + "num_tokens": 5410857.0, + "step": 690 + }, + { + "entropy": 1.9691186994314194, + "epoch": 4.4608, + "grad_norm": 1.5400609970092773, + "learning_rate": 1.2251074795130339e-05, + "loss": 1.4123, + "mean_token_accuracy": 0.614769059419632, + "num_tokens": 5488867.0, + "step": 700 + }, + { + "entropy": 1.9635825514793397, + "epoch": 4.5248, + "grad_norm": 1.467608094215393, + "learning_rate": 1.2049495201157489e-05, + "loss": 1.4228, + "mean_token_accuracy": 0.6202724784612655, + "num_tokens": 5567515.0, + "step": 710 + }, + { + "entropy": 1.9384470194578172, + "epoch": 4.5888, + "grad_norm": 1.652387022972107, + "learning_rate": 1.1847042430910451e-05, + "loss": 1.4273, + "mean_token_accuracy": 0.6190450325608253, + "num_tokens": 5648858.0, + "step": 720 + }, + { + "entropy": 1.9911590039730072, + "epoch": 4.6528, + "grad_norm": 1.7492380142211914, + "learning_rate": 1.1643802738289955e-05, + "loss": 1.4776, + "mean_token_accuracy": 0.6073927089571953, + "num_tokens": 5725459.0, + "step": 730 + }, + { + "entropy": 1.9724233269691467, + "epoch": 4.7168, + "grad_norm": 1.709669828414917, + "learning_rate": 1.1439862712460721e-05, + "loss": 1.4217, + "mean_token_accuracy": 0.6184087961912155, + "num_tokens": 5801601.0, + "step": 740 + }, + { + "entropy": 1.9725236982107162, + "epoch": 4.7808, + "grad_norm": 1.7469470500946045, + "learning_rate": 1.1235309240960621e-05, + "loss": 1.405, + "mean_token_accuracy": 0.6196158319711685, + "num_tokens": 5881107.0, + "step": 750 + }, + { + "entropy": 1.9484833419322967, + "epoch": 4.8448, + "grad_norm": 1.532373309135437, + "learning_rate": 1.1030229472682719e-05, + "loss": 1.4155, + "mean_token_accuracy": 0.611663281917572, + "num_tokens": 5960375.0, + "step": 760 + }, + { + "entropy": 1.9964754343032838, + "epoch": 4.9088, + "grad_norm": 1.7157669067382812, + "learning_rate": 1.0824710780745954e-05, + "loss": 1.4295, + "mean_token_accuracy": 0.6131752103567123, + "num_tokens": 6038267.0, + "step": 770 + }, + { + "entropy": 1.9598666340112687, + "epoch": 4.9728, + "grad_norm": 1.9844586849212646, + "learning_rate": 1.06188407252703e-05, + "loss": 1.397, + "mean_token_accuracy": 0.6226776748895645, + "num_tokens": 6114749.0, + "step": 780 + }, + { + "entropy": 1.9227982276194804, + "epoch": 5.032, + "grad_norm": 1.8960447311401367, + "learning_rate": 1.0412707016072254e-05, + "loss": 1.3649, + "mean_token_accuracy": 0.6269845414806057, + "num_tokens": 6190567.0, + "step": 790 + }, + { + "entropy": 1.9008578658103943, + "epoch": 5.096, + "grad_norm": 2.1205599308013916, + "learning_rate": 1.0206397475296548e-05, + "loss": 1.3582, + "mean_token_accuracy": 0.6292989999055862, + "num_tokens": 6269285.0, + "step": 800 + }, + { + "entropy": 1.9224162876605988, + "epoch": 5.16, + "grad_norm": 2.0454013347625732, + "learning_rate": 1e-05, + "loss": 1.3349, + "mean_token_accuracy": 0.6315066292881966, + "num_tokens": 6345352.0, + "step": 810 + }, + { + "entropy": 1.9340467154979706, + "epoch": 5.224, + "grad_norm": 2.2607693672180176, + "learning_rate": 9.793602524703456e-06, + "loss": 1.359, + "mean_token_accuracy": 0.6322078078985214, + "num_tokens": 6422524.0, + "step": 820 + }, + { + "entropy": 1.9296668291091919, + "epoch": 5.288, + "grad_norm": 2.1245901584625244, + "learning_rate": 9.58729298392775e-06, + "loss": 1.3672, + "mean_token_accuracy": 0.6282135233283043, + "num_tokens": 6500128.0, + "step": 830 + }, + { + "entropy": 1.9272812247276305, + "epoch": 5.352, + "grad_norm": 1.965820550918579, + "learning_rate": 9.381159274729704e-06, + "loss": 1.3786, + "mean_token_accuracy": 0.6249860525131226, + "num_tokens": 6578766.0, + "step": 840 + }, + { + "entropy": 1.904970219731331, + "epoch": 5.416, + "grad_norm": 1.9188759326934814, + "learning_rate": 9.175289219254051e-06, + "loss": 1.3418, + "mean_token_accuracy": 0.6325456693768501, + "num_tokens": 6658732.0, + "step": 850 + }, + { + "entropy": 1.8833305448293687, + "epoch": 5.48, + "grad_norm": 1.9675428867340088, + "learning_rate": 8.969770527317283e-06, + "loss": 1.3274, + "mean_token_accuracy": 0.6377805054187775, + "num_tokens": 6738683.0, + "step": 860 + }, + { + "entropy": 1.8806802958250046, + "epoch": 5.5440000000000005, + "grad_norm": 1.8849304914474487, + "learning_rate": 8.764690759039382e-06, + "loss": 1.3109, + "mean_token_accuracy": 0.636364534497261, + "num_tokens": 6817786.0, + "step": 870 + }, + { + "entropy": 1.8846195042133331, + "epoch": 5.608, + "grad_norm": 2.050208330154419, + "learning_rate": 8.56013728753928e-06, + "loss": 1.3449, + "mean_token_accuracy": 0.6316975012421608, + "num_tokens": 6896222.0, + "step": 880 + }, + { + "entropy": 1.88524529337883, + "epoch": 5.672, + "grad_norm": 2.1371288299560547, + "learning_rate": 8.356197261710048e-06, + "loss": 1.346, + "mean_token_accuracy": 0.633928644657135, + "num_tokens": 6976885.0, + "step": 890 + }, + { + "entropy": 1.9162244260311128, + "epoch": 5.736, + "grad_norm": 1.9879032373428345, + "learning_rate": 8.152957569089552e-06, + "loss": 1.3486, + "mean_token_accuracy": 0.6311523199081421, + "num_tokens": 7053473.0, + "step": 900 + }, + { + "entropy": 1.89161317050457, + "epoch": 5.8, + "grad_norm": 2.2934179306030273, + "learning_rate": 7.950504798842513e-06, + "loss": 1.3699, + "mean_token_accuracy": 0.6269390240311623, + "num_tokens": 7133137.0, + "step": 910 + }, + { + "entropy": 1.888116827607155, + "epoch": 5.864, + "grad_norm": 1.769087791442871, + "learning_rate": 7.748925204869667e-06, + "loss": 1.3756, + "mean_token_accuracy": 0.6285945609211921, + "num_tokens": 7213693.0, + "step": 920 + }, + { + "entropy": 1.89390210211277, + "epoch": 5.928, + "grad_norm": 2.2577364444732666, + "learning_rate": 7.548304669059735e-06, + "loss": 1.3396, + "mean_token_accuracy": 0.6290415957570076, + "num_tokens": 7291999.0, + "step": 930 + }, + { + "entropy": 1.8755547761917115, + "epoch": 5.992, + "grad_norm": 2.080371618270874, + "learning_rate": 7.348728664699939e-06, + "loss": 1.3305, + "mean_token_accuracy": 0.6322756335139275, + "num_tokens": 7370138.0, + "step": 940 + }, + { + "entropy": 1.8396991845723745, + "epoch": 6.0512, + "grad_norm": 2.222177028656006, + "learning_rate": 7.150282220060564e-06, + "loss": 1.2782, + "mean_token_accuracy": 0.6437820018948736, + "num_tokens": 7444764.0, + "step": 950 + }, + { + "entropy": 1.864711531996727, + "epoch": 6.1152, + "grad_norm": 2.236663579940796, + "learning_rate": 6.9530498821691165e-06, + "loss": 1.342, + "mean_token_accuracy": 0.6400286257266998, + "num_tokens": 7523012.0, + "step": 960 + }, + { + "entropy": 1.8462383985519408, + "epoch": 6.1792, + "grad_norm": 2.438649892807007, + "learning_rate": 6.757115680789539e-06, + "loss": 1.2769, + "mean_token_accuracy": 0.6437345445156097, + "num_tokens": 7602451.0, + "step": 970 + }, + { + "entropy": 1.8425735771656035, + "epoch": 6.2432, + "grad_norm": 2.306880235671997, + "learning_rate": 6.562563092621776e-06, + "loss": 1.309, + "mean_token_accuracy": 0.6463457986712455, + "num_tokens": 7681972.0, + "step": 980 + }, + { + "entropy": 1.8139673799276352, + "epoch": 6.3072, + "grad_norm": 2.286114454269409, + "learning_rate": 6.369475005736984e-06, + "loss": 1.2748, + "mean_token_accuracy": 0.6487143859267235, + "num_tokens": 7762845.0, + "step": 990 + }, + { + "entropy": 1.8660429507493972, + "epoch": 6.3712, + "grad_norm": 2.421706199645996, + "learning_rate": 6.177933684263524e-06, + "loss": 1.2964, + "mean_token_accuracy": 0.6455973491072655, + "num_tokens": 7839552.0, + "step": 1000 + }, + { + "entropy": 1.8517659038305283, + "epoch": 6.4352, + "grad_norm": 2.3891334533691406, + "learning_rate": 5.988020733338767e-06, + "loss": 1.2893, + "mean_token_accuracy": 0.6442387655377388, + "num_tokens": 7915996.0, + "step": 1010 + }, + { + "entropy": 1.8547363132238388, + "epoch": 6.4992, + "grad_norm": 2.26686429977417, + "learning_rate": 5.7998170643416795e-06, + "loss": 1.2973, + "mean_token_accuracy": 0.6435917019844055, + "num_tokens": 7995119.0, + "step": 1020 + }, + { + "entropy": 1.8365773737430573, + "epoch": 6.5632, + "grad_norm": 2.1454896926879883, + "learning_rate": 5.613402860420962e-06, + "loss": 1.2744, + "mean_token_accuracy": 0.6410152271389962, + "num_tokens": 8075306.0, + "step": 1030 + }, + { + "entropy": 1.8936803489923477, + "epoch": 6.6272, + "grad_norm": 2.5226423740386963, + "learning_rate": 5.428857542333465e-06, + "loss": 1.3225, + "mean_token_accuracy": 0.6396260514855385, + "num_tokens": 8152449.0, + "step": 1040 + }, + { + "entropy": 1.8555004209280015, + "epoch": 6.6912, + "grad_norm": 2.216014862060547, + "learning_rate": 5.246259734607411e-06, + "loss": 1.299, + "mean_token_accuracy": 0.641279113292694, + "num_tokens": 8231904.0, + "step": 1050 + }, + { + "entropy": 1.8588679373264312, + "epoch": 6.7552, + "grad_norm": 2.4265236854553223, + "learning_rate": 5.065687232044811e-06, + "loss": 1.3026, + "mean_token_accuracy": 0.6363563358783721, + "num_tokens": 8310755.0, + "step": 1060 + }, + { + "entropy": 1.8318012267351151, + "epoch": 6.8192, + "grad_norm": 2.2089412212371826, + "learning_rate": 4.887216966577458e-06, + "loss": 1.2583, + "mean_token_accuracy": 0.6502064153552055, + "num_tokens": 8390161.0, + "step": 1070 + }, + { + "entropy": 1.8765722244977951, + "epoch": 6.8832, + "grad_norm": 2.3233554363250732, + "learning_rate": 4.710924974490463e-06, + "loss": 1.3223, + "mean_token_accuracy": 0.6393219083547592, + "num_tokens": 8469078.0, + "step": 1080 + }, + { + "entropy": 1.8413788318634032, + "epoch": 6.9472000000000005, + "grad_norm": 2.321904420852661, + "learning_rate": 4.536886364027428e-06, + "loss": 1.272, + "mean_token_accuracy": 0.647525629401207, + "num_tokens": 8547873.0, + "step": 1090 + }, + { + "entropy": 1.8666607818088017, + "epoch": 7.0064, + "grad_norm": 2.1004791259765625, + "learning_rate": 4.365175283390968e-06, + "loss": 1.2721, + "mean_token_accuracy": 0.6479364424138456, + "num_tokens": 8619109.0, + "step": 1100 + }, + { + "entropy": 1.8333647519350051, + "epoch": 7.0704, + "grad_norm": 2.9210190773010254, + "learning_rate": 4.195864889152295e-06, + "loss": 1.1833, + "mean_token_accuracy": 0.6699477419257164, + "num_tokens": 8692475.0, + "step": 1110 + }, + { + "entropy": 1.8425445258617401, + "epoch": 7.1344, + "grad_norm": 2.3149521350860596, + "learning_rate": 4.029027315083251e-06, + "loss": 1.2707, + "mean_token_accuracy": 0.650185227394104, + "num_tokens": 8770456.0, + "step": 1120 + }, + { + "entropy": 1.8121359765529632, + "epoch": 7.1984, + "grad_norm": 2.6502795219421387, + "learning_rate": 3.864733641424093e-06, + "loss": 1.2383, + "mean_token_accuracy": 0.6547705471515656, + "num_tokens": 8851214.0, + "step": 1130 + }, + { + "entropy": 1.802490884065628, + "epoch": 7.2624, + "grad_norm": 2.227534770965576, + "learning_rate": 3.703053864600169e-06, + "loss": 1.2603, + "mean_token_accuracy": 0.6489648431539535, + "num_tokens": 8932363.0, + "step": 1140 + }, + { + "entropy": 1.8214709132909774, + "epoch": 7.3264, + "grad_norm": 2.5923874378204346, + "learning_rate": 3.544056867400306e-06, + "loss": 1.248, + "mean_token_accuracy": 0.651621387898922, + "num_tokens": 9011734.0, + "step": 1150 + }, + { + "entropy": 1.826240959763527, + "epoch": 7.3904, + "grad_norm": 2.67551589012146, + "learning_rate": 3.3878103896296677e-06, + "loss": 1.2488, + "mean_token_accuracy": 0.6530374586582184, + "num_tokens": 9090277.0, + "step": 1160 + }, + { + "entropy": 1.837952870130539, + "epoch": 7.4544, + "grad_norm": 2.2191765308380127, + "learning_rate": 3.2343809992495945e-06, + "loss": 1.2704, + "mean_token_accuracy": 0.6503957703709602, + "num_tokens": 9168093.0, + "step": 1170 + }, + { + "entropy": 1.8135560542345046, + "epoch": 7.5184, + "grad_norm": 2.5211071968078613, + "learning_rate": 3.083834064016682e-06, + "loss": 1.2212, + "mean_token_accuracy": 0.6587097644805908, + "num_tokens": 9247777.0, + "step": 1180 + }, + { + "entropy": 1.8237973660230637, + "epoch": 7.5824, + "grad_norm": 2.6236841678619385, + "learning_rate": 2.9362337236331884e-06, + "loss": 1.2604, + "mean_token_accuracy": 0.6501624628901481, + "num_tokens": 9325367.0, + "step": 1190 + }, + { + "entropy": 1.836614164710045, + "epoch": 7.6464, + "grad_norm": 2.726731777191162, + "learning_rate": 2.791642862420686e-06, + "loss": 1.2554, + "mean_token_accuracy": 0.6520631939172745, + "num_tokens": 9403641.0, + "step": 1200 + }, + { + "entropy": 1.8044064462184906, + "epoch": 7.7104, + "grad_norm": 2.4943737983703613, + "learning_rate": 2.6501230825285294e-06, + "loss": 1.2519, + "mean_token_accuracy": 0.6524736672639847, + "num_tokens": 9484075.0, + "step": 1210 + }, + { + "entropy": 1.8258908241987228, + "epoch": 7.7744, + "grad_norm": 2.4426612854003906, + "learning_rate": 2.5117346776885843e-06, + "loss": 1.251, + "mean_token_accuracy": 0.6484281331300735, + "num_tokens": 9561148.0, + "step": 1220 + }, + { + "entropy": 1.8062447488307953, + "epoch": 7.8384, + "grad_norm": 2.465646266937256, + "learning_rate": 2.3765366075274287e-06, + "loss": 1.2662, + "mean_token_accuracy": 0.6492940753698349, + "num_tokens": 9642108.0, + "step": 1230 + }, + { + "entropy": 1.8293108910322189, + "epoch": 7.9024, + "grad_norm": 2.4230668544769287, + "learning_rate": 2.2445864724469146e-06, + "loss": 1.2625, + "mean_token_accuracy": 0.6592240884900094, + "num_tokens": 9719660.0, + "step": 1240 + }, + { + "entropy": 1.837513843178749, + "epoch": 7.9664, + "grad_norm": 2.7502171993255615, + "learning_rate": 2.1159404890838365e-06, + "loss": 1.2677, + "mean_token_accuracy": 0.6493206784129143, + "num_tokens": 9797593.0, + "step": 1250 + }, + { + "entropy": 1.8162316245001715, + "epoch": 8.0256, + "grad_norm": 2.5199058055877686, + "learning_rate": 1.990653466359125e-06, + "loss": 1.2293, + "mean_token_accuracy": 0.656300467413825, + "num_tokens": 9871177.0, + "step": 1260 + }, + { + "entropy": 1.780314788222313, + "epoch": 8.0896, + "grad_norm": 2.5237162113189697, + "learning_rate": 1.8687787821268255e-06, + "loss": 1.1791, + "mean_token_accuracy": 0.6675050809979439, + "num_tokens": 9949391.0, + "step": 1270 + }, + { + "entropy": 1.779639583826065, + "epoch": 8.1536, + "grad_norm": 2.4559428691864014, + "learning_rate": 1.7503683604327426e-06, + "loss": 1.2177, + "mean_token_accuracy": 0.6600575730204582, + "num_tokens": 10030182.0, + "step": 1280 + }, + { + "entropy": 1.7865025967359542, + "epoch": 8.2176, + "grad_norm": 2.9508230686187744, + "learning_rate": 1.6354726493924745e-06, + "loss": 1.1937, + "mean_token_accuracy": 0.6630557537078857, + "num_tokens": 10107960.0, + "step": 1290 + }, + { + "entropy": 1.8122670024633407, + "epoch": 8.2816, + "grad_norm": 2.6917898654937744, + "learning_rate": 1.5241405996982928e-06, + "loss": 1.2319, + "mean_token_accuracy": 0.6598842918872834, + "num_tokens": 10185524.0, + "step": 1300 + }, + { + "entropy": 1.806730917096138, + "epoch": 8.3456, + "grad_norm": 2.7887086868286133, + "learning_rate": 1.4164196437639355e-06, + "loss": 1.25, + "mean_token_accuracy": 0.6578737393021583, + "num_tokens": 10265123.0, + "step": 1310 + }, + { + "entropy": 1.8156007081270218, + "epoch": 8.4096, + "grad_norm": 2.9965310096740723, + "learning_rate": 1.3123556755163114e-06, + "loss": 1.234, + "mean_token_accuracy": 0.6579165816307068, + "num_tokens": 10342205.0, + "step": 1320 + }, + { + "entropy": 1.8044028550386428, + "epoch": 8.4736, + "grad_norm": 2.9466843605041504, + "learning_rate": 1.2119930308426264e-06, + "loss": 1.2423, + "mean_token_accuracy": 0.6527451828122139, + "num_tokens": 10420603.0, + "step": 1330 + }, + { + "entropy": 1.8250535994768142, + "epoch": 8.5376, + "grad_norm": 2.9452784061431885, + "learning_rate": 1.1153744687013313e-06, + "loss": 1.258, + "mean_token_accuracy": 0.6589037463068962, + "num_tokens": 10499049.0, + "step": 1340 + }, + { + "entropy": 1.7990799486637115, + "epoch": 8.6016, + "grad_norm": 2.6469309329986572, + "learning_rate": 1.0225411529048857e-06, + "loss": 1.2415, + "mean_token_accuracy": 0.6555879130959511, + "num_tokens": 10578178.0, + "step": 1350 + }, + { + "entropy": 1.7638877242803574, + "epoch": 8.6656, + "grad_norm": 2.77990460395813, + "learning_rate": 9.33532634582156e-07, + "loss": 1.2143, + "mean_token_accuracy": 0.6589834168553352, + "num_tokens": 10659018.0, + "step": 1360 + }, + { + "entropy": 1.8123771637678145, + "epoch": 8.7296, + "grad_norm": 3.1158993244171143, + "learning_rate": 8.483868353278657e-07, + "loss": 1.2358, + "mean_token_accuracy": 0.6561313390731811, + "num_tokens": 10736582.0, + "step": 1370 + }, + { + "entropy": 1.8054670304059983, + "epoch": 8.7936, + "grad_norm": 2.915422201156616, + "learning_rate": 7.671400310462984e-07, + "loss": 1.2089, + "mean_token_accuracy": 0.6610309720039368, + "num_tokens": 10814534.0, + "step": 1380 + }, + { + "entropy": 1.800497230887413, + "epoch": 8.8576, + "grad_norm": 2.7816338539123535, + "learning_rate": 6.898268364961591e-07, + "loss": 1.2227, + "mean_token_accuracy": 0.6584793984889984, + "num_tokens": 10893484.0, + "step": 1390 + }, + { + "entropy": 1.7850348353385925, + "epoch": 8.9216, + "grad_norm": 2.569054126739502, + "learning_rate": 6.164801905431394e-07, + "loss": 1.2242, + "mean_token_accuracy": 0.6574000924825668, + "num_tokens": 10973818.0, + "step": 1400 + }, + { + "entropy": 1.795585972070694, + "epoch": 8.9856, + "grad_norm": 2.6529977321624756, + "learning_rate": 5.471313421264879e-07, + "loss": 1.2127, + "mean_token_accuracy": 0.6600923746824264, + "num_tokens": 11051396.0, + "step": 1410 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.066034365626581e+18, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1413/training_args.bin b/checkpoint-1413/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-1413/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-157/README.md b/checkpoint-157/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-157/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-157/adapter_config.json b/checkpoint-157/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-157/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-157/adapter_model.safetensors b/checkpoint-157/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..751c225c6ab896e1067fb84106f5f989aa439dd1 --- /dev/null +++ b/checkpoint-157/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e82ffb8017b5edc71ef7aa84c1ff77df96be47a39cddb4bfb2cf282d3268371b +size 335604696 diff --git a/checkpoint-157/chat_template.jinja b/checkpoint-157/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-157/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-157/optimizer.pt b/checkpoint-157/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bc8a87416dea88ae2c8bb235777896c6c54e77e --- /dev/null +++ b/checkpoint-157/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed83a41df63c03c42d8746c0a1edca6c4d4a5feefad42668fcdfc2e459129346 +size 671473443 diff --git a/checkpoint-157/rng_state.pth b/checkpoint-157/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ede38702774dc86fb760af8d16212194a427716 --- /dev/null +++ b/checkpoint-157/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b8e36d8bc339f13e158d4a9a2cce56eb46788dec0de01fd45407a3c6b3882ca +size 14645 diff --git a/checkpoint-157/scheduler.pt b/checkpoint-157/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e17b1dd48834fce3551d55f76d3159345e619a2 --- /dev/null +++ b/checkpoint-157/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b24a6a04700d51b0eaccb461e7f9c4848367f9209d248c5ef2160107e4529ada +size 1465 diff --git a/checkpoint-157/special_tokens_map.json b/checkpoint-157/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-157/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-157/tokenizer.json b/checkpoint-157/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-157/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-157/tokenizer_config.json b/checkpoint-157/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-157/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-157/trainer_state.json b/checkpoint-157/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3530337618896beebd5da5ae2013e1af378fffb3 --- /dev/null +++ b/checkpoint-157/trainer_state.json @@ -0,0 +1,184 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 157, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1886117509962138e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-157/training_args.bin b/checkpoint-157/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-157/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-1570/README.md b/checkpoint-1570/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-1570/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-1570/adapter_config.json b/checkpoint-1570/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-1570/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-1570/adapter_model.safetensors b/checkpoint-1570/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4ccf5f6dd2a454aa20553128e450582be9da57c --- /dev/null +++ b/checkpoint-1570/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b21edd23f1a8a08cecfc9ddce41121f0ac4fe3805ec052dcd4b02a4ab7fc10d +size 335604696 diff --git a/checkpoint-1570/chat_template.jinja b/checkpoint-1570/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-1570/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-1570/optimizer.pt b/checkpoint-1570/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b16adfc3dee7afc41bb5aecd087f4ac80001c2b0 --- /dev/null +++ b/checkpoint-1570/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9771d368a11bcd7b2b6811a152b692cc90e4ee9162c7619ac43968f73aefe1a3 +size 671473443 diff --git a/checkpoint-1570/rng_state.pth b/checkpoint-1570/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f0efded468dd4b4b66966081427272e70e130900 --- /dev/null +++ b/checkpoint-1570/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c4dba511df88500ecd690979c8e4c6a0958f63e4966a419e043a3ecfb54c9f9 +size 14645 diff --git a/checkpoint-1570/scheduler.pt b/checkpoint-1570/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a46c3495f861d040558c1c50c74560d96042bc33 --- /dev/null +++ b/checkpoint-1570/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97117bdf474ed4fd5b0c1fa3eaaf8cb96ba5d11c274eeee19b38209406efda1f +size 1465 diff --git a/checkpoint-1570/special_tokens_map.json b/checkpoint-1570/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-1570/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-1570/tokenizer.json b/checkpoint-1570/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-1570/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-1570/tokenizer_config.json b/checkpoint-1570/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-1570/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-1570/trainer_state.json b/checkpoint-1570/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b6ce7e6b479244e1a96ff689ad6ab47ac2943545 --- /dev/null +++ b/checkpoint-1570/trainer_state.json @@ -0,0 +1,1604 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 1570, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + }, + { + "entropy": 2.1887036239778674, + "epoch": 2.0384, + "grad_norm": 0.6846157312393188, + "learning_rate": 1.8475859400301708e-05, + "loss": 1.5935, + "mean_token_accuracy": 0.5881956976813238, + "num_tokens": 2507166.0, + "step": 320 + }, + { + "entropy": 2.102977079153061, + "epoch": 2.1024, + "grad_norm": 0.7967628240585327, + "learning_rate": 1.8364527350607527e-05, + "loss": 1.5405, + "mean_token_accuracy": 0.5946892097592353, + "num_tokens": 2584298.0, + "step": 330 + }, + { + "entropy": 2.118516767024994, + "epoch": 2.1664, + "grad_norm": 0.7417224645614624, + "learning_rate": 1.824963163956726e-05, + "loss": 1.5727, + "mean_token_accuracy": 0.5870080485939979, + "num_tokens": 2663601.0, + "step": 340 + }, + { + "entropy": 2.104418155550957, + "epoch": 2.2304, + "grad_norm": 0.7956721782684326, + "learning_rate": 1.8131221217873175e-05, + "loss": 1.5575, + "mean_token_accuracy": 0.5936456203460694, + "num_tokens": 2744783.0, + "step": 350 + }, + { + "entropy": 2.129578319191933, + "epoch": 2.2944, + "grad_norm": 0.769292950630188, + "learning_rate": 1.8009346533640877e-05, + "loss": 1.5878, + "mean_token_accuracy": 0.5841517195105552, + "num_tokens": 2823023.0, + "step": 360 + }, + { + "entropy": 2.097687366604805, + "epoch": 2.3584, + "grad_norm": 0.9341740608215332, + "learning_rate": 1.7884059510916167e-05, + "loss": 1.5346, + "mean_token_accuracy": 0.599460557103157, + "num_tokens": 2899598.0, + "step": 370 + }, + { + "entropy": 2.151599031686783, + "epoch": 2.4224, + "grad_norm": 0.8752340078353882, + "learning_rate": 1.7755413527553087e-05, + "loss": 1.5984, + "mean_token_accuracy": 0.585393351316452, + "num_tokens": 2978519.0, + "step": 380 + }, + { + "entropy": 2.1223404884338377, + "epoch": 2.4864, + "grad_norm": 1.0296390056610107, + "learning_rate": 1.7623463392472574e-05, + "loss": 1.5232, + "mean_token_accuracy": 0.595654422044754, + "num_tokens": 3055327.0, + "step": 390 + }, + { + "entropy": 2.16276493370533, + "epoch": 2.5504, + "grad_norm": 0.9905762672424316, + "learning_rate": 1.748826532231142e-05, + "loss": 1.6049, + "mean_token_accuracy": 0.5822189599275589, + "num_tokens": 3135348.0, + "step": 400 + }, + { + "entropy": 2.127479985356331, + "epoch": 2.6144, + "grad_norm": 0.851375162601471, + "learning_rate": 1.7349876917471474e-05, + "loss": 1.5842, + "mean_token_accuracy": 0.5855211839079857, + "num_tokens": 3213122.0, + "step": 410 + }, + { + "entropy": 2.167752879858017, + "epoch": 2.6784, + "grad_norm": 0.975143313407898, + "learning_rate": 1.7208357137579318e-05, + "loss": 1.5918, + "mean_token_accuracy": 0.5839722648262977, + "num_tokens": 3289583.0, + "step": 420 + }, + { + "entropy": 2.127084198594093, + "epoch": 2.7424, + "grad_norm": 0.8077936768531799, + "learning_rate": 1.7063766276366814e-05, + "loss": 1.5916, + "mean_token_accuracy": 0.5900941833853721, + "num_tokens": 3369740.0, + "step": 430 + }, + { + "entropy": 2.1315969794988634, + "epoch": 2.8064, + "grad_norm": 0.9403624534606934, + "learning_rate": 1.6916165935983323e-05, + "loss": 1.5713, + "mean_token_accuracy": 0.5892721861600876, + "num_tokens": 3448328.0, + "step": 440 + }, + { + "entropy": 2.130605939030647, + "epoch": 2.8704, + "grad_norm": 0.8252040147781372, + "learning_rate": 1.676561900075041e-05, + "loss": 1.6003, + "mean_token_accuracy": 0.5845118075609207, + "num_tokens": 3529853.0, + "step": 450 + }, + { + "entropy": 2.112012493610382, + "epoch": 2.9344, + "grad_norm": 0.9267668724060059, + "learning_rate": 1.6612189610370336e-05, + "loss": 1.5796, + "mean_token_accuracy": 0.5887707889080047, + "num_tokens": 3610922.0, + "step": 460 + }, + { + "entropy": 2.100590059161186, + "epoch": 2.9984, + "grad_norm": 0.8996879458427429, + "learning_rate": 1.6455943132599698e-05, + "loss": 1.5483, + "mean_token_accuracy": 0.5934251204133034, + "num_tokens": 3688391.0, + "step": 470 + }, + { + "entropy": 2.1115864160898568, + "epoch": 3.0576, + "grad_norm": 1.097270131111145, + "learning_rate": 1.6296946135399835e-05, + "loss": 1.5506, + "mean_token_accuracy": 0.592829834770512, + "num_tokens": 3758747.0, + "step": 480 + }, + { + "entropy": 2.0610430628061294, + "epoch": 3.1216, + "grad_norm": 1.176645278930664, + "learning_rate": 1.613526635857591e-05, + "loss": 1.4461, + "mean_token_accuracy": 0.6111307457089424, + "num_tokens": 3834689.0, + "step": 490 + }, + { + "entropy": 2.0154007196426393, + "epoch": 3.1856, + "grad_norm": 1.1834276914596558, + "learning_rate": 1.5970972684916754e-05, + "loss": 1.4852, + "mean_token_accuracy": 0.6026980608701706, + "num_tokens": 3916450.0, + "step": 500 + }, + { + "entropy": 2.0441433399915696, + "epoch": 3.2496, + "grad_norm": 1.159286379814148, + "learning_rate": 1.5804135110847708e-05, + "loss": 1.4978, + "mean_token_accuracy": 0.6042912915349007, + "num_tokens": 3998511.0, + "step": 510 + }, + { + "entropy": 2.0493109285831452, + "epoch": 3.3136, + "grad_norm": 1.2141708135604858, + "learning_rate": 1.5634824716609037e-05, + "loss": 1.5018, + "mean_token_accuracy": 0.5995921581983567, + "num_tokens": 4077676.0, + "step": 520 + }, + { + "entropy": 2.0533218771219253, + "epoch": 3.3776, + "grad_norm": 1.1630637645721436, + "learning_rate": 1.5463113635972577e-05, + "loss": 1.499, + "mean_token_accuracy": 0.6046154126524925, + "num_tokens": 4155264.0, + "step": 530 + }, + { + "entropy": 2.0600034058094026, + "epoch": 3.4416, + "grad_norm": 1.2523504495620728, + "learning_rate": 1.528907502550954e-05, + "loss": 1.521, + "mean_token_accuracy": 0.6000443026423454, + "num_tokens": 4233655.0, + "step": 540 + }, + { + "entropy": 2.0414596855640412, + "epoch": 3.5056000000000003, + "grad_norm": 1.3990252017974854, + "learning_rate": 1.5112783033422547e-05, + "loss": 1.4899, + "mean_token_accuracy": 0.6026965886354446, + "num_tokens": 4311644.0, + "step": 550 + }, + { + "entropy": 2.061043033003807, + "epoch": 3.5696, + "grad_norm": 1.1884260177612305, + "learning_rate": 1.4934312767955193e-05, + "loss": 1.5143, + "mean_token_accuracy": 0.5981319859623909, + "num_tokens": 4390933.0, + "step": 560 + }, + { + "entropy": 2.034099668264389, + "epoch": 3.6336, + "grad_norm": 1.2996599674224854, + "learning_rate": 1.4753740265392595e-05, + "loss": 1.4953, + "mean_token_accuracy": 0.6029247522354126, + "num_tokens": 4470462.0, + "step": 570 + }, + { + "entropy": 2.0379767954349517, + "epoch": 3.6976, + "grad_norm": 1.2936193943023682, + "learning_rate": 1.4571142457666536e-05, + "loss": 1.4965, + "mean_token_accuracy": 0.6041712030768395, + "num_tokens": 4549236.0, + "step": 580 + }, + { + "entropy": 2.040063351392746, + "epoch": 3.7616, + "grad_norm": 1.5094560384750366, + "learning_rate": 1.4386597139579041e-05, + "loss": 1.4979, + "mean_token_accuracy": 0.6051288455724716, + "num_tokens": 4628758.0, + "step": 590 + }, + { + "entropy": 1.9998936265707017, + "epoch": 3.8256, + "grad_norm": 1.3166426420211792, + "learning_rate": 1.4200182935658327e-05, + "loss": 1.459, + "mean_token_accuracy": 0.6084850415587425, + "num_tokens": 4708526.0, + "step": 600 + }, + { + "entropy": 2.0041965901851655, + "epoch": 3.8895999999999997, + "grad_norm": 1.2710400819778442, + "learning_rate": 1.4011979266661235e-05, + "loss": 1.4831, + "mean_token_accuracy": 0.6057328775525093, + "num_tokens": 4788733.0, + "step": 610 + }, + { + "entropy": 2.0265558779239656, + "epoch": 3.9536, + "grad_norm": 1.4318969249725342, + "learning_rate": 1.3822066315736477e-05, + "loss": 1.4966, + "mean_token_accuracy": 0.5994595810770988, + "num_tokens": 4866451.0, + "step": 620 + }, + { + "entropy": 2.0692459924800977, + "epoch": 4.0128, + "grad_norm": 1.2546013593673706, + "learning_rate": 1.363052499426302e-05, + "loss": 1.503, + "mean_token_accuracy": 0.6039850309088424, + "num_tokens": 4936715.0, + "step": 630 + }, + { + "entropy": 1.9788923293352128, + "epoch": 4.0768, + "grad_norm": 1.416927456855774, + "learning_rate": 1.3437436907378225e-05, + "loss": 1.4248, + "mean_token_accuracy": 0.6142558038234711, + "num_tokens": 5016713.0, + "step": 640 + }, + { + "entropy": 1.9646029412746429, + "epoch": 4.1408, + "grad_norm": 1.5146726369857788, + "learning_rate": 1.3242884319210463e-05, + "loss": 1.3875, + "mean_token_accuracy": 0.624424883723259, + "num_tokens": 5096513.0, + "step": 650 + }, + { + "entropy": 1.93471617102623, + "epoch": 4.2048, + "grad_norm": 1.5090768337249756, + "learning_rate": 1.3046950117830888e-05, + "loss": 1.3884, + "mean_token_accuracy": 0.6222448632121086, + "num_tokens": 5177075.0, + "step": 660 + }, + { + "entropy": 2.002266028523445, + "epoch": 4.2688, + "grad_norm": 1.74358332157135, + "learning_rate": 1.2849717779939439e-05, + "loss": 1.4062, + "mean_token_accuracy": 0.6180147424340248, + "num_tokens": 5252902.0, + "step": 670 + }, + { + "entropy": 1.9397415190935134, + "epoch": 4.3328, + "grad_norm": 1.774728775024414, + "learning_rate": 1.2651271335300063e-05, + "loss": 1.3933, + "mean_token_accuracy": 0.626343595981598, + "num_tokens": 5331448.0, + "step": 680 + }, + { + "entropy": 1.9571841150522231, + "epoch": 4.3968, + "grad_norm": 1.80965256690979, + "learning_rate": 1.2451695330940268e-05, + "loss": 1.4205, + "mean_token_accuracy": 0.6187710732221603, + "num_tokens": 5410857.0, + "step": 690 + }, + { + "entropy": 1.9691186994314194, + "epoch": 4.4608, + "grad_norm": 1.5400609970092773, + "learning_rate": 1.2251074795130339e-05, + "loss": 1.4123, + "mean_token_accuracy": 0.614769059419632, + "num_tokens": 5488867.0, + "step": 700 + }, + { + "entropy": 1.9635825514793397, + "epoch": 4.5248, + "grad_norm": 1.467608094215393, + "learning_rate": 1.2049495201157489e-05, + "loss": 1.4228, + "mean_token_accuracy": 0.6202724784612655, + "num_tokens": 5567515.0, + "step": 710 + }, + { + "entropy": 1.9384470194578172, + "epoch": 4.5888, + "grad_norm": 1.652387022972107, + "learning_rate": 1.1847042430910451e-05, + "loss": 1.4273, + "mean_token_accuracy": 0.6190450325608253, + "num_tokens": 5648858.0, + "step": 720 + }, + { + "entropy": 1.9911590039730072, + "epoch": 4.6528, + "grad_norm": 1.7492380142211914, + "learning_rate": 1.1643802738289955e-05, + "loss": 1.4776, + "mean_token_accuracy": 0.6073927089571953, + "num_tokens": 5725459.0, + "step": 730 + }, + { + "entropy": 1.9724233269691467, + "epoch": 4.7168, + "grad_norm": 1.709669828414917, + "learning_rate": 1.1439862712460721e-05, + "loss": 1.4217, + "mean_token_accuracy": 0.6184087961912155, + "num_tokens": 5801601.0, + "step": 740 + }, + { + "entropy": 1.9725236982107162, + "epoch": 4.7808, + "grad_norm": 1.7469470500946045, + "learning_rate": 1.1235309240960621e-05, + "loss": 1.405, + "mean_token_accuracy": 0.6196158319711685, + "num_tokens": 5881107.0, + "step": 750 + }, + { + "entropy": 1.9484833419322967, + "epoch": 4.8448, + "grad_norm": 1.532373309135437, + "learning_rate": 1.1030229472682719e-05, + "loss": 1.4155, + "mean_token_accuracy": 0.611663281917572, + "num_tokens": 5960375.0, + "step": 760 + }, + { + "entropy": 1.9964754343032838, + "epoch": 4.9088, + "grad_norm": 1.7157669067382812, + "learning_rate": 1.0824710780745954e-05, + "loss": 1.4295, + "mean_token_accuracy": 0.6131752103567123, + "num_tokens": 6038267.0, + "step": 770 + }, + { + "entropy": 1.9598666340112687, + "epoch": 4.9728, + "grad_norm": 1.9844586849212646, + "learning_rate": 1.06188407252703e-05, + "loss": 1.397, + "mean_token_accuracy": 0.6226776748895645, + "num_tokens": 6114749.0, + "step": 780 + }, + { + "entropy": 1.9227982276194804, + "epoch": 5.032, + "grad_norm": 1.8960447311401367, + "learning_rate": 1.0412707016072254e-05, + "loss": 1.3649, + "mean_token_accuracy": 0.6269845414806057, + "num_tokens": 6190567.0, + "step": 790 + }, + { + "entropy": 1.9008578658103943, + "epoch": 5.096, + "grad_norm": 2.1205599308013916, + "learning_rate": 1.0206397475296548e-05, + "loss": 1.3582, + "mean_token_accuracy": 0.6292989999055862, + "num_tokens": 6269285.0, + "step": 800 + }, + { + "entropy": 1.9224162876605988, + "epoch": 5.16, + "grad_norm": 2.0454013347625732, + "learning_rate": 1e-05, + "loss": 1.3349, + "mean_token_accuracy": 0.6315066292881966, + "num_tokens": 6345352.0, + "step": 810 + }, + { + "entropy": 1.9340467154979706, + "epoch": 5.224, + "grad_norm": 2.2607693672180176, + "learning_rate": 9.793602524703456e-06, + "loss": 1.359, + "mean_token_accuracy": 0.6322078078985214, + "num_tokens": 6422524.0, + "step": 820 + }, + { + "entropy": 1.9296668291091919, + "epoch": 5.288, + "grad_norm": 2.1245901584625244, + "learning_rate": 9.58729298392775e-06, + "loss": 1.3672, + "mean_token_accuracy": 0.6282135233283043, + "num_tokens": 6500128.0, + "step": 830 + }, + { + "entropy": 1.9272812247276305, + "epoch": 5.352, + "grad_norm": 1.965820550918579, + "learning_rate": 9.381159274729704e-06, + "loss": 1.3786, + "mean_token_accuracy": 0.6249860525131226, + "num_tokens": 6578766.0, + "step": 840 + }, + { + "entropy": 1.904970219731331, + "epoch": 5.416, + "grad_norm": 1.9188759326934814, + "learning_rate": 9.175289219254051e-06, + "loss": 1.3418, + "mean_token_accuracy": 0.6325456693768501, + "num_tokens": 6658732.0, + "step": 850 + }, + { + "entropy": 1.8833305448293687, + "epoch": 5.48, + "grad_norm": 1.9675428867340088, + "learning_rate": 8.969770527317283e-06, + "loss": 1.3274, + "mean_token_accuracy": 0.6377805054187775, + "num_tokens": 6738683.0, + "step": 860 + }, + { + "entropy": 1.8806802958250046, + "epoch": 5.5440000000000005, + "grad_norm": 1.8849304914474487, + "learning_rate": 8.764690759039382e-06, + "loss": 1.3109, + "mean_token_accuracy": 0.636364534497261, + "num_tokens": 6817786.0, + "step": 870 + }, + { + "entropy": 1.8846195042133331, + "epoch": 5.608, + "grad_norm": 2.050208330154419, + "learning_rate": 8.56013728753928e-06, + "loss": 1.3449, + "mean_token_accuracy": 0.6316975012421608, + "num_tokens": 6896222.0, + "step": 880 + }, + { + "entropy": 1.88524529337883, + "epoch": 5.672, + "grad_norm": 2.1371288299560547, + "learning_rate": 8.356197261710048e-06, + "loss": 1.346, + "mean_token_accuracy": 0.633928644657135, + "num_tokens": 6976885.0, + "step": 890 + }, + { + "entropy": 1.9162244260311128, + "epoch": 5.736, + "grad_norm": 1.9879032373428345, + "learning_rate": 8.152957569089552e-06, + "loss": 1.3486, + "mean_token_accuracy": 0.6311523199081421, + "num_tokens": 7053473.0, + "step": 900 + }, + { + "entropy": 1.89161317050457, + "epoch": 5.8, + "grad_norm": 2.2934179306030273, + "learning_rate": 7.950504798842513e-06, + "loss": 1.3699, + "mean_token_accuracy": 0.6269390240311623, + "num_tokens": 7133137.0, + "step": 910 + }, + { + "entropy": 1.888116827607155, + "epoch": 5.864, + "grad_norm": 1.769087791442871, + "learning_rate": 7.748925204869667e-06, + "loss": 1.3756, + "mean_token_accuracy": 0.6285945609211921, + "num_tokens": 7213693.0, + "step": 920 + }, + { + "entropy": 1.89390210211277, + "epoch": 5.928, + "grad_norm": 2.2577364444732666, + "learning_rate": 7.548304669059735e-06, + "loss": 1.3396, + "mean_token_accuracy": 0.6290415957570076, + "num_tokens": 7291999.0, + "step": 930 + }, + { + "entropy": 1.8755547761917115, + "epoch": 5.992, + "grad_norm": 2.080371618270874, + "learning_rate": 7.348728664699939e-06, + "loss": 1.3305, + "mean_token_accuracy": 0.6322756335139275, + "num_tokens": 7370138.0, + "step": 940 + }, + { + "entropy": 1.8396991845723745, + "epoch": 6.0512, + "grad_norm": 2.222177028656006, + "learning_rate": 7.150282220060564e-06, + "loss": 1.2782, + "mean_token_accuracy": 0.6437820018948736, + "num_tokens": 7444764.0, + "step": 950 + }, + { + "entropy": 1.864711531996727, + "epoch": 6.1152, + "grad_norm": 2.236663579940796, + "learning_rate": 6.9530498821691165e-06, + "loss": 1.342, + "mean_token_accuracy": 0.6400286257266998, + "num_tokens": 7523012.0, + "step": 960 + }, + { + "entropy": 1.8462383985519408, + "epoch": 6.1792, + "grad_norm": 2.438649892807007, + "learning_rate": 6.757115680789539e-06, + "loss": 1.2769, + "mean_token_accuracy": 0.6437345445156097, + "num_tokens": 7602451.0, + "step": 970 + }, + { + "entropy": 1.8425735771656035, + "epoch": 6.2432, + "grad_norm": 2.306880235671997, + "learning_rate": 6.562563092621776e-06, + "loss": 1.309, + "mean_token_accuracy": 0.6463457986712455, + "num_tokens": 7681972.0, + "step": 980 + }, + { + "entropy": 1.8139673799276352, + "epoch": 6.3072, + "grad_norm": 2.286114454269409, + "learning_rate": 6.369475005736984e-06, + "loss": 1.2748, + "mean_token_accuracy": 0.6487143859267235, + "num_tokens": 7762845.0, + "step": 990 + }, + { + "entropy": 1.8660429507493972, + "epoch": 6.3712, + "grad_norm": 2.421706199645996, + "learning_rate": 6.177933684263524e-06, + "loss": 1.2964, + "mean_token_accuracy": 0.6455973491072655, + "num_tokens": 7839552.0, + "step": 1000 + }, + { + "entropy": 1.8517659038305283, + "epoch": 6.4352, + "grad_norm": 2.3891334533691406, + "learning_rate": 5.988020733338767e-06, + "loss": 1.2893, + "mean_token_accuracy": 0.6442387655377388, + "num_tokens": 7915996.0, + "step": 1010 + }, + { + "entropy": 1.8547363132238388, + "epoch": 6.4992, + "grad_norm": 2.26686429977417, + "learning_rate": 5.7998170643416795e-06, + "loss": 1.2973, + "mean_token_accuracy": 0.6435917019844055, + "num_tokens": 7995119.0, + "step": 1020 + }, + { + "entropy": 1.8365773737430573, + "epoch": 6.5632, + "grad_norm": 2.1454896926879883, + "learning_rate": 5.613402860420962e-06, + "loss": 1.2744, + "mean_token_accuracy": 0.6410152271389962, + "num_tokens": 8075306.0, + "step": 1030 + }, + { + "entropy": 1.8936803489923477, + "epoch": 6.6272, + "grad_norm": 2.5226423740386963, + "learning_rate": 5.428857542333465e-06, + "loss": 1.3225, + "mean_token_accuracy": 0.6396260514855385, + "num_tokens": 8152449.0, + "step": 1040 + }, + { + "entropy": 1.8555004209280015, + "epoch": 6.6912, + "grad_norm": 2.216014862060547, + "learning_rate": 5.246259734607411e-06, + "loss": 1.299, + "mean_token_accuracy": 0.641279113292694, + "num_tokens": 8231904.0, + "step": 1050 + }, + { + "entropy": 1.8588679373264312, + "epoch": 6.7552, + "grad_norm": 2.4265236854553223, + "learning_rate": 5.065687232044811e-06, + "loss": 1.3026, + "mean_token_accuracy": 0.6363563358783721, + "num_tokens": 8310755.0, + "step": 1060 + }, + { + "entropy": 1.8318012267351151, + "epoch": 6.8192, + "grad_norm": 2.2089412212371826, + "learning_rate": 4.887216966577458e-06, + "loss": 1.2583, + "mean_token_accuracy": 0.6502064153552055, + "num_tokens": 8390161.0, + "step": 1070 + }, + { + "entropy": 1.8765722244977951, + "epoch": 6.8832, + "grad_norm": 2.3233554363250732, + "learning_rate": 4.710924974490463e-06, + "loss": 1.3223, + "mean_token_accuracy": 0.6393219083547592, + "num_tokens": 8469078.0, + "step": 1080 + }, + { + "entropy": 1.8413788318634032, + "epoch": 6.9472000000000005, + "grad_norm": 2.321904420852661, + "learning_rate": 4.536886364027428e-06, + "loss": 1.272, + "mean_token_accuracy": 0.647525629401207, + "num_tokens": 8547873.0, + "step": 1090 + }, + { + "entropy": 1.8666607818088017, + "epoch": 7.0064, + "grad_norm": 2.1004791259765625, + "learning_rate": 4.365175283390968e-06, + "loss": 1.2721, + "mean_token_accuracy": 0.6479364424138456, + "num_tokens": 8619109.0, + "step": 1100 + }, + { + "entropy": 1.8333647519350051, + "epoch": 7.0704, + "grad_norm": 2.9210190773010254, + "learning_rate": 4.195864889152295e-06, + "loss": 1.1833, + "mean_token_accuracy": 0.6699477419257164, + "num_tokens": 8692475.0, + "step": 1110 + }, + { + "entropy": 1.8425445258617401, + "epoch": 7.1344, + "grad_norm": 2.3149521350860596, + "learning_rate": 4.029027315083251e-06, + "loss": 1.2707, + "mean_token_accuracy": 0.650185227394104, + "num_tokens": 8770456.0, + "step": 1120 + }, + { + "entropy": 1.8121359765529632, + "epoch": 7.1984, + "grad_norm": 2.6502795219421387, + "learning_rate": 3.864733641424093e-06, + "loss": 1.2383, + "mean_token_accuracy": 0.6547705471515656, + "num_tokens": 8851214.0, + "step": 1130 + }, + { + "entropy": 1.802490884065628, + "epoch": 7.2624, + "grad_norm": 2.227534770965576, + "learning_rate": 3.703053864600169e-06, + "loss": 1.2603, + "mean_token_accuracy": 0.6489648431539535, + "num_tokens": 8932363.0, + "step": 1140 + }, + { + "entropy": 1.8214709132909774, + "epoch": 7.3264, + "grad_norm": 2.5923874378204346, + "learning_rate": 3.544056867400306e-06, + "loss": 1.248, + "mean_token_accuracy": 0.651621387898922, + "num_tokens": 9011734.0, + "step": 1150 + }, + { + "entropy": 1.826240959763527, + "epoch": 7.3904, + "grad_norm": 2.67551589012146, + "learning_rate": 3.3878103896296677e-06, + "loss": 1.2488, + "mean_token_accuracy": 0.6530374586582184, + "num_tokens": 9090277.0, + "step": 1160 + }, + { + "entropy": 1.837952870130539, + "epoch": 7.4544, + "grad_norm": 2.2191765308380127, + "learning_rate": 3.2343809992495945e-06, + "loss": 1.2704, + "mean_token_accuracy": 0.6503957703709602, + "num_tokens": 9168093.0, + "step": 1170 + }, + { + "entropy": 1.8135560542345046, + "epoch": 7.5184, + "grad_norm": 2.5211071968078613, + "learning_rate": 3.083834064016682e-06, + "loss": 1.2212, + "mean_token_accuracy": 0.6587097644805908, + "num_tokens": 9247777.0, + "step": 1180 + }, + { + "entropy": 1.8237973660230637, + "epoch": 7.5824, + "grad_norm": 2.6236841678619385, + "learning_rate": 2.9362337236331884e-06, + "loss": 1.2604, + "mean_token_accuracy": 0.6501624628901481, + "num_tokens": 9325367.0, + "step": 1190 + }, + { + "entropy": 1.836614164710045, + "epoch": 7.6464, + "grad_norm": 2.726731777191162, + "learning_rate": 2.791642862420686e-06, + "loss": 1.2554, + "mean_token_accuracy": 0.6520631939172745, + "num_tokens": 9403641.0, + "step": 1200 + }, + { + "entropy": 1.8044064462184906, + "epoch": 7.7104, + "grad_norm": 2.4943737983703613, + "learning_rate": 2.6501230825285294e-06, + "loss": 1.2519, + "mean_token_accuracy": 0.6524736672639847, + "num_tokens": 9484075.0, + "step": 1210 + }, + { + "entropy": 1.8258908241987228, + "epoch": 7.7744, + "grad_norm": 2.4426612854003906, + "learning_rate": 2.5117346776885843e-06, + "loss": 1.251, + "mean_token_accuracy": 0.6484281331300735, + "num_tokens": 9561148.0, + "step": 1220 + }, + { + "entropy": 1.8062447488307953, + "epoch": 7.8384, + "grad_norm": 2.465646266937256, + "learning_rate": 2.3765366075274287e-06, + "loss": 1.2662, + "mean_token_accuracy": 0.6492940753698349, + "num_tokens": 9642108.0, + "step": 1230 + }, + { + "entropy": 1.8293108910322189, + "epoch": 7.9024, + "grad_norm": 2.4230668544769287, + "learning_rate": 2.2445864724469146e-06, + "loss": 1.2625, + "mean_token_accuracy": 0.6592240884900094, + "num_tokens": 9719660.0, + "step": 1240 + }, + { + "entropy": 1.837513843178749, + "epoch": 7.9664, + "grad_norm": 2.7502171993255615, + "learning_rate": 2.1159404890838365e-06, + "loss": 1.2677, + "mean_token_accuracy": 0.6493206784129143, + "num_tokens": 9797593.0, + "step": 1250 + }, + { + "entropy": 1.8162316245001715, + "epoch": 8.0256, + "grad_norm": 2.5199058055877686, + "learning_rate": 1.990653466359125e-06, + "loss": 1.2293, + "mean_token_accuracy": 0.656300467413825, + "num_tokens": 9871177.0, + "step": 1260 + }, + { + "entropy": 1.780314788222313, + "epoch": 8.0896, + "grad_norm": 2.5237162113189697, + "learning_rate": 1.8687787821268255e-06, + "loss": 1.1791, + "mean_token_accuracy": 0.6675050809979439, + "num_tokens": 9949391.0, + "step": 1270 + }, + { + "entropy": 1.779639583826065, + "epoch": 8.1536, + "grad_norm": 2.4559428691864014, + "learning_rate": 1.7503683604327426e-06, + "loss": 1.2177, + "mean_token_accuracy": 0.6600575730204582, + "num_tokens": 10030182.0, + "step": 1280 + }, + { + "entropy": 1.7865025967359542, + "epoch": 8.2176, + "grad_norm": 2.9508230686187744, + "learning_rate": 1.6354726493924745e-06, + "loss": 1.1937, + "mean_token_accuracy": 0.6630557537078857, + "num_tokens": 10107960.0, + "step": 1290 + }, + { + "entropy": 1.8122670024633407, + "epoch": 8.2816, + "grad_norm": 2.6917898654937744, + "learning_rate": 1.5241405996982928e-06, + "loss": 1.2319, + "mean_token_accuracy": 0.6598842918872834, + "num_tokens": 10185524.0, + "step": 1300 + }, + { + "entropy": 1.806730917096138, + "epoch": 8.3456, + "grad_norm": 2.7887086868286133, + "learning_rate": 1.4164196437639355e-06, + "loss": 1.25, + "mean_token_accuracy": 0.6578737393021583, + "num_tokens": 10265123.0, + "step": 1310 + }, + { + "entropy": 1.8156007081270218, + "epoch": 8.4096, + "grad_norm": 2.9965310096740723, + "learning_rate": 1.3123556755163114e-06, + "loss": 1.234, + "mean_token_accuracy": 0.6579165816307068, + "num_tokens": 10342205.0, + "step": 1320 + }, + { + "entropy": 1.8044028550386428, + "epoch": 8.4736, + "grad_norm": 2.9466843605041504, + "learning_rate": 1.2119930308426264e-06, + "loss": 1.2423, + "mean_token_accuracy": 0.6527451828122139, + "num_tokens": 10420603.0, + "step": 1330 + }, + { + "entropy": 1.8250535994768142, + "epoch": 8.5376, + "grad_norm": 2.9452784061431885, + "learning_rate": 1.1153744687013313e-06, + "loss": 1.258, + "mean_token_accuracy": 0.6589037463068962, + "num_tokens": 10499049.0, + "step": 1340 + }, + { + "entropy": 1.7990799486637115, + "epoch": 8.6016, + "grad_norm": 2.6469309329986572, + "learning_rate": 1.0225411529048857e-06, + "loss": 1.2415, + "mean_token_accuracy": 0.6555879130959511, + "num_tokens": 10578178.0, + "step": 1350 + }, + { + "entropy": 1.7638877242803574, + "epoch": 8.6656, + "grad_norm": 2.77990460395813, + "learning_rate": 9.33532634582156e-07, + "loss": 1.2143, + "mean_token_accuracy": 0.6589834168553352, + "num_tokens": 10659018.0, + "step": 1360 + }, + { + "entropy": 1.8123771637678145, + "epoch": 8.7296, + "grad_norm": 3.1158993244171143, + "learning_rate": 8.483868353278657e-07, + "loss": 1.2358, + "mean_token_accuracy": 0.6561313390731811, + "num_tokens": 10736582.0, + "step": 1370 + }, + { + "entropy": 1.8054670304059983, + "epoch": 8.7936, + "grad_norm": 2.915422201156616, + "learning_rate": 7.671400310462984e-07, + "loss": 1.2089, + "mean_token_accuracy": 0.6610309720039368, + "num_tokens": 10814534.0, + "step": 1380 + }, + { + "entropy": 1.800497230887413, + "epoch": 8.8576, + "grad_norm": 2.7816338539123535, + "learning_rate": 6.898268364961591e-07, + "loss": 1.2227, + "mean_token_accuracy": 0.6584793984889984, + "num_tokens": 10893484.0, + "step": 1390 + }, + { + "entropy": 1.7850348353385925, + "epoch": 8.9216, + "grad_norm": 2.569054126739502, + "learning_rate": 6.164801905431394e-07, + "loss": 1.2242, + "mean_token_accuracy": 0.6574000924825668, + "num_tokens": 10973818.0, + "step": 1400 + }, + { + "entropy": 1.795585972070694, + "epoch": 8.9856, + "grad_norm": 2.6529977321624756, + "learning_rate": 5.471313421264879e-07, + "loss": 1.2127, + "mean_token_accuracy": 0.6600923746824264, + "num_tokens": 11051396.0, + "step": 1410 + }, + { + "entropy": 1.8172799606580992, + "epoch": 9.0448, + "grad_norm": 2.719399929046631, + "learning_rate": 4.818098369455793e-07, + "loss": 1.2758, + "mean_token_accuracy": 0.6563853702029666, + "num_tokens": 11124338.0, + "step": 1420 + }, + { + "entropy": 1.7982712090015411, + "epoch": 9.1088, + "grad_norm": 2.516369581222534, + "learning_rate": 4.20543504872124e-07, + "loss": 1.2054, + "mean_token_accuracy": 0.6637166649103164, + "num_tokens": 11202233.0, + "step": 1430 + }, + { + "entropy": 1.795827680826187, + "epoch": 9.1728, + "grad_norm": 3.1275811195373535, + "learning_rate": 3.633584480934016e-07, + "loss": 1.1907, + "mean_token_accuracy": 0.6667785882949829, + "num_tokens": 11279587.0, + "step": 1440 + }, + { + "entropy": 1.7738620430231093, + "epoch": 9.2368, + "grad_norm": 2.6204917430877686, + "learning_rate": 3.1027902999157146e-07, + "loss": 1.2156, + "mean_token_accuracy": 0.6609065368771553, + "num_tokens": 11360243.0, + "step": 1450 + }, + { + "entropy": 1.7866268098354339, + "epoch": 9.3008, + "grad_norm": 2.763274908065796, + "learning_rate": 2.61327864763784e-07, + "loss": 1.2109, + "mean_token_accuracy": 0.6604277700185776, + "num_tokens": 11440689.0, + "step": 1460 + }, + { + "entropy": 1.7874858051538467, + "epoch": 9.3648, + "grad_norm": 2.725693464279175, + "learning_rate": 2.1652580778751875e-07, + "loss": 1.2379, + "mean_token_accuracy": 0.6640482068061828, + "num_tokens": 11520425.0, + "step": 1470 + }, + { + "entropy": 1.7999387830495834, + "epoch": 9.4288, + "grad_norm": 2.849959135055542, + "learning_rate": 1.758919467352771e-07, + "loss": 1.2453, + "mean_token_accuracy": 0.652675162255764, + "num_tokens": 11600907.0, + "step": 1480 + }, + { + "entropy": 1.7698762983083725, + "epoch": 9.4928, + "grad_norm": 2.384965419769287, + "learning_rate": 1.3944359344237214e-07, + "loss": 1.2038, + "mean_token_accuracy": 0.6633729308843612, + "num_tokens": 11680986.0, + "step": 1490 + }, + { + "entropy": 1.7817810475826263, + "epoch": 9.556799999999999, + "grad_norm": 2.6171748638153076, + "learning_rate": 1.0719627653131948e-07, + "loss": 1.2052, + "mean_token_accuracy": 0.662623830139637, + "num_tokens": 11759645.0, + "step": 1500 + }, + { + "entropy": 1.7850700795650483, + "epoch": 9.6208, + "grad_norm": 2.759584426879883, + "learning_rate": 7.916373479595507e-08, + "loss": 1.2011, + "mean_token_accuracy": 0.6652053311467171, + "num_tokens": 11837753.0, + "step": 1510 + }, + { + "entropy": 1.8089916795492171, + "epoch": 9.6848, + "grad_norm": 2.8050880432128906, + "learning_rate": 5.535791134809176e-08, + "loss": 1.218, + "mean_token_accuracy": 0.6629775419831276, + "num_tokens": 11915480.0, + "step": 1520 + }, + { + "entropy": 1.7926248282194137, + "epoch": 9.7488, + "grad_norm": 2.947237491607666, + "learning_rate": 3.57889485292251e-08, + "loss": 1.2402, + "mean_token_accuracy": 0.6560651332139968, + "num_tokens": 11994204.0, + "step": 1530 + }, + { + "entropy": 1.788163235783577, + "epoch": 9.8128, + "grad_norm": 3.0515170097351074, + "learning_rate": 2.046518358944094e-08, + "loss": 1.2018, + "mean_token_accuracy": 0.66530032902956, + "num_tokens": 12070332.0, + "step": 1540 + }, + { + "entropy": 1.798910641670227, + "epoch": 9.8768, + "grad_norm": 2.6602368354797363, + "learning_rate": 9.393145135377924e-09, + "loss": 1.2375, + "mean_token_accuracy": 0.6554615125060081, + "num_tokens": 12149645.0, + "step": 1550 + }, + { + "entropy": 1.794683536887169, + "epoch": 9.9408, + "grad_norm": 2.509953022003174, + "learning_rate": 2.5775503487501795e-09, + "loss": 1.2262, + "mean_token_accuracy": 0.6570453852415085, + "num_tokens": 12229123.0, + "step": 1560 + }, + { + "entropy": 1.7768772969374786, + "epoch": 10.0, + "grad_norm": 6.54733419418335, + "learning_rate": 2.1302976616066616e-11, + "loss": 1.1615, + "mean_token_accuracy": 0.6717051477045626, + "num_tokens": 12300470.0, + "step": 1570 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.1844416691327468e+18, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1570/training_args.bin b/checkpoint-1570/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-1570/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-314/README.md b/checkpoint-314/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-314/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-314/adapter_config.json b/checkpoint-314/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-314/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-314/adapter_model.safetensors b/checkpoint-314/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1497cbb01876628525b4c3cf892284ef6002a65b --- /dev/null +++ b/checkpoint-314/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb76b2e04ff4b02f84480be89cbda80797d815183439975cf85a6a2a7e9cbe03 +size 335604696 diff --git a/checkpoint-314/chat_template.jinja b/checkpoint-314/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-314/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-314/optimizer.pt b/checkpoint-314/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddcf422ffdc5e6e0c82a5f9dd189997d96abfb1f --- /dev/null +++ b/checkpoint-314/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1404c863c76e13aae7eebfd4a2b97200c4b8f4332ef6b1e0b1bc92e9a14a6f7 +size 671473443 diff --git a/checkpoint-314/rng_state.pth b/checkpoint-314/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..67c4bdcf55ed8ff58c3141713bb2b726866409a9 --- /dev/null +++ b/checkpoint-314/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d913924183d71c8ca6ea255b55de0024909cf9f2a175c55d3cdcfda62e12e33e +size 14645 diff --git a/checkpoint-314/scheduler.pt b/checkpoint-314/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..61055e52be486b3cc0725ae862790294d1a3279f --- /dev/null +++ b/checkpoint-314/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a5a88df3479d141a6ba13f5c8eaf9a5cec84a6e97c83557dfa3d5a51db2b9be +size 1465 diff --git a/checkpoint-314/special_tokens_map.json b/checkpoint-314/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-314/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-314/tokenizer.json b/checkpoint-314/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-314/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-314/tokenizer_config.json b/checkpoint-314/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-314/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-314/trainer_state.json b/checkpoint-314/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..99b389aeb91971ab889d1d7df4970ac1abd743bf --- /dev/null +++ b/checkpoint-314/trainer_state.json @@ -0,0 +1,344 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 314, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.3696468333376307e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-314/training_args.bin b/checkpoint-314/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-314/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-471/README.md b/checkpoint-471/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-471/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-471/adapter_config.json b/checkpoint-471/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-471/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-471/adapter_model.safetensors b/checkpoint-471/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ca1ec9672b2b4fb5054260bcfe72ad57bb8de3f --- /dev/null +++ b/checkpoint-471/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d221d525235b5173ce9e114620aaf33d0958b5ad6a947bf68d41006aa0a8a032 +size 335604696 diff --git a/checkpoint-471/chat_template.jinja b/checkpoint-471/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-471/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-471/optimizer.pt b/checkpoint-471/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5067d412fea028b1c48352eb6aaafcb70f2831d7 --- /dev/null +++ b/checkpoint-471/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef03231c2a5725344a348d8f7f9656817e9f2958e8287789eaf1aafb6a027591 +size 671473443 diff --git a/checkpoint-471/rng_state.pth b/checkpoint-471/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a14f3b57bd8513173b08ef192815069b949199a --- /dev/null +++ b/checkpoint-471/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cdfb028150b18d4f38d985d26f81738d0f66bbc5be89d994927da7ffc7765cc +size 14645 diff --git a/checkpoint-471/scheduler.pt b/checkpoint-471/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b759490ecc1bae3f86dc9f3a2a0e61700ede343 --- /dev/null +++ b/checkpoint-471/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe6a41ddfe79f034e591e49dab28464f43c859c8aee97fa435426dca85378a5d +size 1465 diff --git a/checkpoint-471/special_tokens_map.json b/checkpoint-471/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-471/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-471/tokenizer.json b/checkpoint-471/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-471/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-471/tokenizer_config.json b/checkpoint-471/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-471/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-471/trainer_state.json b/checkpoint-471/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..222490699df3238ed4e9945ec2422317fa39d6d2 --- /dev/null +++ b/checkpoint-471/trainer_state.json @@ -0,0 +1,504 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 471, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + }, + { + "entropy": 2.1887036239778674, + "epoch": 2.0384, + "grad_norm": 0.6846157312393188, + "learning_rate": 1.8475859400301708e-05, + "loss": 1.5935, + "mean_token_accuracy": 0.5881956976813238, + "num_tokens": 2507166.0, + "step": 320 + }, + { + "entropy": 2.102977079153061, + "epoch": 2.1024, + "grad_norm": 0.7967628240585327, + "learning_rate": 1.8364527350607527e-05, + "loss": 1.5405, + "mean_token_accuracy": 0.5946892097592353, + "num_tokens": 2584298.0, + "step": 330 + }, + { + "entropy": 2.118516767024994, + "epoch": 2.1664, + "grad_norm": 0.7417224645614624, + "learning_rate": 1.824963163956726e-05, + "loss": 1.5727, + "mean_token_accuracy": 0.5870080485939979, + "num_tokens": 2663601.0, + "step": 340 + }, + { + "entropy": 2.104418155550957, + "epoch": 2.2304, + "grad_norm": 0.7956721782684326, + "learning_rate": 1.8131221217873175e-05, + "loss": 1.5575, + "mean_token_accuracy": 0.5936456203460694, + "num_tokens": 2744783.0, + "step": 350 + }, + { + "entropy": 2.129578319191933, + "epoch": 2.2944, + "grad_norm": 0.769292950630188, + "learning_rate": 1.8009346533640877e-05, + "loss": 1.5878, + "mean_token_accuracy": 0.5841517195105552, + "num_tokens": 2823023.0, + "step": 360 + }, + { + "entropy": 2.097687366604805, + "epoch": 2.3584, + "grad_norm": 0.9341740608215332, + "learning_rate": 1.7884059510916167e-05, + "loss": 1.5346, + "mean_token_accuracy": 0.599460557103157, + "num_tokens": 2899598.0, + "step": 370 + }, + { + "entropy": 2.151599031686783, + "epoch": 2.4224, + "grad_norm": 0.8752340078353882, + "learning_rate": 1.7755413527553087e-05, + "loss": 1.5984, + "mean_token_accuracy": 0.585393351316452, + "num_tokens": 2978519.0, + "step": 380 + }, + { + "entropy": 2.1223404884338377, + "epoch": 2.4864, + "grad_norm": 1.0296390056610107, + "learning_rate": 1.7623463392472574e-05, + "loss": 1.5232, + "mean_token_accuracy": 0.595654422044754, + "num_tokens": 3055327.0, + "step": 390 + }, + { + "entropy": 2.16276493370533, + "epoch": 2.5504, + "grad_norm": 0.9905762672424316, + "learning_rate": 1.748826532231142e-05, + "loss": 1.6049, + "mean_token_accuracy": 0.5822189599275589, + "num_tokens": 3135348.0, + "step": 400 + }, + { + "entropy": 2.127479985356331, + "epoch": 2.6144, + "grad_norm": 0.851375162601471, + "learning_rate": 1.7349876917471474e-05, + "loss": 1.5842, + "mean_token_accuracy": 0.5855211839079857, + "num_tokens": 3213122.0, + "step": 410 + }, + { + "entropy": 2.167752879858017, + "epoch": 2.6784, + "grad_norm": 0.975143313407898, + "learning_rate": 1.7208357137579318e-05, + "loss": 1.5918, + "mean_token_accuracy": 0.5839722648262977, + "num_tokens": 3289583.0, + "step": 420 + }, + { + "entropy": 2.127084198594093, + "epoch": 2.7424, + "grad_norm": 0.8077936768531799, + "learning_rate": 1.7063766276366814e-05, + "loss": 1.5916, + "mean_token_accuracy": 0.5900941833853721, + "num_tokens": 3369740.0, + "step": 430 + }, + { + "entropy": 2.1315969794988634, + "epoch": 2.8064, + "grad_norm": 0.9403624534606934, + "learning_rate": 1.6916165935983323e-05, + "loss": 1.5713, + "mean_token_accuracy": 0.5892721861600876, + "num_tokens": 3448328.0, + "step": 440 + }, + { + "entropy": 2.130605939030647, + "epoch": 2.8704, + "grad_norm": 0.8252040147781372, + "learning_rate": 1.676561900075041e-05, + "loss": 1.6003, + "mean_token_accuracy": 0.5845118075609207, + "num_tokens": 3529853.0, + "step": 450 + }, + { + "entropy": 2.112012493610382, + "epoch": 2.9344, + "grad_norm": 0.9267668724060059, + "learning_rate": 1.6612189610370336e-05, + "loss": 1.5796, + "mean_token_accuracy": 0.5887707889080047, + "num_tokens": 3610922.0, + "step": 460 + }, + { + "entropy": 2.100590059161186, + "epoch": 2.9984, + "grad_norm": 0.8996879458427429, + "learning_rate": 1.6455943132599698e-05, + "loss": 1.5483, + "mean_token_accuracy": 0.5934251204133034, + "num_tokens": 3688391.0, + "step": 470 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.547818809158533e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-471/training_args.bin b/checkpoint-471/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-471/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-628/README.md b/checkpoint-628/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-628/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-628/adapter_config.json b/checkpoint-628/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-628/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-628/adapter_model.safetensors b/checkpoint-628/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d5ab21ad802db9909da0550cf122c52ee286c9b --- /dev/null +++ b/checkpoint-628/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c29dbb6a8728aa8ed40462030877d1e5fbf7f8325728ea4928c3077073a81f5 +size 335604696 diff --git a/checkpoint-628/chat_template.jinja b/checkpoint-628/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-628/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-628/optimizer.pt b/checkpoint-628/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1815b73bae2c2965701d6f920739134e4f7b137 --- /dev/null +++ b/checkpoint-628/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e11531ed0c3a84897784ee462f101135ac11cf9bd6b6245367bc83da48ad057 +size 671473443 diff --git a/checkpoint-628/rng_state.pth b/checkpoint-628/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..73e0738dabdba7a4bb6d80f0d84a6b4a3e92ac79 --- /dev/null +++ b/checkpoint-628/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c6fd5967cb39ba0a6bb53fb759a329299c4ba0dc01e757494e5d63e2363b4a8 +size 14645 diff --git a/checkpoint-628/scheduler.pt b/checkpoint-628/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d475bc916164294fe48ae1947ce9536b612d32b6 --- /dev/null +++ b/checkpoint-628/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5258ff8912fb0085185a2678340d994d3a359e26e2cce028ab18629d2c3e64 +size 1465 diff --git a/checkpoint-628/special_tokens_map.json b/checkpoint-628/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-628/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-628/tokenizer.json b/checkpoint-628/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-628/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-628/tokenizer_config.json b/checkpoint-628/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-628/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-628/trainer_state.json b/checkpoint-628/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..32e8bbc2300d13bcc63423f0f510e96f007932e8 --- /dev/null +++ b/checkpoint-628/trainer_state.json @@ -0,0 +1,654 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 628, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + }, + { + "entropy": 2.1887036239778674, + "epoch": 2.0384, + "grad_norm": 0.6846157312393188, + "learning_rate": 1.8475859400301708e-05, + "loss": 1.5935, + "mean_token_accuracy": 0.5881956976813238, + "num_tokens": 2507166.0, + "step": 320 + }, + { + "entropy": 2.102977079153061, + "epoch": 2.1024, + "grad_norm": 0.7967628240585327, + "learning_rate": 1.8364527350607527e-05, + "loss": 1.5405, + "mean_token_accuracy": 0.5946892097592353, + "num_tokens": 2584298.0, + "step": 330 + }, + { + "entropy": 2.118516767024994, + "epoch": 2.1664, + "grad_norm": 0.7417224645614624, + "learning_rate": 1.824963163956726e-05, + "loss": 1.5727, + "mean_token_accuracy": 0.5870080485939979, + "num_tokens": 2663601.0, + "step": 340 + }, + { + "entropy": 2.104418155550957, + "epoch": 2.2304, + "grad_norm": 0.7956721782684326, + "learning_rate": 1.8131221217873175e-05, + "loss": 1.5575, + "mean_token_accuracy": 0.5936456203460694, + "num_tokens": 2744783.0, + "step": 350 + }, + { + "entropy": 2.129578319191933, + "epoch": 2.2944, + "grad_norm": 0.769292950630188, + "learning_rate": 1.8009346533640877e-05, + "loss": 1.5878, + "mean_token_accuracy": 0.5841517195105552, + "num_tokens": 2823023.0, + "step": 360 + }, + { + "entropy": 2.097687366604805, + "epoch": 2.3584, + "grad_norm": 0.9341740608215332, + "learning_rate": 1.7884059510916167e-05, + "loss": 1.5346, + "mean_token_accuracy": 0.599460557103157, + "num_tokens": 2899598.0, + "step": 370 + }, + { + "entropy": 2.151599031686783, + "epoch": 2.4224, + "grad_norm": 0.8752340078353882, + "learning_rate": 1.7755413527553087e-05, + "loss": 1.5984, + "mean_token_accuracy": 0.585393351316452, + "num_tokens": 2978519.0, + "step": 380 + }, + { + "entropy": 2.1223404884338377, + "epoch": 2.4864, + "grad_norm": 1.0296390056610107, + "learning_rate": 1.7623463392472574e-05, + "loss": 1.5232, + "mean_token_accuracy": 0.595654422044754, + "num_tokens": 3055327.0, + "step": 390 + }, + { + "entropy": 2.16276493370533, + "epoch": 2.5504, + "grad_norm": 0.9905762672424316, + "learning_rate": 1.748826532231142e-05, + "loss": 1.6049, + "mean_token_accuracy": 0.5822189599275589, + "num_tokens": 3135348.0, + "step": 400 + }, + { + "entropy": 2.127479985356331, + "epoch": 2.6144, + "grad_norm": 0.851375162601471, + "learning_rate": 1.7349876917471474e-05, + "loss": 1.5842, + "mean_token_accuracy": 0.5855211839079857, + "num_tokens": 3213122.0, + "step": 410 + }, + { + "entropy": 2.167752879858017, + "epoch": 2.6784, + "grad_norm": 0.975143313407898, + "learning_rate": 1.7208357137579318e-05, + "loss": 1.5918, + "mean_token_accuracy": 0.5839722648262977, + "num_tokens": 3289583.0, + "step": 420 + }, + { + "entropy": 2.127084198594093, + "epoch": 2.7424, + "grad_norm": 0.8077936768531799, + "learning_rate": 1.7063766276366814e-05, + "loss": 1.5916, + "mean_token_accuracy": 0.5900941833853721, + "num_tokens": 3369740.0, + "step": 430 + }, + { + "entropy": 2.1315969794988634, + "epoch": 2.8064, + "grad_norm": 0.9403624534606934, + "learning_rate": 1.6916165935983323e-05, + "loss": 1.5713, + "mean_token_accuracy": 0.5892721861600876, + "num_tokens": 3448328.0, + "step": 440 + }, + { + "entropy": 2.130605939030647, + "epoch": 2.8704, + "grad_norm": 0.8252040147781372, + "learning_rate": 1.676561900075041e-05, + "loss": 1.6003, + "mean_token_accuracy": 0.5845118075609207, + "num_tokens": 3529853.0, + "step": 450 + }, + { + "entropy": 2.112012493610382, + "epoch": 2.9344, + "grad_norm": 0.9267668724060059, + "learning_rate": 1.6612189610370336e-05, + "loss": 1.5796, + "mean_token_accuracy": 0.5887707889080047, + "num_tokens": 3610922.0, + "step": 460 + }, + { + "entropy": 2.100590059161186, + "epoch": 2.9984, + "grad_norm": 0.8996879458427429, + "learning_rate": 1.6455943132599698e-05, + "loss": 1.5483, + "mean_token_accuracy": 0.5934251204133034, + "num_tokens": 3688391.0, + "step": 470 + }, + { + "entropy": 2.1115864160898568, + "epoch": 3.0576, + "grad_norm": 1.097270131111145, + "learning_rate": 1.6296946135399835e-05, + "loss": 1.5506, + "mean_token_accuracy": 0.592829834770512, + "num_tokens": 3758747.0, + "step": 480 + }, + { + "entropy": 2.0610430628061294, + "epoch": 3.1216, + "grad_norm": 1.176645278930664, + "learning_rate": 1.613526635857591e-05, + "loss": 1.4461, + "mean_token_accuracy": 0.6111307457089424, + "num_tokens": 3834689.0, + "step": 490 + }, + { + "entropy": 2.0154007196426393, + "epoch": 3.1856, + "grad_norm": 1.1834276914596558, + "learning_rate": 1.5970972684916754e-05, + "loss": 1.4852, + "mean_token_accuracy": 0.6026980608701706, + "num_tokens": 3916450.0, + "step": 500 + }, + { + "entropy": 2.0441433399915696, + "epoch": 3.2496, + "grad_norm": 1.159286379814148, + "learning_rate": 1.5804135110847708e-05, + "loss": 1.4978, + "mean_token_accuracy": 0.6042912915349007, + "num_tokens": 3998511.0, + "step": 510 + }, + { + "entropy": 2.0493109285831452, + "epoch": 3.3136, + "grad_norm": 1.2141708135604858, + "learning_rate": 1.5634824716609037e-05, + "loss": 1.5018, + "mean_token_accuracy": 0.5995921581983567, + "num_tokens": 4077676.0, + "step": 520 + }, + { + "entropy": 2.0533218771219253, + "epoch": 3.3776, + "grad_norm": 1.1630637645721436, + "learning_rate": 1.5463113635972577e-05, + "loss": 1.499, + "mean_token_accuracy": 0.6046154126524925, + "num_tokens": 4155264.0, + "step": 530 + }, + { + "entropy": 2.0600034058094026, + "epoch": 3.4416, + "grad_norm": 1.2523504495620728, + "learning_rate": 1.528907502550954e-05, + "loss": 1.521, + "mean_token_accuracy": 0.6000443026423454, + "num_tokens": 4233655.0, + "step": 540 + }, + { + "entropy": 2.0414596855640412, + "epoch": 3.5056000000000003, + "grad_norm": 1.3990252017974854, + "learning_rate": 1.5112783033422547e-05, + "loss": 1.4899, + "mean_token_accuracy": 0.6026965886354446, + "num_tokens": 4311644.0, + "step": 550 + }, + { + "entropy": 2.061043033003807, + "epoch": 3.5696, + "grad_norm": 1.1884260177612305, + "learning_rate": 1.4934312767955193e-05, + "loss": 1.5143, + "mean_token_accuracy": 0.5981319859623909, + "num_tokens": 4390933.0, + "step": 560 + }, + { + "entropy": 2.034099668264389, + "epoch": 3.6336, + "grad_norm": 1.2996599674224854, + "learning_rate": 1.4753740265392595e-05, + "loss": 1.4953, + "mean_token_accuracy": 0.6029247522354126, + "num_tokens": 4470462.0, + "step": 570 + }, + { + "entropy": 2.0379767954349517, + "epoch": 3.6976, + "grad_norm": 1.2936193943023682, + "learning_rate": 1.4571142457666536e-05, + "loss": 1.4965, + "mean_token_accuracy": 0.6041712030768395, + "num_tokens": 4549236.0, + "step": 580 + }, + { + "entropy": 2.040063351392746, + "epoch": 3.7616, + "grad_norm": 1.5094560384750366, + "learning_rate": 1.4386597139579041e-05, + "loss": 1.4979, + "mean_token_accuracy": 0.6051288455724716, + "num_tokens": 4628758.0, + "step": 590 + }, + { + "entropy": 1.9998936265707017, + "epoch": 3.8256, + "grad_norm": 1.3166426420211792, + "learning_rate": 1.4200182935658327e-05, + "loss": 1.459, + "mean_token_accuracy": 0.6084850415587425, + "num_tokens": 4708526.0, + "step": 600 + }, + { + "entropy": 2.0041965901851655, + "epoch": 3.8895999999999997, + "grad_norm": 1.2710400819778442, + "learning_rate": 1.4011979266661235e-05, + "loss": 1.4831, + "mean_token_accuracy": 0.6057328775525093, + "num_tokens": 4788733.0, + "step": 610 + }, + { + "entropy": 2.0265558779239656, + "epoch": 3.9536, + "grad_norm": 1.4318969249725342, + "learning_rate": 1.3822066315736477e-05, + "loss": 1.4966, + "mean_token_accuracy": 0.5994595810770988, + "num_tokens": 4866451.0, + "step": 620 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.736335851796562e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-628/training_args.bin b/checkpoint-628/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-628/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-785/README.md b/checkpoint-785/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-785/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-785/adapter_config.json b/checkpoint-785/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-785/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-785/adapter_model.safetensors b/checkpoint-785/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52ebf41ebfe16cf4759b602c42ef058a514e6ec3 --- /dev/null +++ b/checkpoint-785/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9ff3dd8fee2465055546db403aee154f93b7ab57f7a025dfb2f6a144a22beb +size 335604696 diff --git a/checkpoint-785/chat_template.jinja b/checkpoint-785/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-785/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-785/optimizer.pt b/checkpoint-785/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f2d45d691f3da75e64442d688a0d49887decbfa --- /dev/null +++ b/checkpoint-785/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a828a3515367131adf4294565cac50b5ce8d96ece64bd4be38b8e923f10be8f +size 671473443 diff --git a/checkpoint-785/rng_state.pth b/checkpoint-785/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a531329f39c6e691c0b79a2f306a8a3cad76aef6 --- /dev/null +++ b/checkpoint-785/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6c9fe713cee29a726e5278b87ae97dca52c4373434517557374165d85d68f3f +size 14645 diff --git a/checkpoint-785/scheduler.pt b/checkpoint-785/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..273687627f76276621a0a0707acfc23b6575aee3 --- /dev/null +++ b/checkpoint-785/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57daaaaef5a8f0e5ab8df77e3d7f286c8b64f140f431394dfb3a4382535fe70 +size 1465 diff --git a/checkpoint-785/special_tokens_map.json b/checkpoint-785/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-785/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-785/tokenizer.json b/checkpoint-785/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-785/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-785/tokenizer_config.json b/checkpoint-785/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-785/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-785/trainer_state.json b/checkpoint-785/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..37ad36cf2e80caf1dda2110b1f5ea46c3797d1d4 --- /dev/null +++ b/checkpoint-785/trainer_state.json @@ -0,0 +1,814 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 785, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + }, + { + "entropy": 2.1887036239778674, + "epoch": 2.0384, + "grad_norm": 0.6846157312393188, + "learning_rate": 1.8475859400301708e-05, + "loss": 1.5935, + "mean_token_accuracy": 0.5881956976813238, + "num_tokens": 2507166.0, + "step": 320 + }, + { + "entropy": 2.102977079153061, + "epoch": 2.1024, + "grad_norm": 0.7967628240585327, + "learning_rate": 1.8364527350607527e-05, + "loss": 1.5405, + "mean_token_accuracy": 0.5946892097592353, + "num_tokens": 2584298.0, + "step": 330 + }, + { + "entropy": 2.118516767024994, + "epoch": 2.1664, + "grad_norm": 0.7417224645614624, + "learning_rate": 1.824963163956726e-05, + "loss": 1.5727, + "mean_token_accuracy": 0.5870080485939979, + "num_tokens": 2663601.0, + "step": 340 + }, + { + "entropy": 2.104418155550957, + "epoch": 2.2304, + "grad_norm": 0.7956721782684326, + "learning_rate": 1.8131221217873175e-05, + "loss": 1.5575, + "mean_token_accuracy": 0.5936456203460694, + "num_tokens": 2744783.0, + "step": 350 + }, + { + "entropy": 2.129578319191933, + "epoch": 2.2944, + "grad_norm": 0.769292950630188, + "learning_rate": 1.8009346533640877e-05, + "loss": 1.5878, + "mean_token_accuracy": 0.5841517195105552, + "num_tokens": 2823023.0, + "step": 360 + }, + { + "entropy": 2.097687366604805, + "epoch": 2.3584, + "grad_norm": 0.9341740608215332, + "learning_rate": 1.7884059510916167e-05, + "loss": 1.5346, + "mean_token_accuracy": 0.599460557103157, + "num_tokens": 2899598.0, + "step": 370 + }, + { + "entropy": 2.151599031686783, + "epoch": 2.4224, + "grad_norm": 0.8752340078353882, + "learning_rate": 1.7755413527553087e-05, + "loss": 1.5984, + "mean_token_accuracy": 0.585393351316452, + "num_tokens": 2978519.0, + "step": 380 + }, + { + "entropy": 2.1223404884338377, + "epoch": 2.4864, + "grad_norm": 1.0296390056610107, + "learning_rate": 1.7623463392472574e-05, + "loss": 1.5232, + "mean_token_accuracy": 0.595654422044754, + "num_tokens": 3055327.0, + "step": 390 + }, + { + "entropy": 2.16276493370533, + "epoch": 2.5504, + "grad_norm": 0.9905762672424316, + "learning_rate": 1.748826532231142e-05, + "loss": 1.6049, + "mean_token_accuracy": 0.5822189599275589, + "num_tokens": 3135348.0, + "step": 400 + }, + { + "entropy": 2.127479985356331, + "epoch": 2.6144, + "grad_norm": 0.851375162601471, + "learning_rate": 1.7349876917471474e-05, + "loss": 1.5842, + "mean_token_accuracy": 0.5855211839079857, + "num_tokens": 3213122.0, + "step": 410 + }, + { + "entropy": 2.167752879858017, + "epoch": 2.6784, + "grad_norm": 0.975143313407898, + "learning_rate": 1.7208357137579318e-05, + "loss": 1.5918, + "mean_token_accuracy": 0.5839722648262977, + "num_tokens": 3289583.0, + "step": 420 + }, + { + "entropy": 2.127084198594093, + "epoch": 2.7424, + "grad_norm": 0.8077936768531799, + "learning_rate": 1.7063766276366814e-05, + "loss": 1.5916, + "mean_token_accuracy": 0.5900941833853721, + "num_tokens": 3369740.0, + "step": 430 + }, + { + "entropy": 2.1315969794988634, + "epoch": 2.8064, + "grad_norm": 0.9403624534606934, + "learning_rate": 1.6916165935983323e-05, + "loss": 1.5713, + "mean_token_accuracy": 0.5892721861600876, + "num_tokens": 3448328.0, + "step": 440 + }, + { + "entropy": 2.130605939030647, + "epoch": 2.8704, + "grad_norm": 0.8252040147781372, + "learning_rate": 1.676561900075041e-05, + "loss": 1.6003, + "mean_token_accuracy": 0.5845118075609207, + "num_tokens": 3529853.0, + "step": 450 + }, + { + "entropy": 2.112012493610382, + "epoch": 2.9344, + "grad_norm": 0.9267668724060059, + "learning_rate": 1.6612189610370336e-05, + "loss": 1.5796, + "mean_token_accuracy": 0.5887707889080047, + "num_tokens": 3610922.0, + "step": 460 + }, + { + "entropy": 2.100590059161186, + "epoch": 2.9984, + "grad_norm": 0.8996879458427429, + "learning_rate": 1.6455943132599698e-05, + "loss": 1.5483, + "mean_token_accuracy": 0.5934251204133034, + "num_tokens": 3688391.0, + "step": 470 + }, + { + "entropy": 2.1115864160898568, + "epoch": 3.0576, + "grad_norm": 1.097270131111145, + "learning_rate": 1.6296946135399835e-05, + "loss": 1.5506, + "mean_token_accuracy": 0.592829834770512, + "num_tokens": 3758747.0, + "step": 480 + }, + { + "entropy": 2.0610430628061294, + "epoch": 3.1216, + "grad_norm": 1.176645278930664, + "learning_rate": 1.613526635857591e-05, + "loss": 1.4461, + "mean_token_accuracy": 0.6111307457089424, + "num_tokens": 3834689.0, + "step": 490 + }, + { + "entropy": 2.0154007196426393, + "epoch": 3.1856, + "grad_norm": 1.1834276914596558, + "learning_rate": 1.5970972684916754e-05, + "loss": 1.4852, + "mean_token_accuracy": 0.6026980608701706, + "num_tokens": 3916450.0, + "step": 500 + }, + { + "entropy": 2.0441433399915696, + "epoch": 3.2496, + "grad_norm": 1.159286379814148, + "learning_rate": 1.5804135110847708e-05, + "loss": 1.4978, + "mean_token_accuracy": 0.6042912915349007, + "num_tokens": 3998511.0, + "step": 510 + }, + { + "entropy": 2.0493109285831452, + "epoch": 3.3136, + "grad_norm": 1.2141708135604858, + "learning_rate": 1.5634824716609037e-05, + "loss": 1.5018, + "mean_token_accuracy": 0.5995921581983567, + "num_tokens": 4077676.0, + "step": 520 + }, + { + "entropy": 2.0533218771219253, + "epoch": 3.3776, + "grad_norm": 1.1630637645721436, + "learning_rate": 1.5463113635972577e-05, + "loss": 1.499, + "mean_token_accuracy": 0.6046154126524925, + "num_tokens": 4155264.0, + "step": 530 + }, + { + "entropy": 2.0600034058094026, + "epoch": 3.4416, + "grad_norm": 1.2523504495620728, + "learning_rate": 1.528907502550954e-05, + "loss": 1.521, + "mean_token_accuracy": 0.6000443026423454, + "num_tokens": 4233655.0, + "step": 540 + }, + { + "entropy": 2.0414596855640412, + "epoch": 3.5056000000000003, + "grad_norm": 1.3990252017974854, + "learning_rate": 1.5112783033422547e-05, + "loss": 1.4899, + "mean_token_accuracy": 0.6026965886354446, + "num_tokens": 4311644.0, + "step": 550 + }, + { + "entropy": 2.061043033003807, + "epoch": 3.5696, + "grad_norm": 1.1884260177612305, + "learning_rate": 1.4934312767955193e-05, + "loss": 1.5143, + "mean_token_accuracy": 0.5981319859623909, + "num_tokens": 4390933.0, + "step": 560 + }, + { + "entropy": 2.034099668264389, + "epoch": 3.6336, + "grad_norm": 1.2996599674224854, + "learning_rate": 1.4753740265392595e-05, + "loss": 1.4953, + "mean_token_accuracy": 0.6029247522354126, + "num_tokens": 4470462.0, + "step": 570 + }, + { + "entropy": 2.0379767954349517, + "epoch": 3.6976, + "grad_norm": 1.2936193943023682, + "learning_rate": 1.4571142457666536e-05, + "loss": 1.4965, + "mean_token_accuracy": 0.6041712030768395, + "num_tokens": 4549236.0, + "step": 580 + }, + { + "entropy": 2.040063351392746, + "epoch": 3.7616, + "grad_norm": 1.5094560384750366, + "learning_rate": 1.4386597139579041e-05, + "loss": 1.4979, + "mean_token_accuracy": 0.6051288455724716, + "num_tokens": 4628758.0, + "step": 590 + }, + { + "entropy": 1.9998936265707017, + "epoch": 3.8256, + "grad_norm": 1.3166426420211792, + "learning_rate": 1.4200182935658327e-05, + "loss": 1.459, + "mean_token_accuracy": 0.6084850415587425, + "num_tokens": 4708526.0, + "step": 600 + }, + { + "entropy": 2.0041965901851655, + "epoch": 3.8895999999999997, + "grad_norm": 1.2710400819778442, + "learning_rate": 1.4011979266661235e-05, + "loss": 1.4831, + "mean_token_accuracy": 0.6057328775525093, + "num_tokens": 4788733.0, + "step": 610 + }, + { + "entropy": 2.0265558779239656, + "epoch": 3.9536, + "grad_norm": 1.4318969249725342, + "learning_rate": 1.3822066315736477e-05, + "loss": 1.4966, + "mean_token_accuracy": 0.5994595810770988, + "num_tokens": 4866451.0, + "step": 620 + }, + { + "entropy": 2.0692459924800977, + "epoch": 4.0128, + "grad_norm": 1.2546013593673706, + "learning_rate": 1.363052499426302e-05, + "loss": 1.503, + "mean_token_accuracy": 0.6039850309088424, + "num_tokens": 4936715.0, + "step": 630 + }, + { + "entropy": 1.9788923293352128, + "epoch": 4.0768, + "grad_norm": 1.416927456855774, + "learning_rate": 1.3437436907378225e-05, + "loss": 1.4248, + "mean_token_accuracy": 0.6142558038234711, + "num_tokens": 5016713.0, + "step": 640 + }, + { + "entropy": 1.9646029412746429, + "epoch": 4.1408, + "grad_norm": 1.5146726369857788, + "learning_rate": 1.3242884319210463e-05, + "loss": 1.3875, + "mean_token_accuracy": 0.624424883723259, + "num_tokens": 5096513.0, + "step": 650 + }, + { + "entropy": 1.93471617102623, + "epoch": 4.2048, + "grad_norm": 1.5090768337249756, + "learning_rate": 1.3046950117830888e-05, + "loss": 1.3884, + "mean_token_accuracy": 0.6222448632121086, + "num_tokens": 5177075.0, + "step": 660 + }, + { + "entropy": 2.002266028523445, + "epoch": 4.2688, + "grad_norm": 1.74358332157135, + "learning_rate": 1.2849717779939439e-05, + "loss": 1.4062, + "mean_token_accuracy": 0.6180147424340248, + "num_tokens": 5252902.0, + "step": 670 + }, + { + "entropy": 1.9397415190935134, + "epoch": 4.3328, + "grad_norm": 1.774728775024414, + "learning_rate": 1.2651271335300063e-05, + "loss": 1.3933, + "mean_token_accuracy": 0.626343595981598, + "num_tokens": 5331448.0, + "step": 680 + }, + { + "entropy": 1.9571841150522231, + "epoch": 4.3968, + "grad_norm": 1.80965256690979, + "learning_rate": 1.2451695330940268e-05, + "loss": 1.4205, + "mean_token_accuracy": 0.6187710732221603, + "num_tokens": 5410857.0, + "step": 690 + }, + { + "entropy": 1.9691186994314194, + "epoch": 4.4608, + "grad_norm": 1.5400609970092773, + "learning_rate": 1.2251074795130339e-05, + "loss": 1.4123, + "mean_token_accuracy": 0.614769059419632, + "num_tokens": 5488867.0, + "step": 700 + }, + { + "entropy": 1.9635825514793397, + "epoch": 4.5248, + "grad_norm": 1.467608094215393, + "learning_rate": 1.2049495201157489e-05, + "loss": 1.4228, + "mean_token_accuracy": 0.6202724784612655, + "num_tokens": 5567515.0, + "step": 710 + }, + { + "entropy": 1.9384470194578172, + "epoch": 4.5888, + "grad_norm": 1.652387022972107, + "learning_rate": 1.1847042430910451e-05, + "loss": 1.4273, + "mean_token_accuracy": 0.6190450325608253, + "num_tokens": 5648858.0, + "step": 720 + }, + { + "entropy": 1.9911590039730072, + "epoch": 4.6528, + "grad_norm": 1.7492380142211914, + "learning_rate": 1.1643802738289955e-05, + "loss": 1.4776, + "mean_token_accuracy": 0.6073927089571953, + "num_tokens": 5725459.0, + "step": 730 + }, + { + "entropy": 1.9724233269691467, + "epoch": 4.7168, + "grad_norm": 1.709669828414917, + "learning_rate": 1.1439862712460721e-05, + "loss": 1.4217, + "mean_token_accuracy": 0.6184087961912155, + "num_tokens": 5801601.0, + "step": 740 + }, + { + "entropy": 1.9725236982107162, + "epoch": 4.7808, + "grad_norm": 1.7469470500946045, + "learning_rate": 1.1235309240960621e-05, + "loss": 1.405, + "mean_token_accuracy": 0.6196158319711685, + "num_tokens": 5881107.0, + "step": 750 + }, + { + "entropy": 1.9484833419322967, + "epoch": 4.8448, + "grad_norm": 1.532373309135437, + "learning_rate": 1.1030229472682719e-05, + "loss": 1.4155, + "mean_token_accuracy": 0.611663281917572, + "num_tokens": 5960375.0, + "step": 760 + }, + { + "entropy": 1.9964754343032838, + "epoch": 4.9088, + "grad_norm": 1.7157669067382812, + "learning_rate": 1.0824710780745954e-05, + "loss": 1.4295, + "mean_token_accuracy": 0.6131752103567123, + "num_tokens": 6038267.0, + "step": 770 + }, + { + "entropy": 1.9598666340112687, + "epoch": 4.9728, + "grad_norm": 1.9844586849212646, + "learning_rate": 1.06188407252703e-05, + "loss": 1.397, + "mean_token_accuracy": 0.6226776748895645, + "num_tokens": 6114749.0, + "step": 780 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.916423850556129e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-785/training_args.bin b/checkpoint-785/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-785/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/checkpoint-942/README.md b/checkpoint-942/README.md new file mode 100644 index 0000000000000000000000000000000000000000..96b9f5618833a1728fbecbefb87f08b279b6b2ed --- /dev/null +++ b/checkpoint-942/README.md @@ -0,0 +1,209 @@ +--- +base_model: meta-llama/Llama-3.1-8B-Instruct +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:meta-llama/Llama-3.1-8B-Instruct +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-942/adapter_config.json b/checkpoint-942/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c9f8ac4415dbfa10513a708f0c7a350d03f056a5 --- /dev/null +++ b/checkpoint-942/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "up_proj", + "o_proj", + "k_proj", + "q_proj", + "gate_proj", + "v_proj", + "down_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-942/adapter_model.safetensors b/checkpoint-942/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6c1a92c65b6423ddc3a1b4c30ee5f79b19ac920 --- /dev/null +++ b/checkpoint-942/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a9facc0ea992b7d4cf4ffd7558341c72ccf32a515975472efc0f01b9282fd22 +size 335604696 diff --git a/checkpoint-942/chat_template.jinja b/checkpoint-942/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..33089ace1be88f22a10fe861ad49718d5d886090 --- /dev/null +++ b/checkpoint-942/chat_template.jinja @@ -0,0 +1,109 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- set date_string = "26 Jul 2024" %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. #} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message + builtin tools #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if builtin_tools is defined or tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{%- if builtin_tools is defined %} + {{- "Tools: " + builtin_tools | reject('equalto', 'code_interpreter') | join(", ") + "\n\n"}} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {%- if builtin_tools is defined and tool_call.name in builtin_tools %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- "<|python_tag|>" + tool_call.name + ".call(" }} + {%- for arg_name, arg_val in tool_call.arguments | items %} + {{- arg_name + '="' + arg_val + '"' }} + {%- if not loop.last %} + {{- ", " }} + {%- endif %} + {%- endfor %} + {{- ")" }} + {%- else %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {%- endif %} + {%- if builtin_tools is defined %} + {#- This means we're in ipython mode #} + {{- "<|eom_id|>" }} + {%- else %} + {{- "<|eot_id|>" }} + {%- endif %} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/checkpoint-942/optimizer.pt b/checkpoint-942/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2a7556c4d8aea478455ab03990eef62b8934fb0 --- /dev/null +++ b/checkpoint-942/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:408ba81ed6a1f89c133737ec53177f46c4cb065a2c686ad3455d0d41e6880e73 +size 671473443 diff --git a/checkpoint-942/rng_state.pth b/checkpoint-942/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..882b6f57b88e1c30ee0240d884c8ad9a89e38a25 --- /dev/null +++ b/checkpoint-942/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2076557ec9c7422e59a0adc994706b844a10d6f8a361eba8480d5bf65a10a7b7 +size 14645 diff --git a/checkpoint-942/scheduler.pt b/checkpoint-942/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d331d1da57cca8df7b0b2bbdde269c871ca68ad0 --- /dev/null +++ b/checkpoint-942/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28e2e1de4995d2d70d88e704c4168a68fb7c78b73afd2783e4ab8d1dcb64280f +size 1465 diff --git a/checkpoint-942/special_tokens_map.json b/checkpoint-942/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/checkpoint-942/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-942/tokenizer.json b/checkpoint-942/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/checkpoint-942/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/checkpoint-942/tokenizer_config.json b/checkpoint-942/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/checkpoint-942/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-942/trainer_state.json b/checkpoint-942/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..27e450603b346a1abc9cadceb222aa297f677f71 --- /dev/null +++ b/checkpoint-942/trainer_state.json @@ -0,0 +1,974 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 942, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 2.05683453977108, + "epoch": 0.064, + "grad_norm": 1.5962693691253662, + "learning_rate": 3.7500000000000005e-06, + "loss": 2.056, + "mean_token_accuracy": 0.527055786550045, + "num_tokens": 78959.0, + "step": 10 + }, + { + "entropy": 2.2151891469955443, + "epoch": 0.128, + "grad_norm": 1.022516131401062, + "learning_rate": 7.916666666666667e-06, + "loss": 2.0766, + "mean_token_accuracy": 0.5204883277416229, + "num_tokens": 158094.0, + "step": 20 + }, + { + "entropy": 2.376429131627083, + "epoch": 0.192, + "grad_norm": 0.8220515251159668, + "learning_rate": 1.2083333333333333e-05, + "loss": 1.8902, + "mean_token_accuracy": 0.5391427092254162, + "num_tokens": 239163.0, + "step": 30 + }, + { + "entropy": 2.285146689414978, + "epoch": 0.256, + "grad_norm": 0.7022648453712463, + "learning_rate": 1.6250000000000002e-05, + "loss": 1.7478, + "mean_token_accuracy": 0.56041978597641, + "num_tokens": 317818.0, + "step": 40 + }, + { + "entropy": 2.3282038152217863, + "epoch": 0.32, + "grad_norm": 0.5584391951560974, + "learning_rate": 1.9999978697023387e-05, + "loss": 1.7687, + "mean_token_accuracy": 0.5601607479155064, + "num_tokens": 396146.0, + "step": 50 + }, + { + "entropy": 2.2709642231464384, + "epoch": 0.384, + "grad_norm": 0.5373395085334778, + "learning_rate": 1.999742244965125e-05, + "loss": 1.6913, + "mean_token_accuracy": 0.5693033933639526, + "num_tokens": 474291.0, + "step": 60 + }, + { + "entropy": 2.2445768117904663, + "epoch": 0.448, + "grad_norm": 0.4558122754096985, + "learning_rate": 1.9990606854864625e-05, + "loss": 1.679, + "mean_token_accuracy": 0.5720810443162918, + "num_tokens": 554739.0, + "step": 70 + }, + { + "entropy": 2.2270330280065536, + "epoch": 0.512, + "grad_norm": 0.5535369515419006, + "learning_rate": 1.997953481641056e-05, + "loss": 1.6522, + "mean_token_accuracy": 0.574026207625866, + "num_tokens": 633658.0, + "step": 80 + }, + { + "entropy": 2.2367560386657717, + "epoch": 0.576, + "grad_norm": 0.5366299152374268, + "learning_rate": 1.9964211051470778e-05, + "loss": 1.6955, + "mean_token_accuracy": 0.5699351653456688, + "num_tokens": 712400.0, + "step": 90 + }, + { + "entropy": 2.21894571185112, + "epoch": 0.64, + "grad_norm": 0.4690150022506714, + "learning_rate": 1.994464208865191e-05, + "loss": 1.7048, + "mean_token_accuracy": 0.5701304003596306, + "num_tokens": 792630.0, + "step": 100 + }, + { + "entropy": 2.235249537229538, + "epoch": 0.704, + "grad_norm": 0.5834165811538696, + "learning_rate": 1.9920836265204047e-05, + "loss": 1.7032, + "mean_token_accuracy": 0.5705543920397759, + "num_tokens": 872045.0, + "step": 110 + }, + { + "entropy": 2.2257163137197495, + "epoch": 0.768, + "grad_norm": 0.5584805011749268, + "learning_rate": 1.989280372346868e-05, + "loss": 1.666, + "mean_token_accuracy": 0.5684764981269836, + "num_tokens": 952057.0, + "step": 120 + }, + { + "entropy": 2.2563431203365325, + "epoch": 0.832, + "grad_norm": 0.5170231461524963, + "learning_rate": 1.986055640655763e-05, + "loss": 1.7134, + "mean_token_accuracy": 0.570289532840252, + "num_tokens": 1029200.0, + "step": 130 + }, + { + "entropy": 2.2378907680511473, + "epoch": 0.896, + "grad_norm": 0.5027748942375183, + "learning_rate": 1.9824108053264726e-05, + "loss": 1.6719, + "mean_token_accuracy": 0.5730531394481659, + "num_tokens": 1105844.0, + "step": 140 + }, + { + "entropy": 2.1966699600219726, + "epoch": 0.96, + "grad_norm": 0.5884814262390137, + "learning_rate": 1.9783474192212484e-05, + "loss": 1.6327, + "mean_token_accuracy": 0.5813805550336838, + "num_tokens": 1182935.0, + "step": 150 + }, + { + "entropy": 2.20564815804765, + "epoch": 1.0192, + "grad_norm": 0.570175290107727, + "learning_rate": 1.9738672135236218e-05, + "loss": 1.6118, + "mean_token_accuracy": 0.582583570802534, + "num_tokens": 1254363.0, + "step": 160 + }, + { + "entropy": 2.1847074955701826, + "epoch": 1.0832, + "grad_norm": 0.5836730003356934, + "learning_rate": 1.968972097000843e-05, + "loss": 1.6172, + "mean_token_accuracy": 0.5812226444482803, + "num_tokens": 1330281.0, + "step": 170 + }, + { + "entropy": 2.1814055383205413, + "epoch": 1.1472, + "grad_norm": 0.5746439695358276, + "learning_rate": 1.96366415519066e-05, + "loss": 1.6192, + "mean_token_accuracy": 0.5789176046848297, + "num_tokens": 1409407.0, + "step": 180 + }, + { + "entropy": 2.2038993716239927, + "epoch": 1.2112, + "grad_norm": 0.5652104616165161, + "learning_rate": 1.957945649512788e-05, + "loss": 1.6166, + "mean_token_accuracy": 0.5809548154473305, + "num_tokens": 1489034.0, + "step": 190 + }, + { + "entropy": 2.173789343237877, + "epoch": 1.2752, + "grad_norm": 0.6653291583061218, + "learning_rate": 1.951819016305442e-05, + "loss": 1.62, + "mean_token_accuracy": 0.5827470317482948, + "num_tokens": 1568549.0, + "step": 200 + }, + { + "entropy": 2.1907752990722655, + "epoch": 1.3392, + "grad_norm": 0.7024573087692261, + "learning_rate": 1.9452868657873513e-05, + "loss": 1.6397, + "mean_token_accuracy": 0.5796025812625885, + "num_tokens": 1647404.0, + "step": 210 + }, + { + "entropy": 2.189376249909401, + "epoch": 1.4032, + "grad_norm": 0.5727422833442688, + "learning_rate": 1.9383519809456862e-05, + "loss": 1.6349, + "mean_token_accuracy": 0.5815459445118905, + "num_tokens": 1728421.0, + "step": 220 + }, + { + "entropy": 2.209022229909897, + "epoch": 1.4672, + "grad_norm": 0.6421232223510742, + "learning_rate": 1.931017316350384e-05, + "loss": 1.6425, + "mean_token_accuracy": 0.5790404245257378, + "num_tokens": 1806891.0, + "step": 230 + }, + { + "entropy": 2.2337595343589784, + "epoch": 1.5312000000000001, + "grad_norm": 0.6296209692955017, + "learning_rate": 1.9232859968953702e-05, + "loss": 1.624, + "mean_token_accuracy": 0.5814317353069782, + "num_tokens": 1883100.0, + "step": 240 + }, + { + "entropy": 2.205833575129509, + "epoch": 1.5952, + "grad_norm": 0.6371021866798401, + "learning_rate": 1.9151613164672136e-05, + "loss": 1.6284, + "mean_token_accuracy": 0.5819905593991279, + "num_tokens": 1961317.0, + "step": 250 + }, + { + "entropy": 2.205822005867958, + "epoch": 1.6592, + "grad_norm": 0.6950616836547852, + "learning_rate": 1.9066467365417844e-05, + "loss": 1.6374, + "mean_token_accuracy": 0.5760326236486435, + "num_tokens": 2042881.0, + "step": 260 + }, + { + "entropy": 2.2163637399673464, + "epoch": 1.7231999999999998, + "grad_norm": 0.7801616191864014, + "learning_rate": 1.8977458847095117e-05, + "loss": 1.663, + "mean_token_accuracy": 0.5744953289628029, + "num_tokens": 2121403.0, + "step": 270 + }, + { + "entropy": 2.199243775010109, + "epoch": 1.7872, + "grad_norm": 0.6671239733695984, + "learning_rate": 1.888462553129867e-05, + "loss": 1.6456, + "mean_token_accuracy": 0.579181258380413, + "num_tokens": 2200908.0, + "step": 280 + }, + { + "entropy": 2.214826595783234, + "epoch": 1.8512, + "grad_norm": 0.7415009140968323, + "learning_rate": 1.878800696915737e-05, + "loss": 1.6113, + "mean_token_accuracy": 0.5840038731694221, + "num_tokens": 2278414.0, + "step": 290 + }, + { + "entropy": 2.187604659795761, + "epoch": 1.9152, + "grad_norm": 0.662319540977478, + "learning_rate": 1.868764432448369e-05, + "loss": 1.6182, + "mean_token_accuracy": 0.580166706442833, + "num_tokens": 2355826.0, + "step": 300 + }, + { + "entropy": 2.2184703826904295, + "epoch": 1.9792, + "grad_norm": 0.7123025059700012, + "learning_rate": 1.8583580356236065e-05, + "loss": 1.655, + "mean_token_accuracy": 0.5762834578752518, + "num_tokens": 2434933.0, + "step": 310 + }, + { + "entropy": 2.1887036239778674, + "epoch": 2.0384, + "grad_norm": 0.6846157312393188, + "learning_rate": 1.8475859400301708e-05, + "loss": 1.5935, + "mean_token_accuracy": 0.5881956976813238, + "num_tokens": 2507166.0, + "step": 320 + }, + { + "entropy": 2.102977079153061, + "epoch": 2.1024, + "grad_norm": 0.7967628240585327, + "learning_rate": 1.8364527350607527e-05, + "loss": 1.5405, + "mean_token_accuracy": 0.5946892097592353, + "num_tokens": 2584298.0, + "step": 330 + }, + { + "entropy": 2.118516767024994, + "epoch": 2.1664, + "grad_norm": 0.7417224645614624, + "learning_rate": 1.824963163956726e-05, + "loss": 1.5727, + "mean_token_accuracy": 0.5870080485939979, + "num_tokens": 2663601.0, + "step": 340 + }, + { + "entropy": 2.104418155550957, + "epoch": 2.2304, + "grad_norm": 0.7956721782684326, + "learning_rate": 1.8131221217873175e-05, + "loss": 1.5575, + "mean_token_accuracy": 0.5936456203460694, + "num_tokens": 2744783.0, + "step": 350 + }, + { + "entropy": 2.129578319191933, + "epoch": 2.2944, + "grad_norm": 0.769292950630188, + "learning_rate": 1.8009346533640877e-05, + "loss": 1.5878, + "mean_token_accuracy": 0.5841517195105552, + "num_tokens": 2823023.0, + "step": 360 + }, + { + "entropy": 2.097687366604805, + "epoch": 2.3584, + "grad_norm": 0.9341740608215332, + "learning_rate": 1.7884059510916167e-05, + "loss": 1.5346, + "mean_token_accuracy": 0.599460557103157, + "num_tokens": 2899598.0, + "step": 370 + }, + { + "entropy": 2.151599031686783, + "epoch": 2.4224, + "grad_norm": 0.8752340078353882, + "learning_rate": 1.7755413527553087e-05, + "loss": 1.5984, + "mean_token_accuracy": 0.585393351316452, + "num_tokens": 2978519.0, + "step": 380 + }, + { + "entropy": 2.1223404884338377, + "epoch": 2.4864, + "grad_norm": 1.0296390056610107, + "learning_rate": 1.7623463392472574e-05, + "loss": 1.5232, + "mean_token_accuracy": 0.595654422044754, + "num_tokens": 3055327.0, + "step": 390 + }, + { + "entropy": 2.16276493370533, + "epoch": 2.5504, + "grad_norm": 0.9905762672424316, + "learning_rate": 1.748826532231142e-05, + "loss": 1.6049, + "mean_token_accuracy": 0.5822189599275589, + "num_tokens": 3135348.0, + "step": 400 + }, + { + "entropy": 2.127479985356331, + "epoch": 2.6144, + "grad_norm": 0.851375162601471, + "learning_rate": 1.7349876917471474e-05, + "loss": 1.5842, + "mean_token_accuracy": 0.5855211839079857, + "num_tokens": 3213122.0, + "step": 410 + }, + { + "entropy": 2.167752879858017, + "epoch": 2.6784, + "grad_norm": 0.975143313407898, + "learning_rate": 1.7208357137579318e-05, + "loss": 1.5918, + "mean_token_accuracy": 0.5839722648262977, + "num_tokens": 3289583.0, + "step": 420 + }, + { + "entropy": 2.127084198594093, + "epoch": 2.7424, + "grad_norm": 0.8077936768531799, + "learning_rate": 1.7063766276366814e-05, + "loss": 1.5916, + "mean_token_accuracy": 0.5900941833853721, + "num_tokens": 3369740.0, + "step": 430 + }, + { + "entropy": 2.1315969794988634, + "epoch": 2.8064, + "grad_norm": 0.9403624534606934, + "learning_rate": 1.6916165935983323e-05, + "loss": 1.5713, + "mean_token_accuracy": 0.5892721861600876, + "num_tokens": 3448328.0, + "step": 440 + }, + { + "entropy": 2.130605939030647, + "epoch": 2.8704, + "grad_norm": 0.8252040147781372, + "learning_rate": 1.676561900075041e-05, + "loss": 1.6003, + "mean_token_accuracy": 0.5845118075609207, + "num_tokens": 3529853.0, + "step": 450 + }, + { + "entropy": 2.112012493610382, + "epoch": 2.9344, + "grad_norm": 0.9267668724060059, + "learning_rate": 1.6612189610370336e-05, + "loss": 1.5796, + "mean_token_accuracy": 0.5887707889080047, + "num_tokens": 3610922.0, + "step": 460 + }, + { + "entropy": 2.100590059161186, + "epoch": 2.9984, + "grad_norm": 0.8996879458427429, + "learning_rate": 1.6455943132599698e-05, + "loss": 1.5483, + "mean_token_accuracy": 0.5934251204133034, + "num_tokens": 3688391.0, + "step": 470 + }, + { + "entropy": 2.1115864160898568, + "epoch": 3.0576, + "grad_norm": 1.097270131111145, + "learning_rate": 1.6296946135399835e-05, + "loss": 1.5506, + "mean_token_accuracy": 0.592829834770512, + "num_tokens": 3758747.0, + "step": 480 + }, + { + "entropy": 2.0610430628061294, + "epoch": 3.1216, + "grad_norm": 1.176645278930664, + "learning_rate": 1.613526635857591e-05, + "loss": 1.4461, + "mean_token_accuracy": 0.6111307457089424, + "num_tokens": 3834689.0, + "step": 490 + }, + { + "entropy": 2.0154007196426393, + "epoch": 3.1856, + "grad_norm": 1.1834276914596558, + "learning_rate": 1.5970972684916754e-05, + "loss": 1.4852, + "mean_token_accuracy": 0.6026980608701706, + "num_tokens": 3916450.0, + "step": 500 + }, + { + "entropy": 2.0441433399915696, + "epoch": 3.2496, + "grad_norm": 1.159286379814148, + "learning_rate": 1.5804135110847708e-05, + "loss": 1.4978, + "mean_token_accuracy": 0.6042912915349007, + "num_tokens": 3998511.0, + "step": 510 + }, + { + "entropy": 2.0493109285831452, + "epoch": 3.3136, + "grad_norm": 1.2141708135604858, + "learning_rate": 1.5634824716609037e-05, + "loss": 1.5018, + "mean_token_accuracy": 0.5995921581983567, + "num_tokens": 4077676.0, + "step": 520 + }, + { + "entropy": 2.0533218771219253, + "epoch": 3.3776, + "grad_norm": 1.1630637645721436, + "learning_rate": 1.5463113635972577e-05, + "loss": 1.499, + "mean_token_accuracy": 0.6046154126524925, + "num_tokens": 4155264.0, + "step": 530 + }, + { + "entropy": 2.0600034058094026, + "epoch": 3.4416, + "grad_norm": 1.2523504495620728, + "learning_rate": 1.528907502550954e-05, + "loss": 1.521, + "mean_token_accuracy": 0.6000443026423454, + "num_tokens": 4233655.0, + "step": 540 + }, + { + "entropy": 2.0414596855640412, + "epoch": 3.5056000000000003, + "grad_norm": 1.3990252017974854, + "learning_rate": 1.5112783033422547e-05, + "loss": 1.4899, + "mean_token_accuracy": 0.6026965886354446, + "num_tokens": 4311644.0, + "step": 550 + }, + { + "entropy": 2.061043033003807, + "epoch": 3.5696, + "grad_norm": 1.1884260177612305, + "learning_rate": 1.4934312767955193e-05, + "loss": 1.5143, + "mean_token_accuracy": 0.5981319859623909, + "num_tokens": 4390933.0, + "step": 560 + }, + { + "entropy": 2.034099668264389, + "epoch": 3.6336, + "grad_norm": 1.2996599674224854, + "learning_rate": 1.4753740265392595e-05, + "loss": 1.4953, + "mean_token_accuracy": 0.6029247522354126, + "num_tokens": 4470462.0, + "step": 570 + }, + { + "entropy": 2.0379767954349517, + "epoch": 3.6976, + "grad_norm": 1.2936193943023682, + "learning_rate": 1.4571142457666536e-05, + "loss": 1.4965, + "mean_token_accuracy": 0.6041712030768395, + "num_tokens": 4549236.0, + "step": 580 + }, + { + "entropy": 2.040063351392746, + "epoch": 3.7616, + "grad_norm": 1.5094560384750366, + "learning_rate": 1.4386597139579041e-05, + "loss": 1.4979, + "mean_token_accuracy": 0.6051288455724716, + "num_tokens": 4628758.0, + "step": 590 + }, + { + "entropy": 1.9998936265707017, + "epoch": 3.8256, + "grad_norm": 1.3166426420211792, + "learning_rate": 1.4200182935658327e-05, + "loss": 1.459, + "mean_token_accuracy": 0.6084850415587425, + "num_tokens": 4708526.0, + "step": 600 + }, + { + "entropy": 2.0041965901851655, + "epoch": 3.8895999999999997, + "grad_norm": 1.2710400819778442, + "learning_rate": 1.4011979266661235e-05, + "loss": 1.4831, + "mean_token_accuracy": 0.6057328775525093, + "num_tokens": 4788733.0, + "step": 610 + }, + { + "entropy": 2.0265558779239656, + "epoch": 3.9536, + "grad_norm": 1.4318969249725342, + "learning_rate": 1.3822066315736477e-05, + "loss": 1.4966, + "mean_token_accuracy": 0.5994595810770988, + "num_tokens": 4866451.0, + "step": 620 + }, + { + "entropy": 2.0692459924800977, + "epoch": 4.0128, + "grad_norm": 1.2546013593673706, + "learning_rate": 1.363052499426302e-05, + "loss": 1.503, + "mean_token_accuracy": 0.6039850309088424, + "num_tokens": 4936715.0, + "step": 630 + }, + { + "entropy": 1.9788923293352128, + "epoch": 4.0768, + "grad_norm": 1.416927456855774, + "learning_rate": 1.3437436907378225e-05, + "loss": 1.4248, + "mean_token_accuracy": 0.6142558038234711, + "num_tokens": 5016713.0, + "step": 640 + }, + { + "entropy": 1.9646029412746429, + "epoch": 4.1408, + "grad_norm": 1.5146726369857788, + "learning_rate": 1.3242884319210463e-05, + "loss": 1.3875, + "mean_token_accuracy": 0.624424883723259, + "num_tokens": 5096513.0, + "step": 650 + }, + { + "entropy": 1.93471617102623, + "epoch": 4.2048, + "grad_norm": 1.5090768337249756, + "learning_rate": 1.3046950117830888e-05, + "loss": 1.3884, + "mean_token_accuracy": 0.6222448632121086, + "num_tokens": 5177075.0, + "step": 660 + }, + { + "entropy": 2.002266028523445, + "epoch": 4.2688, + "grad_norm": 1.74358332157135, + "learning_rate": 1.2849717779939439e-05, + "loss": 1.4062, + "mean_token_accuracy": 0.6180147424340248, + "num_tokens": 5252902.0, + "step": 670 + }, + { + "entropy": 1.9397415190935134, + "epoch": 4.3328, + "grad_norm": 1.774728775024414, + "learning_rate": 1.2651271335300063e-05, + "loss": 1.3933, + "mean_token_accuracy": 0.626343595981598, + "num_tokens": 5331448.0, + "step": 680 + }, + { + "entropy": 1.9571841150522231, + "epoch": 4.3968, + "grad_norm": 1.80965256690979, + "learning_rate": 1.2451695330940268e-05, + "loss": 1.4205, + "mean_token_accuracy": 0.6187710732221603, + "num_tokens": 5410857.0, + "step": 690 + }, + { + "entropy": 1.9691186994314194, + "epoch": 4.4608, + "grad_norm": 1.5400609970092773, + "learning_rate": 1.2251074795130339e-05, + "loss": 1.4123, + "mean_token_accuracy": 0.614769059419632, + "num_tokens": 5488867.0, + "step": 700 + }, + { + "entropy": 1.9635825514793397, + "epoch": 4.5248, + "grad_norm": 1.467608094215393, + "learning_rate": 1.2049495201157489e-05, + "loss": 1.4228, + "mean_token_accuracy": 0.6202724784612655, + "num_tokens": 5567515.0, + "step": 710 + }, + { + "entropy": 1.9384470194578172, + "epoch": 4.5888, + "grad_norm": 1.652387022972107, + "learning_rate": 1.1847042430910451e-05, + "loss": 1.4273, + "mean_token_accuracy": 0.6190450325608253, + "num_tokens": 5648858.0, + "step": 720 + }, + { + "entropy": 1.9911590039730072, + "epoch": 4.6528, + "grad_norm": 1.7492380142211914, + "learning_rate": 1.1643802738289955e-05, + "loss": 1.4776, + "mean_token_accuracy": 0.6073927089571953, + "num_tokens": 5725459.0, + "step": 730 + }, + { + "entropy": 1.9724233269691467, + "epoch": 4.7168, + "grad_norm": 1.709669828414917, + "learning_rate": 1.1439862712460721e-05, + "loss": 1.4217, + "mean_token_accuracy": 0.6184087961912155, + "num_tokens": 5801601.0, + "step": 740 + }, + { + "entropy": 1.9725236982107162, + "epoch": 4.7808, + "grad_norm": 1.7469470500946045, + "learning_rate": 1.1235309240960621e-05, + "loss": 1.405, + "mean_token_accuracy": 0.6196158319711685, + "num_tokens": 5881107.0, + "step": 750 + }, + { + "entropy": 1.9484833419322967, + "epoch": 4.8448, + "grad_norm": 1.532373309135437, + "learning_rate": 1.1030229472682719e-05, + "loss": 1.4155, + "mean_token_accuracy": 0.611663281917572, + "num_tokens": 5960375.0, + "step": 760 + }, + { + "entropy": 1.9964754343032838, + "epoch": 4.9088, + "grad_norm": 1.7157669067382812, + "learning_rate": 1.0824710780745954e-05, + "loss": 1.4295, + "mean_token_accuracy": 0.6131752103567123, + "num_tokens": 6038267.0, + "step": 770 + }, + { + "entropy": 1.9598666340112687, + "epoch": 4.9728, + "grad_norm": 1.9844586849212646, + "learning_rate": 1.06188407252703e-05, + "loss": 1.397, + "mean_token_accuracy": 0.6226776748895645, + "num_tokens": 6114749.0, + "step": 780 + }, + { + "entropy": 1.9227982276194804, + "epoch": 5.032, + "grad_norm": 1.8960447311401367, + "learning_rate": 1.0412707016072254e-05, + "loss": 1.3649, + "mean_token_accuracy": 0.6269845414806057, + "num_tokens": 6190567.0, + "step": 790 + }, + { + "entropy": 1.9008578658103943, + "epoch": 5.096, + "grad_norm": 2.1205599308013916, + "learning_rate": 1.0206397475296548e-05, + "loss": 1.3582, + "mean_token_accuracy": 0.6292989999055862, + "num_tokens": 6269285.0, + "step": 800 + }, + { + "entropy": 1.9224162876605988, + "epoch": 5.16, + "grad_norm": 2.0454013347625732, + "learning_rate": 1e-05, + "loss": 1.3349, + "mean_token_accuracy": 0.6315066292881966, + "num_tokens": 6345352.0, + "step": 810 + }, + { + "entropy": 1.9340467154979706, + "epoch": 5.224, + "grad_norm": 2.2607693672180176, + "learning_rate": 9.793602524703456e-06, + "loss": 1.359, + "mean_token_accuracy": 0.6322078078985214, + "num_tokens": 6422524.0, + "step": 820 + }, + { + "entropy": 1.9296668291091919, + "epoch": 5.288, + "grad_norm": 2.1245901584625244, + "learning_rate": 9.58729298392775e-06, + "loss": 1.3672, + "mean_token_accuracy": 0.6282135233283043, + "num_tokens": 6500128.0, + "step": 830 + }, + { + "entropy": 1.9272812247276305, + "epoch": 5.352, + "grad_norm": 1.965820550918579, + "learning_rate": 9.381159274729704e-06, + "loss": 1.3786, + "mean_token_accuracy": 0.6249860525131226, + "num_tokens": 6578766.0, + "step": 840 + }, + { + "entropy": 1.904970219731331, + "epoch": 5.416, + "grad_norm": 1.9188759326934814, + "learning_rate": 9.175289219254051e-06, + "loss": 1.3418, + "mean_token_accuracy": 0.6325456693768501, + "num_tokens": 6658732.0, + "step": 850 + }, + { + "entropy": 1.8833305448293687, + "epoch": 5.48, + "grad_norm": 1.9675428867340088, + "learning_rate": 8.969770527317283e-06, + "loss": 1.3274, + "mean_token_accuracy": 0.6377805054187775, + "num_tokens": 6738683.0, + "step": 860 + }, + { + "entropy": 1.8806802958250046, + "epoch": 5.5440000000000005, + "grad_norm": 1.8849304914474487, + "learning_rate": 8.764690759039382e-06, + "loss": 1.3109, + "mean_token_accuracy": 0.636364534497261, + "num_tokens": 6817786.0, + "step": 870 + }, + { + "entropy": 1.8846195042133331, + "epoch": 5.608, + "grad_norm": 2.050208330154419, + "learning_rate": 8.56013728753928e-06, + "loss": 1.3449, + "mean_token_accuracy": 0.6316975012421608, + "num_tokens": 6896222.0, + "step": 880 + }, + { + "entropy": 1.88524529337883, + "epoch": 5.672, + "grad_norm": 2.1371288299560547, + "learning_rate": 8.356197261710048e-06, + "loss": 1.346, + "mean_token_accuracy": 0.633928644657135, + "num_tokens": 6976885.0, + "step": 890 + }, + { + "entropy": 1.9162244260311128, + "epoch": 5.736, + "grad_norm": 1.9879032373428345, + "learning_rate": 8.152957569089552e-06, + "loss": 1.3486, + "mean_token_accuracy": 0.6311523199081421, + "num_tokens": 7053473.0, + "step": 900 + }, + { + "entropy": 1.89161317050457, + "epoch": 5.8, + "grad_norm": 2.2934179306030273, + "learning_rate": 7.950504798842513e-06, + "loss": 1.3699, + "mean_token_accuracy": 0.6269390240311623, + "num_tokens": 7133137.0, + "step": 910 + }, + { + "entropy": 1.888116827607155, + "epoch": 5.864, + "grad_norm": 1.769087791442871, + "learning_rate": 7.748925204869667e-06, + "loss": 1.3756, + "mean_token_accuracy": 0.6285945609211921, + "num_tokens": 7213693.0, + "step": 920 + }, + { + "entropy": 1.89390210211277, + "epoch": 5.928, + "grad_norm": 2.2577364444732666, + "learning_rate": 7.548304669059735e-06, + "loss": 1.3396, + "mean_token_accuracy": 0.6290415957570076, + "num_tokens": 7291999.0, + "step": 930 + }, + { + "entropy": 1.8755547761917115, + "epoch": 5.992, + "grad_norm": 2.080371618270874, + "learning_rate": 7.348728664699939e-06, + "loss": 1.3305, + "mean_token_accuracy": 0.6322756335139275, + "num_tokens": 7370138.0, + "step": 940 + } + ], + "logging_steps": 10, + "max_steps": 1570, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.101662526949294e+17, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-942/training_args.bin b/checkpoint-942/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/checkpoint-942/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..02ee80b6196926a5ad790a004d9efd6ab1ba6542 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,16 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c1d8d5c9024994f1d3b00f9662b8dd89ca13cf2 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8b0c7c141373ca36e5e819a28f60e146ccef652f --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2062 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1a5f4468dde6a3fc2b09988c9806ba5aca332fc --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91af1258c4cc0890d80e2d30f4d10bc6d4451df2b90c85138ebbe4ae8b936321 +size 6289