diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..3cc5355ac0bbf00f362d3c0a6c29974b8480e8da 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-317/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ad44b51773bb0ffe727f4fd832322cf1218fb120 --- /dev/null +++ b/README.md @@ -0,0 +1,62 @@ +--- +base_model: zai-org/GLM-4-32B-0414 +library_name: peft +model_name: uigen-fx-1 +tags: +- base_model:adapter:zai-org/GLM-4-32B-0414 +- lora +- sft +- transformers +- trl +licence: license +pipeline_tag: text-generation +--- + +# Model Card for uigen-fx-1 + +This model is a fine-tuned version of [zai-org/GLM-4-32B-0414](https://huggingface.co/zai-org/GLM-4-32B-0414). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="None", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + + + + +This model was trained with SFT. + +### Framework versions + +- PEFT 0.17.1 +- TRL: 0.23.1 +- Transformers: 4.57.0 +- Pytorch: 2.6.0+git684f6f2 +- Datasets: 4.1.1 +- Tokenizers: 0.22.1 + +## Citations + + + +Cite TRL as: + +```bibtex +@misc{vonwerra2022trl, + title = {{TRL: Transformer Reinforcement Learning}}, + author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec}, + year = 2020, + journal = {GitHub repository}, + publisher = {GitHub}, + howpublished = {\url{https://github.com/huggingface/trl}} +} +``` \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ebe86d1363a1171c6cd0dcf67ad18fa37c9f85 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "zai-org/GLM-4-32B-0414", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 256, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b59a542639a167049f8305880161855fb8df2bc6 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0449cadcb46edd1c553d548c9e5994006bddec56295db09c6773bfb8407c8b75 +size 4157688432 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..fdd583731de8bdf1830d38a3e34f7cd8f80605dd --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,41 @@ +[gMASK] +{%- if tools -%} +<|system|> +# 可用工具 +{% for tool in tools %} + {%- set function = tool.function if tool.get("function") else tool %} + +## {{ function.name }} + +{{ function | tojson(indent=4, ensure_ascii=False) }} +在调用上述函数时,请使用 Json 格式表示调用的参数。 +{%- endfor %} +{%- endif -%} + +{%- for msg in messages %} + {%- if msg.role == 'system' %} +<|system|> +{{ msg.content }} + {%- endif %} +{%- endfor %} + +{%- for message in messages if message.role != 'system' %} + {%- set role = message['role'] %} + {%- set content = message['content'] %} + {%- set meta = message.get("metadata", "") %} + + {%- if role == 'user' %} +<|user|> +{{ content }} + {%- elif role == 'assistant' and not meta %} +<|assistant|> +{{ content }} + {%- elif role == 'assistant' and meta %} +<|assistant|>{{ meta }} +{{ content }} + {%- elif role == 'observation' %} +<|observation|> +{{ content }} + {%- endif %} +{%- endfor %} +{% if add_generation_prompt %}<|assistant|>{% endif %} \ No newline at end of file diff --git a/checkpoint-100/README.md b/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..235c2e6ff7d5d996be0c01802fa2d7707c43b969 --- /dev/null +++ b/checkpoint-100/README.md @@ -0,0 +1,209 @@ +--- +base_model: zai-org/GLM-4-32B-0414 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:zai-org/GLM-4-32B-0414 +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-100/adapter_config.json b/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ebe86d1363a1171c6cd0dcf67ad18fa37c9f85 --- /dev/null +++ b/checkpoint-100/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "zai-org/GLM-4-32B-0414", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 256, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-100/adapter_model.safetensors b/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ad60ad1ec28e5c9d5bae205e31ca043b4de5d23 --- /dev/null +++ b/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8119c8e33cdd29aa4a638963b343ac471403042545b9d9ad2bbb969e77e28a90 +size 4157688432 diff --git a/checkpoint-100/chat_template.jinja b/checkpoint-100/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..fdd583731de8bdf1830d38a3e34f7cd8f80605dd --- /dev/null +++ b/checkpoint-100/chat_template.jinja @@ -0,0 +1,41 @@ +[gMASK] +{%- if tools -%} +<|system|> +# 可用工具 +{% for tool in tools %} + {%- set function = tool.function if tool.get("function") else tool %} + +## {{ function.name }} + +{{ function | tojson(indent=4, ensure_ascii=False) }} +在调用上述函数时,请使用 Json 格式表示调用的参数。 +{%- endfor %} +{%- endif -%} + +{%- for msg in messages %} + {%- if msg.role == 'system' %} +<|system|> +{{ msg.content }} + {%- endif %} +{%- endfor %} + +{%- for message in messages if message.role != 'system' %} + {%- set role = message['role'] %} + {%- set content = message['content'] %} + {%- set meta = message.get("metadata", "") %} + + {%- if role == 'user' %} +<|user|> +{{ content }} + {%- elif role == 'assistant' and not meta %} +<|assistant|> +{{ content }} + {%- elif role == 'assistant' and meta %} +<|assistant|>{{ meta }} +{{ content }} + {%- elif role == 'observation' %} +<|observation|> +{{ content }} + {%- endif %} +{%- endfor %} +{% if add_generation_prompt %}<|assistant|>{% endif %} \ No newline at end of file diff --git a/checkpoint-100/optimizer.pt b/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..43bf07ff9a96b6da78ffb812df4862ddf058efd9 --- /dev/null +++ b/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:134b73c523d89acf042593d3baab87c3320580a8bd3b9f5c1b0f0912801d09cb +size 8315744524 diff --git a/checkpoint-100/rng_state.pth b/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-100/scheduler.pt b/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce0e0033cf389c0d4a98dd27d6aa761bb2aa4c67 --- /dev/null +++ b/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:053acfc89a09092c32ba9278f8c0b9013c2e1e739602a1c250edfada03704f42 +size 1064 diff --git a/checkpoint-100/special_tokens_map.json b/checkpoint-100/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf953c2c8a3c89778c92e54c685942bb1130616 --- /dev/null +++ b/checkpoint-100/special_tokens_map.json @@ -0,0 +1,32 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "eos_token": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-100/tokenizer.json b/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1dde37a2715c11628fc84bf571976f9f80eb69 --- /dev/null +++ b/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c +size 19966496 diff --git a/checkpoint-100/tokenizer_config.json b/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d690b31eb34e3d9741aff67b37e0ecda2604739b --- /dev/null +++ b/checkpoint-100/tokenizer_config.json @@ -0,0 +1,145 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|user|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-100/trainer_state.json b/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a603f958b5c492376112eb63359ae327ffde8a68 --- /dev/null +++ b/checkpoint-100/trainer_state.json @@ -0,0 +1,134 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.31620553359683795, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 0.5656170375645161, + "epoch": 0.03162055335968379, + "grad_norm": 487949.625, + "learning_rate": 0.000971608832807571, + "loss": 0.598, + "mean_token_accuracy": 0.8383935391902924, + "num_tokens": 611773.0, + "step": 10 + }, + { + "entropy": 0.5844515649601817, + "epoch": 0.06324110671936758, + "grad_norm": 3576.093505859375, + "learning_rate": 0.0009400630914826499, + "loss": 0.6717, + "mean_token_accuracy": 0.8245183542370796, + "num_tokens": 1210060.0, + "step": 20 + }, + { + "entropy": 0.4919601235538721, + "epoch": 0.09486166007905138, + "grad_norm": 462362592.0, + "learning_rate": 0.0009085173501577287, + "loss": 0.6075, + "mean_token_accuracy": 0.8355057552456856, + "num_tokens": 1815732.0, + "step": 30 + }, + { + "entropy": 0.45399096198379996, + "epoch": 0.12648221343873517, + "grad_norm": 48967.296875, + "learning_rate": 0.0008769716088328076, + "loss": 0.6877, + "mean_token_accuracy": 0.8247161574661732, + "num_tokens": 2372291.0, + "step": 40 + }, + { + "entropy": 0.5189430050551891, + "epoch": 0.15810276679841898, + "grad_norm": 20488920.0, + "learning_rate": 0.0008454258675078864, + "loss": 0.6631, + "mean_token_accuracy": 0.8253108873963356, + "num_tokens": 2970656.0, + "step": 50 + }, + { + "entropy": 0.5161575745791197, + "epoch": 0.18972332015810275, + "grad_norm": 11390319.0, + "learning_rate": 0.0008138801261829653, + "loss": 0.5682, + "mean_token_accuracy": 0.8430261947214603, + "num_tokens": 3561312.0, + "step": 60 + }, + { + "entropy": 0.5097578268498182, + "epoch": 0.22134387351778656, + "grad_norm": 5408220.5, + "learning_rate": 0.0007823343848580442, + "loss": 0.598, + "mean_token_accuracy": 0.8383074931800365, + "num_tokens": 4164666.0, + "step": 70 + }, + { + "entropy": 0.561078536324203, + "epoch": 0.25296442687747034, + "grad_norm": 3655628.0, + "learning_rate": 0.000750788643533123, + "loss": 0.5818, + "mean_token_accuracy": 0.8375455126166343, + "num_tokens": 4750115.0, + "step": 80 + }, + { + "entropy": 0.6730448313057422, + "epoch": 0.2845849802371542, + "grad_norm": 65684.3125, + "learning_rate": 0.0007192429022082018, + "loss": 0.6005, + "mean_token_accuracy": 0.8356033861637115, + "num_tokens": 5290377.0, + "step": 90 + }, + { + "entropy": 0.68943473957479, + "epoch": 0.31620553359683795, + "grad_norm": 18166769664.0, + "learning_rate": 0.0006876971608832808, + "loss": 0.6068, + "mean_token_accuracy": 0.8362394802272319, + "num_tokens": 5882068.0, + "step": 100 + } + ], + "logging_steps": 10, + "max_steps": 317, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1531563857399153e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100/training_args.bin b/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..120f85ab118be2c00862d7334c96dd25039dfeea --- /dev/null +++ b/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad5302c07edbe0267e2b38e65245429ed8bff1dce0fb4fc7af4aef2083cf5b +size 5880 diff --git a/checkpoint-150/README.md b/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..235c2e6ff7d5d996be0c01802fa2d7707c43b969 --- /dev/null +++ b/checkpoint-150/README.md @@ -0,0 +1,209 @@ +--- +base_model: zai-org/GLM-4-32B-0414 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:zai-org/GLM-4-32B-0414 +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-150/adapter_config.json b/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ebe86d1363a1171c6cd0dcf67ad18fa37c9f85 --- /dev/null +++ b/checkpoint-150/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "zai-org/GLM-4-32B-0414", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 256, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-150/adapter_model.safetensors b/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a6fb948ffb923bee7eeb7f63bc852df31df9d83 --- /dev/null +++ b/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:288041dcdf35700a374cc8d817fc989940d6ab17ad5c8e42154ccada1b9692ad +size 4157688432 diff --git a/checkpoint-150/chat_template.jinja b/checkpoint-150/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..fdd583731de8bdf1830d38a3e34f7cd8f80605dd --- /dev/null +++ b/checkpoint-150/chat_template.jinja @@ -0,0 +1,41 @@ +[gMASK] +{%- if tools -%} +<|system|> +# 可用工具 +{% for tool in tools %} + {%- set function = tool.function if tool.get("function") else tool %} + +## {{ function.name }} + +{{ function | tojson(indent=4, ensure_ascii=False) }} +在调用上述函数时,请使用 Json 格式表示调用的参数。 +{%- endfor %} +{%- endif -%} + +{%- for msg in messages %} + {%- if msg.role == 'system' %} +<|system|> +{{ msg.content }} + {%- endif %} +{%- endfor %} + +{%- for message in messages if message.role != 'system' %} + {%- set role = message['role'] %} + {%- set content = message['content'] %} + {%- set meta = message.get("metadata", "") %} + + {%- if role == 'user' %} +<|user|> +{{ content }} + {%- elif role == 'assistant' and not meta %} +<|assistant|> +{{ content }} + {%- elif role == 'assistant' and meta %} +<|assistant|>{{ meta }} +{{ content }} + {%- elif role == 'observation' %} +<|observation|> +{{ content }} + {%- endif %} +{%- endfor %} +{% if add_generation_prompt %}<|assistant|>{% endif %} \ No newline at end of file diff --git a/checkpoint-150/optimizer.pt b/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d69721b88de123d5d9a730ef429cc85f74b26dc --- /dev/null +++ b/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9cf9a826787bcddf53816dddb7b67a4d15143e694085f71a614974e535a844 +size 8315744524 diff --git a/checkpoint-150/rng_state.pth b/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-150/scheduler.pt b/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e2f308602c9b754a2484e6ea672a0766a55bf41 --- /dev/null +++ b/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73df21ec5e54e407c7df638e8253ba4bfc6287c699d87bcb807486face86d5da +size 1064 diff --git a/checkpoint-150/special_tokens_map.json b/checkpoint-150/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf953c2c8a3c89778c92e54c685942bb1130616 --- /dev/null +++ b/checkpoint-150/special_tokens_map.json @@ -0,0 +1,32 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "eos_token": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-150/tokenizer.json b/checkpoint-150/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1dde37a2715c11628fc84bf571976f9f80eb69 --- /dev/null +++ b/checkpoint-150/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c +size 19966496 diff --git a/checkpoint-150/tokenizer_config.json b/checkpoint-150/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d690b31eb34e3d9741aff67b37e0ecda2604739b --- /dev/null +++ b/checkpoint-150/tokenizer_config.json @@ -0,0 +1,145 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|user|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-150/trainer_state.json b/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a3c521b04d588daf39febe73ab2b6d96714a4a0f --- /dev/null +++ b/checkpoint-150/trainer_state.json @@ -0,0 +1,184 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4743083003952569, + "eval_steps": 500, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 0.5656170375645161, + "epoch": 0.03162055335968379, + "grad_norm": 487949.625, + "learning_rate": 0.000971608832807571, + "loss": 0.598, + "mean_token_accuracy": 0.8383935391902924, + "num_tokens": 611773.0, + "step": 10 + }, + { + "entropy": 0.5844515649601817, + "epoch": 0.06324110671936758, + "grad_norm": 3576.093505859375, + "learning_rate": 0.0009400630914826499, + "loss": 0.6717, + "mean_token_accuracy": 0.8245183542370796, + "num_tokens": 1210060.0, + "step": 20 + }, + { + "entropy": 0.4919601235538721, + "epoch": 0.09486166007905138, + "grad_norm": 462362592.0, + "learning_rate": 0.0009085173501577287, + "loss": 0.6075, + "mean_token_accuracy": 0.8355057552456856, + "num_tokens": 1815732.0, + "step": 30 + }, + { + "entropy": 0.45399096198379996, + "epoch": 0.12648221343873517, + "grad_norm": 48967.296875, + "learning_rate": 0.0008769716088328076, + "loss": 0.6877, + "mean_token_accuracy": 0.8247161574661732, + "num_tokens": 2372291.0, + "step": 40 + }, + { + "entropy": 0.5189430050551891, + "epoch": 0.15810276679841898, + "grad_norm": 20488920.0, + "learning_rate": 0.0008454258675078864, + "loss": 0.6631, + "mean_token_accuracy": 0.8253108873963356, + "num_tokens": 2970656.0, + "step": 50 + }, + { + "entropy": 0.5161575745791197, + "epoch": 0.18972332015810275, + "grad_norm": 11390319.0, + "learning_rate": 0.0008138801261829653, + "loss": 0.5682, + "mean_token_accuracy": 0.8430261947214603, + "num_tokens": 3561312.0, + "step": 60 + }, + { + "entropy": 0.5097578268498182, + "epoch": 0.22134387351778656, + "grad_norm": 5408220.5, + "learning_rate": 0.0007823343848580442, + "loss": 0.598, + "mean_token_accuracy": 0.8383074931800365, + "num_tokens": 4164666.0, + "step": 70 + }, + { + "entropy": 0.561078536324203, + "epoch": 0.25296442687747034, + "grad_norm": 3655628.0, + "learning_rate": 0.000750788643533123, + "loss": 0.5818, + "mean_token_accuracy": 0.8375455126166343, + "num_tokens": 4750115.0, + "step": 80 + }, + { + "entropy": 0.6730448313057422, + "epoch": 0.2845849802371542, + "grad_norm": 65684.3125, + "learning_rate": 0.0007192429022082018, + "loss": 0.6005, + "mean_token_accuracy": 0.8356033861637115, + "num_tokens": 5290377.0, + "step": 90 + }, + { + "entropy": 0.68943473957479, + "epoch": 0.31620553359683795, + "grad_norm": 18166769664.0, + "learning_rate": 0.0006876971608832808, + "loss": 0.6068, + "mean_token_accuracy": 0.8362394802272319, + "num_tokens": 5882068.0, + "step": 100 + }, + { + "entropy": 0.7109859976917505, + "epoch": 0.34782608695652173, + "grad_norm": 4711626.5, + "learning_rate": 0.0006561514195583596, + "loss": 0.6385, + "mean_token_accuracy": 0.829913080483675, + "num_tokens": 6480962.0, + "step": 110 + }, + { + "entropy": 0.6261379970237613, + "epoch": 0.3794466403162055, + "grad_norm": 1412058496.0, + "learning_rate": 0.0006246056782334385, + "loss": 0.5727, + "mean_token_accuracy": 0.8403497040271759, + "num_tokens": 7080708.0, + "step": 120 + }, + { + "entropy": 0.6068891424685716, + "epoch": 0.41106719367588934, + "grad_norm": 3915769.0, + "learning_rate": 0.0005930599369085173, + "loss": 0.5622, + "mean_token_accuracy": 0.8430461063981056, + "num_tokens": 7692408.0, + "step": 130 + }, + { + "entropy": 0.6536903701722622, + "epoch": 0.4426877470355731, + "grad_norm": 1457204.375, + "learning_rate": 0.0005615141955835961, + "loss": 0.6043, + "mean_token_accuracy": 0.833759855479002, + "num_tokens": 8329614.0, + "step": 140 + }, + { + "entropy": 0.646093986183405, + "epoch": 0.4743083003952569, + "grad_norm": 96467.4765625, + "learning_rate": 0.0005299684542586751, + "loss": 0.6169, + "mean_token_accuracy": 0.8327336788177491, + "num_tokens": 8933829.0, + "step": 150 + } + ], + "logging_steps": 10, + "max_steps": 317, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.7514421731368018e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-150/training_args.bin b/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..120f85ab118be2c00862d7334c96dd25039dfeea --- /dev/null +++ b/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad5302c07edbe0267e2b38e65245429ed8bff1dce0fb4fc7af4aef2083cf5b +size 5880 diff --git a/checkpoint-200/README.md b/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..235c2e6ff7d5d996be0c01802fa2d7707c43b969 --- /dev/null +++ b/checkpoint-200/README.md @@ -0,0 +1,209 @@ +--- +base_model: zai-org/GLM-4-32B-0414 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:zai-org/GLM-4-32B-0414 +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-200/adapter_config.json b/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ebe86d1363a1171c6cd0dcf67ad18fa37c9f85 --- /dev/null +++ b/checkpoint-200/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "zai-org/GLM-4-32B-0414", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 256, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-200/adapter_model.safetensors b/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..398d81633f78d8b313d461671e4049d672151f35 --- /dev/null +++ b/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12bd816899c9a54d32bc94b934220703dfe70f37d60e5937ff2dd627a04c3f5b +size 4157688432 diff --git a/checkpoint-200/chat_template.jinja b/checkpoint-200/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..fdd583731de8bdf1830d38a3e34f7cd8f80605dd --- /dev/null +++ b/checkpoint-200/chat_template.jinja @@ -0,0 +1,41 @@ +[gMASK] +{%- if tools -%} +<|system|> +# 可用工具 +{% for tool in tools %} + {%- set function = tool.function if tool.get("function") else tool %} + +## {{ function.name }} + +{{ function | tojson(indent=4, ensure_ascii=False) }} +在调用上述函数时,请使用 Json 格式表示调用的参数。 +{%- endfor %} +{%- endif -%} + +{%- for msg in messages %} + {%- if msg.role == 'system' %} +<|system|> +{{ msg.content }} + {%- endif %} +{%- endfor %} + +{%- for message in messages if message.role != 'system' %} + {%- set role = message['role'] %} + {%- set content = message['content'] %} + {%- set meta = message.get("metadata", "") %} + + {%- if role == 'user' %} +<|user|> +{{ content }} + {%- elif role == 'assistant' and not meta %} +<|assistant|> +{{ content }} + {%- elif role == 'assistant' and meta %} +<|assistant|>{{ meta }} +{{ content }} + {%- elif role == 'observation' %} +<|observation|> +{{ content }} + {%- endif %} +{%- endfor %} +{% if add_generation_prompt %}<|assistant|>{% endif %} \ No newline at end of file diff --git a/checkpoint-200/optimizer.pt b/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..79acee2e692bbd46501ac9522191ea175f07f2b8 --- /dev/null +++ b/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea5aed5ec8915fcc34c36a4818ab3dbe7aa8958d6d46449313b9bd88ebcc754e +size 8315744524 diff --git a/checkpoint-200/rng_state.pth b/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-200/scheduler.pt b/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f51aa5fb688d3dbec7ffc6e5705c20b15e8f31c2 --- /dev/null +++ b/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39f3c1e44d50edabecccb1ec93a5215344aa9e14e6304ed79e9fc0fd54b5cb2 +size 1064 diff --git a/checkpoint-200/special_tokens_map.json b/checkpoint-200/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf953c2c8a3c89778c92e54c685942bb1130616 --- /dev/null +++ b/checkpoint-200/special_tokens_map.json @@ -0,0 +1,32 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "eos_token": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-200/tokenizer.json b/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1dde37a2715c11628fc84bf571976f9f80eb69 --- /dev/null +++ b/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c +size 19966496 diff --git a/checkpoint-200/tokenizer_config.json b/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d690b31eb34e3d9741aff67b37e0ecda2604739b --- /dev/null +++ b/checkpoint-200/tokenizer_config.json @@ -0,0 +1,145 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|user|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-200/trainer_state.json b/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..60acf4f7b8e6bf5315fcc7d78f0e3fbbabeffdf2 --- /dev/null +++ b/checkpoint-200/trainer_state.json @@ -0,0 +1,234 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6324110671936759, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 0.5656170375645161, + "epoch": 0.03162055335968379, + "grad_norm": 487949.625, + "learning_rate": 0.000971608832807571, + "loss": 0.598, + "mean_token_accuracy": 0.8383935391902924, + "num_tokens": 611773.0, + "step": 10 + }, + { + "entropy": 0.5844515649601817, + "epoch": 0.06324110671936758, + "grad_norm": 3576.093505859375, + "learning_rate": 0.0009400630914826499, + "loss": 0.6717, + "mean_token_accuracy": 0.8245183542370796, + "num_tokens": 1210060.0, + "step": 20 + }, + { + "entropy": 0.4919601235538721, + "epoch": 0.09486166007905138, + "grad_norm": 462362592.0, + "learning_rate": 0.0009085173501577287, + "loss": 0.6075, + "mean_token_accuracy": 0.8355057552456856, + "num_tokens": 1815732.0, + "step": 30 + }, + { + "entropy": 0.45399096198379996, + "epoch": 0.12648221343873517, + "grad_norm": 48967.296875, + "learning_rate": 0.0008769716088328076, + "loss": 0.6877, + "mean_token_accuracy": 0.8247161574661732, + "num_tokens": 2372291.0, + "step": 40 + }, + { + "entropy": 0.5189430050551891, + "epoch": 0.15810276679841898, + "grad_norm": 20488920.0, + "learning_rate": 0.0008454258675078864, + "loss": 0.6631, + "mean_token_accuracy": 0.8253108873963356, + "num_tokens": 2970656.0, + "step": 50 + }, + { + "entropy": 0.5161575745791197, + "epoch": 0.18972332015810275, + "grad_norm": 11390319.0, + "learning_rate": 0.0008138801261829653, + "loss": 0.5682, + "mean_token_accuracy": 0.8430261947214603, + "num_tokens": 3561312.0, + "step": 60 + }, + { + "entropy": 0.5097578268498182, + "epoch": 0.22134387351778656, + "grad_norm": 5408220.5, + "learning_rate": 0.0007823343848580442, + "loss": 0.598, + "mean_token_accuracy": 0.8383074931800365, + "num_tokens": 4164666.0, + "step": 70 + }, + { + "entropy": 0.561078536324203, + "epoch": 0.25296442687747034, + "grad_norm": 3655628.0, + "learning_rate": 0.000750788643533123, + "loss": 0.5818, + "mean_token_accuracy": 0.8375455126166343, + "num_tokens": 4750115.0, + "step": 80 + }, + { + "entropy": 0.6730448313057422, + "epoch": 0.2845849802371542, + "grad_norm": 65684.3125, + "learning_rate": 0.0007192429022082018, + "loss": 0.6005, + "mean_token_accuracy": 0.8356033861637115, + "num_tokens": 5290377.0, + "step": 90 + }, + { + "entropy": 0.68943473957479, + "epoch": 0.31620553359683795, + "grad_norm": 18166769664.0, + "learning_rate": 0.0006876971608832808, + "loss": 0.6068, + "mean_token_accuracy": 0.8362394802272319, + "num_tokens": 5882068.0, + "step": 100 + }, + { + "entropy": 0.7109859976917505, + "epoch": 0.34782608695652173, + "grad_norm": 4711626.5, + "learning_rate": 0.0006561514195583596, + "loss": 0.6385, + "mean_token_accuracy": 0.829913080483675, + "num_tokens": 6480962.0, + "step": 110 + }, + { + "entropy": 0.6261379970237613, + "epoch": 0.3794466403162055, + "grad_norm": 1412058496.0, + "learning_rate": 0.0006246056782334385, + "loss": 0.5727, + "mean_token_accuracy": 0.8403497040271759, + "num_tokens": 7080708.0, + "step": 120 + }, + { + "entropy": 0.6068891424685716, + "epoch": 0.41106719367588934, + "grad_norm": 3915769.0, + "learning_rate": 0.0005930599369085173, + "loss": 0.5622, + "mean_token_accuracy": 0.8430461063981056, + "num_tokens": 7692408.0, + "step": 130 + }, + { + "entropy": 0.6536903701722622, + "epoch": 0.4426877470355731, + "grad_norm": 1457204.375, + "learning_rate": 0.0005615141955835961, + "loss": 0.6043, + "mean_token_accuracy": 0.833759855479002, + "num_tokens": 8329614.0, + "step": 140 + }, + { + "entropy": 0.646093986183405, + "epoch": 0.4743083003952569, + "grad_norm": 96467.4765625, + "learning_rate": 0.0005299684542586751, + "loss": 0.6169, + "mean_token_accuracy": 0.8327336788177491, + "num_tokens": 8933829.0, + "step": 150 + }, + { + "entropy": 0.5853382866829634, + "epoch": 0.5059288537549407, + "grad_norm": 3098197.5, + "learning_rate": 0.000498422712933754, + "loss": 0.5469, + "mean_token_accuracy": 0.8443389609456062, + "num_tokens": 9522595.0, + "step": 160 + }, + { + "entropy": 0.5903366718441247, + "epoch": 0.5375494071146245, + "grad_norm": 42660388.0, + "learning_rate": 0.0004668769716088328, + "loss": 0.604, + "mean_token_accuracy": 0.8362459398806095, + "num_tokens": 10143758.0, + "step": 170 + }, + { + "entropy": 0.6122400458902121, + "epoch": 0.5691699604743083, + "grad_norm": 757395008.0, + "learning_rate": 0.0004353312302839117, + "loss": 0.6429, + "mean_token_accuracy": 0.8282213471829891, + "num_tokens": 10692662.0, + "step": 180 + }, + { + "entropy": 0.7004607111215592, + "epoch": 0.6007905138339921, + "grad_norm": 316442400.0, + "learning_rate": 0.0004037854889589905, + "loss": 0.6737, + "mean_token_accuracy": 0.8188323535025119, + "num_tokens": 11286778.0, + "step": 190 + }, + { + "entropy": 0.6795283857733011, + "epoch": 0.6324110671936759, + "grad_norm": 19859658752.0, + "learning_rate": 0.0003722397476340694, + "loss": 0.6629, + "mean_token_accuracy": 0.8223141871392727, + "num_tokens": 11910969.0, + "step": 200 + } + ], + "logging_steps": 10, + "max_steps": 317, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.3350988058451845e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-200/training_args.bin b/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..120f85ab118be2c00862d7334c96dd25039dfeea --- /dev/null +++ b/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad5302c07edbe0267e2b38e65245429ed8bff1dce0fb4fc7af4aef2083cf5b +size 5880 diff --git a/checkpoint-250/README.md b/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..235c2e6ff7d5d996be0c01802fa2d7707c43b969 --- /dev/null +++ b/checkpoint-250/README.md @@ -0,0 +1,209 @@ +--- +base_model: zai-org/GLM-4-32B-0414 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:zai-org/GLM-4-32B-0414 +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-250/adapter_config.json b/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ebe86d1363a1171c6cd0dcf67ad18fa37c9f85 --- /dev/null +++ b/checkpoint-250/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "zai-org/GLM-4-32B-0414", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 256, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-250/adapter_model.safetensors b/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21830f9cbae1d6ad6793017a5e41dceeb9a0d3f1 --- /dev/null +++ b/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a08d60566d33dbc37e640ce49300f3bf1d9b9b22cb005d697b873b3f8e7bee3c +size 4157688432 diff --git a/checkpoint-250/chat_template.jinja b/checkpoint-250/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..fdd583731de8bdf1830d38a3e34f7cd8f80605dd --- /dev/null +++ b/checkpoint-250/chat_template.jinja @@ -0,0 +1,41 @@ +[gMASK] +{%- if tools -%} +<|system|> +# 可用工具 +{% for tool in tools %} + {%- set function = tool.function if tool.get("function") else tool %} + +## {{ function.name }} + +{{ function | tojson(indent=4, ensure_ascii=False) }} +在调用上述函数时,请使用 Json 格式表示调用的参数。 +{%- endfor %} +{%- endif -%} + +{%- for msg in messages %} + {%- if msg.role == 'system' %} +<|system|> +{{ msg.content }} + {%- endif %} +{%- endfor %} + +{%- for message in messages if message.role != 'system' %} + {%- set role = message['role'] %} + {%- set content = message['content'] %} + {%- set meta = message.get("metadata", "") %} + + {%- if role == 'user' %} +<|user|> +{{ content }} + {%- elif role == 'assistant' and not meta %} +<|assistant|> +{{ content }} + {%- elif role == 'assistant' and meta %} +<|assistant|>{{ meta }} +{{ content }} + {%- elif role == 'observation' %} +<|observation|> +{{ content }} + {%- endif %} +{%- endfor %} +{% if add_generation_prompt %}<|assistant|>{% endif %} \ No newline at end of file diff --git a/checkpoint-250/optimizer.pt b/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..647ac8792d024fad024fa9d150aceffc4fac3d8f --- /dev/null +++ b/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00d691beb57ed010bd1715b1f65fb9839c6a7ad1bc55782f5be99f049989cd14 +size 8315744524 diff --git a/checkpoint-250/rng_state.pth b/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-250/scheduler.pt b/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4692a0a0ab88ee21ae7fd5d776ad21cf7bbeed7e --- /dev/null +++ b/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0312fddf7060481c73cb2a1291fdb26851aa4d528ad7c394a28b2a0b83a65101 +size 1064 diff --git a/checkpoint-250/special_tokens_map.json b/checkpoint-250/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf953c2c8a3c89778c92e54c685942bb1130616 --- /dev/null +++ b/checkpoint-250/special_tokens_map.json @@ -0,0 +1,32 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "eos_token": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-250/tokenizer.json b/checkpoint-250/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1dde37a2715c11628fc84bf571976f9f80eb69 --- /dev/null +++ b/checkpoint-250/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c +size 19966496 diff --git a/checkpoint-250/tokenizer_config.json b/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d690b31eb34e3d9741aff67b37e0ecda2604739b --- /dev/null +++ b/checkpoint-250/tokenizer_config.json @@ -0,0 +1,145 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|user|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-250/trainer_state.json b/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dcd62f926e7d4e3bdf85eb83a1b51809748d992c --- /dev/null +++ b/checkpoint-250/trainer_state.json @@ -0,0 +1,284 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7905138339920948, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 0.5656170375645161, + "epoch": 0.03162055335968379, + "grad_norm": 487949.625, + "learning_rate": 0.000971608832807571, + "loss": 0.598, + "mean_token_accuracy": 0.8383935391902924, + "num_tokens": 611773.0, + "step": 10 + }, + { + "entropy": 0.5844515649601817, + "epoch": 0.06324110671936758, + "grad_norm": 3576.093505859375, + "learning_rate": 0.0009400630914826499, + "loss": 0.6717, + "mean_token_accuracy": 0.8245183542370796, + "num_tokens": 1210060.0, + "step": 20 + }, + { + "entropy": 0.4919601235538721, + "epoch": 0.09486166007905138, + "grad_norm": 462362592.0, + "learning_rate": 0.0009085173501577287, + "loss": 0.6075, + "mean_token_accuracy": 0.8355057552456856, + "num_tokens": 1815732.0, + "step": 30 + }, + { + "entropy": 0.45399096198379996, + "epoch": 0.12648221343873517, + "grad_norm": 48967.296875, + "learning_rate": 0.0008769716088328076, + "loss": 0.6877, + "mean_token_accuracy": 0.8247161574661732, + "num_tokens": 2372291.0, + "step": 40 + }, + { + "entropy": 0.5189430050551891, + "epoch": 0.15810276679841898, + "grad_norm": 20488920.0, + "learning_rate": 0.0008454258675078864, + "loss": 0.6631, + "mean_token_accuracy": 0.8253108873963356, + "num_tokens": 2970656.0, + "step": 50 + }, + { + "entropy": 0.5161575745791197, + "epoch": 0.18972332015810275, + "grad_norm": 11390319.0, + "learning_rate": 0.0008138801261829653, + "loss": 0.5682, + "mean_token_accuracy": 0.8430261947214603, + "num_tokens": 3561312.0, + "step": 60 + }, + { + "entropy": 0.5097578268498182, + "epoch": 0.22134387351778656, + "grad_norm": 5408220.5, + "learning_rate": 0.0007823343848580442, + "loss": 0.598, + "mean_token_accuracy": 0.8383074931800365, + "num_tokens": 4164666.0, + "step": 70 + }, + { + "entropy": 0.561078536324203, + "epoch": 0.25296442687747034, + "grad_norm": 3655628.0, + "learning_rate": 0.000750788643533123, + "loss": 0.5818, + "mean_token_accuracy": 0.8375455126166343, + "num_tokens": 4750115.0, + "step": 80 + }, + { + "entropy": 0.6730448313057422, + "epoch": 0.2845849802371542, + "grad_norm": 65684.3125, + "learning_rate": 0.0007192429022082018, + "loss": 0.6005, + "mean_token_accuracy": 0.8356033861637115, + "num_tokens": 5290377.0, + "step": 90 + }, + { + "entropy": 0.68943473957479, + "epoch": 0.31620553359683795, + "grad_norm": 18166769664.0, + "learning_rate": 0.0006876971608832808, + "loss": 0.6068, + "mean_token_accuracy": 0.8362394802272319, + "num_tokens": 5882068.0, + "step": 100 + }, + { + "entropy": 0.7109859976917505, + "epoch": 0.34782608695652173, + "grad_norm": 4711626.5, + "learning_rate": 0.0006561514195583596, + "loss": 0.6385, + "mean_token_accuracy": 0.829913080483675, + "num_tokens": 6480962.0, + "step": 110 + }, + { + "entropy": 0.6261379970237613, + "epoch": 0.3794466403162055, + "grad_norm": 1412058496.0, + "learning_rate": 0.0006246056782334385, + "loss": 0.5727, + "mean_token_accuracy": 0.8403497040271759, + "num_tokens": 7080708.0, + "step": 120 + }, + { + "entropy": 0.6068891424685716, + "epoch": 0.41106719367588934, + "grad_norm": 3915769.0, + "learning_rate": 0.0005930599369085173, + "loss": 0.5622, + "mean_token_accuracy": 0.8430461063981056, + "num_tokens": 7692408.0, + "step": 130 + }, + { + "entropy": 0.6536903701722622, + "epoch": 0.4426877470355731, + "grad_norm": 1457204.375, + "learning_rate": 0.0005615141955835961, + "loss": 0.6043, + "mean_token_accuracy": 0.833759855479002, + "num_tokens": 8329614.0, + "step": 140 + }, + { + "entropy": 0.646093986183405, + "epoch": 0.4743083003952569, + "grad_norm": 96467.4765625, + "learning_rate": 0.0005299684542586751, + "loss": 0.6169, + "mean_token_accuracy": 0.8327336788177491, + "num_tokens": 8933829.0, + "step": 150 + }, + { + "entropy": 0.5853382866829634, + "epoch": 0.5059288537549407, + "grad_norm": 3098197.5, + "learning_rate": 0.000498422712933754, + "loss": 0.5469, + "mean_token_accuracy": 0.8443389609456062, + "num_tokens": 9522595.0, + "step": 160 + }, + { + "entropy": 0.5903366718441247, + "epoch": 0.5375494071146245, + "grad_norm": 42660388.0, + "learning_rate": 0.0004668769716088328, + "loss": 0.604, + "mean_token_accuracy": 0.8362459398806095, + "num_tokens": 10143758.0, + "step": 170 + }, + { + "entropy": 0.6122400458902121, + "epoch": 0.5691699604743083, + "grad_norm": 757395008.0, + "learning_rate": 0.0004353312302839117, + "loss": 0.6429, + "mean_token_accuracy": 0.8282213471829891, + "num_tokens": 10692662.0, + "step": 180 + }, + { + "entropy": 0.7004607111215592, + "epoch": 0.6007905138339921, + "grad_norm": 316442400.0, + "learning_rate": 0.0004037854889589905, + "loss": 0.6737, + "mean_token_accuracy": 0.8188323535025119, + "num_tokens": 11286778.0, + "step": 190 + }, + { + "entropy": 0.6795283857733011, + "epoch": 0.6324110671936759, + "grad_norm": 19859658752.0, + "learning_rate": 0.0003722397476340694, + "loss": 0.6629, + "mean_token_accuracy": 0.8223141871392727, + "num_tokens": 11910969.0, + "step": 200 + }, + { + "entropy": 0.6172661986202002, + "epoch": 0.6640316205533597, + "grad_norm": 596563.25, + "learning_rate": 0.00034069400630914825, + "loss": 0.6219, + "mean_token_accuracy": 0.8312948845326901, + "num_tokens": 12506898.0, + "step": 210 + }, + { + "entropy": 0.5889476146548986, + "epoch": 0.6956521739130435, + "grad_norm": 38292772.0, + "learning_rate": 0.00030914826498422714, + "loss": 0.6256, + "mean_token_accuracy": 0.833721686899662, + "num_tokens": 13097833.0, + "step": 220 + }, + { + "entropy": 0.5375086467713117, + "epoch": 0.7272727272727273, + "grad_norm": 7002747.5, + "learning_rate": 0.00027760252365930597, + "loss": 0.5423, + "mean_token_accuracy": 0.8461122632026672, + "num_tokens": 13690464.0, + "step": 230 + }, + { + "entropy": 0.5905131205916405, + "epoch": 0.758893280632411, + "grad_norm": 4201593.0, + "learning_rate": 0.00024605678233438486, + "loss": 0.5946, + "mean_token_accuracy": 0.8356382519006729, + "num_tokens": 14269150.0, + "step": 240 + }, + { + "entropy": 0.553895766660571, + "epoch": 0.7905138339920948, + "grad_norm": 7530.97509765625, + "learning_rate": 0.00021451104100946372, + "loss": 0.5512, + "mean_token_accuracy": 0.8458476938307286, + "num_tokens": 14877904.0, + "step": 250 + } + ], + "logging_steps": 10, + "max_steps": 317, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.9167547882862674e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-250/training_args.bin b/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..120f85ab118be2c00862d7334c96dd25039dfeea --- /dev/null +++ b/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad5302c07edbe0267e2b38e65245429ed8bff1dce0fb4fc7af4aef2083cf5b +size 5880 diff --git a/checkpoint-300/README.md b/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..235c2e6ff7d5d996be0c01802fa2d7707c43b969 --- /dev/null +++ b/checkpoint-300/README.md @@ -0,0 +1,209 @@ +--- +base_model: zai-org/GLM-4-32B-0414 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:zai-org/GLM-4-32B-0414 +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-300/adapter_config.json b/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ebe86d1363a1171c6cd0dcf67ad18fa37c9f85 --- /dev/null +++ b/checkpoint-300/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "zai-org/GLM-4-32B-0414", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 256, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-300/adapter_model.safetensors b/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..42a96ecea97520b64919c6d3ad787c32c07fcdb2 --- /dev/null +++ b/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dacad7e648d2e356cf5f7410762ca618857405654e5a0afa2781e629eb1f9efb +size 4157688432 diff --git a/checkpoint-300/chat_template.jinja b/checkpoint-300/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..fdd583731de8bdf1830d38a3e34f7cd8f80605dd --- /dev/null +++ b/checkpoint-300/chat_template.jinja @@ -0,0 +1,41 @@ +[gMASK] +{%- if tools -%} +<|system|> +# 可用工具 +{% for tool in tools %} + {%- set function = tool.function if tool.get("function") else tool %} + +## {{ function.name }} + +{{ function | tojson(indent=4, ensure_ascii=False) }} +在调用上述函数时,请使用 Json 格式表示调用的参数。 +{%- endfor %} +{%- endif -%} + +{%- for msg in messages %} + {%- if msg.role == 'system' %} +<|system|> +{{ msg.content }} + {%- endif %} +{%- endfor %} + +{%- for message in messages if message.role != 'system' %} + {%- set role = message['role'] %} + {%- set content = message['content'] %} + {%- set meta = message.get("metadata", "") %} + + {%- if role == 'user' %} +<|user|> +{{ content }} + {%- elif role == 'assistant' and not meta %} +<|assistant|> +{{ content }} + {%- elif role == 'assistant' and meta %} +<|assistant|>{{ meta }} +{{ content }} + {%- elif role == 'observation' %} +<|observation|> +{{ content }} + {%- endif %} +{%- endfor %} +{% if add_generation_prompt %}<|assistant|>{% endif %} \ No newline at end of file diff --git a/checkpoint-300/optimizer.pt b/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2501924a662dbbfb0359359731a23aa517ea92e7 --- /dev/null +++ b/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3333bf5f531f60a6fc3d740ab135365c6df439918937a954bfe5193cdc0f2410 +size 8315744524 diff --git a/checkpoint-300/rng_state.pth b/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-300/scheduler.pt b/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b3e9b9e5fad1c054f0626489f3f4fbeed34dd83 --- /dev/null +++ b/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b6009fc7186316152f008e93444f14b13a9bc780422a9a3551f8a4615e2a8f +size 1064 diff --git a/checkpoint-300/special_tokens_map.json b/checkpoint-300/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf953c2c8a3c89778c92e54c685942bb1130616 --- /dev/null +++ b/checkpoint-300/special_tokens_map.json @@ -0,0 +1,32 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "eos_token": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-300/tokenizer.json b/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1dde37a2715c11628fc84bf571976f9f80eb69 --- /dev/null +++ b/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c +size 19966496 diff --git a/checkpoint-300/tokenizer_config.json b/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d690b31eb34e3d9741aff67b37e0ecda2604739b --- /dev/null +++ b/checkpoint-300/tokenizer_config.json @@ -0,0 +1,145 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|user|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-300/trainer_state.json b/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3902a72dcdb56f1c572d12e39feef2d24121a58d --- /dev/null +++ b/checkpoint-300/trainer_state.json @@ -0,0 +1,334 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9486166007905138, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 0.5656170375645161, + "epoch": 0.03162055335968379, + "grad_norm": 487949.625, + "learning_rate": 0.000971608832807571, + "loss": 0.598, + "mean_token_accuracy": 0.8383935391902924, + "num_tokens": 611773.0, + "step": 10 + }, + { + "entropy": 0.5844515649601817, + "epoch": 0.06324110671936758, + "grad_norm": 3576.093505859375, + "learning_rate": 0.0009400630914826499, + "loss": 0.6717, + "mean_token_accuracy": 0.8245183542370796, + "num_tokens": 1210060.0, + "step": 20 + }, + { + "entropy": 0.4919601235538721, + "epoch": 0.09486166007905138, + "grad_norm": 462362592.0, + "learning_rate": 0.0009085173501577287, + "loss": 0.6075, + "mean_token_accuracy": 0.8355057552456856, + "num_tokens": 1815732.0, + "step": 30 + }, + { + "entropy": 0.45399096198379996, + "epoch": 0.12648221343873517, + "grad_norm": 48967.296875, + "learning_rate": 0.0008769716088328076, + "loss": 0.6877, + "mean_token_accuracy": 0.8247161574661732, + "num_tokens": 2372291.0, + "step": 40 + }, + { + "entropy": 0.5189430050551891, + "epoch": 0.15810276679841898, + "grad_norm": 20488920.0, + "learning_rate": 0.0008454258675078864, + "loss": 0.6631, + "mean_token_accuracy": 0.8253108873963356, + "num_tokens": 2970656.0, + "step": 50 + }, + { + "entropy": 0.5161575745791197, + "epoch": 0.18972332015810275, + "grad_norm": 11390319.0, + "learning_rate": 0.0008138801261829653, + "loss": 0.5682, + "mean_token_accuracy": 0.8430261947214603, + "num_tokens": 3561312.0, + "step": 60 + }, + { + "entropy": 0.5097578268498182, + "epoch": 0.22134387351778656, + "grad_norm": 5408220.5, + "learning_rate": 0.0007823343848580442, + "loss": 0.598, + "mean_token_accuracy": 0.8383074931800365, + "num_tokens": 4164666.0, + "step": 70 + }, + { + "entropy": 0.561078536324203, + "epoch": 0.25296442687747034, + "grad_norm": 3655628.0, + "learning_rate": 0.000750788643533123, + "loss": 0.5818, + "mean_token_accuracy": 0.8375455126166343, + "num_tokens": 4750115.0, + "step": 80 + }, + { + "entropy": 0.6730448313057422, + "epoch": 0.2845849802371542, + "grad_norm": 65684.3125, + "learning_rate": 0.0007192429022082018, + "loss": 0.6005, + "mean_token_accuracy": 0.8356033861637115, + "num_tokens": 5290377.0, + "step": 90 + }, + { + "entropy": 0.68943473957479, + "epoch": 0.31620553359683795, + "grad_norm": 18166769664.0, + "learning_rate": 0.0006876971608832808, + "loss": 0.6068, + "mean_token_accuracy": 0.8362394802272319, + "num_tokens": 5882068.0, + "step": 100 + }, + { + "entropy": 0.7109859976917505, + "epoch": 0.34782608695652173, + "grad_norm": 4711626.5, + "learning_rate": 0.0006561514195583596, + "loss": 0.6385, + "mean_token_accuracy": 0.829913080483675, + "num_tokens": 6480962.0, + "step": 110 + }, + { + "entropy": 0.6261379970237613, + "epoch": 0.3794466403162055, + "grad_norm": 1412058496.0, + "learning_rate": 0.0006246056782334385, + "loss": 0.5727, + "mean_token_accuracy": 0.8403497040271759, + "num_tokens": 7080708.0, + "step": 120 + }, + { + "entropy": 0.6068891424685716, + "epoch": 0.41106719367588934, + "grad_norm": 3915769.0, + "learning_rate": 0.0005930599369085173, + "loss": 0.5622, + "mean_token_accuracy": 0.8430461063981056, + "num_tokens": 7692408.0, + "step": 130 + }, + { + "entropy": 0.6536903701722622, + "epoch": 0.4426877470355731, + "grad_norm": 1457204.375, + "learning_rate": 0.0005615141955835961, + "loss": 0.6043, + "mean_token_accuracy": 0.833759855479002, + "num_tokens": 8329614.0, + "step": 140 + }, + { + "entropy": 0.646093986183405, + "epoch": 0.4743083003952569, + "grad_norm": 96467.4765625, + "learning_rate": 0.0005299684542586751, + "loss": 0.6169, + "mean_token_accuracy": 0.8327336788177491, + "num_tokens": 8933829.0, + "step": 150 + }, + { + "entropy": 0.5853382866829634, + "epoch": 0.5059288537549407, + "grad_norm": 3098197.5, + "learning_rate": 0.000498422712933754, + "loss": 0.5469, + "mean_token_accuracy": 0.8443389609456062, + "num_tokens": 9522595.0, + "step": 160 + }, + { + "entropy": 0.5903366718441247, + "epoch": 0.5375494071146245, + "grad_norm": 42660388.0, + "learning_rate": 0.0004668769716088328, + "loss": 0.604, + "mean_token_accuracy": 0.8362459398806095, + "num_tokens": 10143758.0, + "step": 170 + }, + { + "entropy": 0.6122400458902121, + "epoch": 0.5691699604743083, + "grad_norm": 757395008.0, + "learning_rate": 0.0004353312302839117, + "loss": 0.6429, + "mean_token_accuracy": 0.8282213471829891, + "num_tokens": 10692662.0, + "step": 180 + }, + { + "entropy": 0.7004607111215592, + "epoch": 0.6007905138339921, + "grad_norm": 316442400.0, + "learning_rate": 0.0004037854889589905, + "loss": 0.6737, + "mean_token_accuracy": 0.8188323535025119, + "num_tokens": 11286778.0, + "step": 190 + }, + { + "entropy": 0.6795283857733011, + "epoch": 0.6324110671936759, + "grad_norm": 19859658752.0, + "learning_rate": 0.0003722397476340694, + "loss": 0.6629, + "mean_token_accuracy": 0.8223141871392727, + "num_tokens": 11910969.0, + "step": 200 + }, + { + "entropy": 0.6172661986202002, + "epoch": 0.6640316205533597, + "grad_norm": 596563.25, + "learning_rate": 0.00034069400630914825, + "loss": 0.6219, + "mean_token_accuracy": 0.8312948845326901, + "num_tokens": 12506898.0, + "step": 210 + }, + { + "entropy": 0.5889476146548986, + "epoch": 0.6956521739130435, + "grad_norm": 38292772.0, + "learning_rate": 0.00030914826498422714, + "loss": 0.6256, + "mean_token_accuracy": 0.833721686899662, + "num_tokens": 13097833.0, + "step": 220 + }, + { + "entropy": 0.5375086467713117, + "epoch": 0.7272727272727273, + "grad_norm": 7002747.5, + "learning_rate": 0.00027760252365930597, + "loss": 0.5423, + "mean_token_accuracy": 0.8461122632026672, + "num_tokens": 13690464.0, + "step": 230 + }, + { + "entropy": 0.5905131205916405, + "epoch": 0.758893280632411, + "grad_norm": 4201593.0, + "learning_rate": 0.00024605678233438486, + "loss": 0.5946, + "mean_token_accuracy": 0.8356382519006729, + "num_tokens": 14269150.0, + "step": 240 + }, + { + "entropy": 0.553895766660571, + "epoch": 0.7905138339920948, + "grad_norm": 7530.97509765625, + "learning_rate": 0.00021451104100946372, + "loss": 0.5512, + "mean_token_accuracy": 0.8458476938307286, + "num_tokens": 14877904.0, + "step": 250 + }, + { + "entropy": 0.6424776270985604, + "epoch": 0.8221343873517787, + "grad_norm": 30025678.0, + "learning_rate": 0.00018296529968454258, + "loss": 0.6752, + "mean_token_accuracy": 0.8269011005759239, + "num_tokens": 15470400.0, + "step": 260 + }, + { + "entropy": 0.6024509601294994, + "epoch": 0.8537549407114624, + "grad_norm": 179891.234375, + "learning_rate": 0.00015141955835962145, + "loss": 0.6077, + "mean_token_accuracy": 0.8359923847019672, + "num_tokens": 16078040.0, + "step": 270 + }, + { + "entropy": 0.6116770006716251, + "epoch": 0.8853754940711462, + "grad_norm": 43860216.0, + "learning_rate": 0.00011987381703470032, + "loss": 0.6142, + "mean_token_accuracy": 0.8328806236386299, + "num_tokens": 16666971.0, + "step": 280 + }, + { + "entropy": 0.593284934386611, + "epoch": 0.9169960474308301, + "grad_norm": 2078294.875, + "learning_rate": 8.832807570977918e-05, + "loss": 0.6079, + "mean_token_accuracy": 0.8367695853114128, + "num_tokens": 17271306.0, + "step": 290 + }, + { + "entropy": 0.5837210457772016, + "epoch": 0.9486166007905138, + "grad_norm": 2131766.0, + "learning_rate": 5.6782334384858046e-05, + "loss": 0.5676, + "mean_token_accuracy": 0.8406473062932491, + "num_tokens": 17878825.0, + "step": 300 + } + ], + "logging_steps": 10, + "max_steps": 317, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.5050735928718336e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-300/training_args.bin b/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..120f85ab118be2c00862d7334c96dd25039dfeea --- /dev/null +++ b/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad5302c07edbe0267e2b38e65245429ed8bff1dce0fb4fc7af4aef2083cf5b +size 5880 diff --git a/checkpoint-317/README.md b/checkpoint-317/README.md new file mode 100644 index 0000000000000000000000000000000000000000..235c2e6ff7d5d996be0c01802fa2d7707c43b969 --- /dev/null +++ b/checkpoint-317/README.md @@ -0,0 +1,209 @@ +--- +base_model: zai-org/GLM-4-32B-0414 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:zai-org/GLM-4-32B-0414 +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-317/adapter_config.json b/checkpoint-317/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ebe86d1363a1171c6cd0dcf67ad18fa37c9f85 --- /dev/null +++ b/checkpoint-317/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "zai-org/GLM-4-32B-0414", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 256, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-317/adapter_model.safetensors b/checkpoint-317/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b59a542639a167049f8305880161855fb8df2bc6 --- /dev/null +++ b/checkpoint-317/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0449cadcb46edd1c553d548c9e5994006bddec56295db09c6773bfb8407c8b75 +size 4157688432 diff --git a/checkpoint-317/chat_template.jinja b/checkpoint-317/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..fdd583731de8bdf1830d38a3e34f7cd8f80605dd --- /dev/null +++ b/checkpoint-317/chat_template.jinja @@ -0,0 +1,41 @@ +[gMASK] +{%- if tools -%} +<|system|> +# 可用工具 +{% for tool in tools %} + {%- set function = tool.function if tool.get("function") else tool %} + +## {{ function.name }} + +{{ function | tojson(indent=4, ensure_ascii=False) }} +在调用上述函数时,请使用 Json 格式表示调用的参数。 +{%- endfor %} +{%- endif -%} + +{%- for msg in messages %} + {%- if msg.role == 'system' %} +<|system|> +{{ msg.content }} + {%- endif %} +{%- endfor %} + +{%- for message in messages if message.role != 'system' %} + {%- set role = message['role'] %} + {%- set content = message['content'] %} + {%- set meta = message.get("metadata", "") %} + + {%- if role == 'user' %} +<|user|> +{{ content }} + {%- elif role == 'assistant' and not meta %} +<|assistant|> +{{ content }} + {%- elif role == 'assistant' and meta %} +<|assistant|>{{ meta }} +{{ content }} + {%- elif role == 'observation' %} +<|observation|> +{{ content }} + {%- endif %} +{%- endfor %} +{% if add_generation_prompt %}<|assistant|>{% endif %} \ No newline at end of file diff --git a/checkpoint-317/optimizer.pt b/checkpoint-317/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6681cdcba292dc574d55258541a9f28730dc6a81 --- /dev/null +++ b/checkpoint-317/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:babbff104872058ce9f0dbaef97484e24519b4b4156a974f734f6f05f9cd76fd +size 8315744524 diff --git a/checkpoint-317/rng_state.pth b/checkpoint-317/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-317/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-317/scheduler.pt b/checkpoint-317/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4317cc47a2228463b9db3c973c24f7d7decfe1c5 --- /dev/null +++ b/checkpoint-317/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d705da2903d14f5fb7a6e8985263984e21045c6722f90267e9774272447190 +size 1064 diff --git a/checkpoint-317/special_tokens_map.json b/checkpoint-317/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf953c2c8a3c89778c92e54c685942bb1130616 --- /dev/null +++ b/checkpoint-317/special_tokens_map.json @@ -0,0 +1,32 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "eos_token": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-317/tokenizer.json b/checkpoint-317/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1dde37a2715c11628fc84bf571976f9f80eb69 --- /dev/null +++ b/checkpoint-317/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c +size 19966496 diff --git a/checkpoint-317/tokenizer_config.json b/checkpoint-317/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d690b31eb34e3d9741aff67b37e0ecda2604739b --- /dev/null +++ b/checkpoint-317/tokenizer_config.json @@ -0,0 +1,145 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|user|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-317/trainer_state.json b/checkpoint-317/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6f236338799047bd16ab007ae583935c4cf4440c --- /dev/null +++ b/checkpoint-317/trainer_state.json @@ -0,0 +1,344 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 317, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 0.5656170375645161, + "epoch": 0.03162055335968379, + "grad_norm": 487949.625, + "learning_rate": 0.000971608832807571, + "loss": 0.598, + "mean_token_accuracy": 0.8383935391902924, + "num_tokens": 611773.0, + "step": 10 + }, + { + "entropy": 0.5844515649601817, + "epoch": 0.06324110671936758, + "grad_norm": 3576.093505859375, + "learning_rate": 0.0009400630914826499, + "loss": 0.6717, + "mean_token_accuracy": 0.8245183542370796, + "num_tokens": 1210060.0, + "step": 20 + }, + { + "entropy": 0.4919601235538721, + "epoch": 0.09486166007905138, + "grad_norm": 462362592.0, + "learning_rate": 0.0009085173501577287, + "loss": 0.6075, + "mean_token_accuracy": 0.8355057552456856, + "num_tokens": 1815732.0, + "step": 30 + }, + { + "entropy": 0.45399096198379996, + "epoch": 0.12648221343873517, + "grad_norm": 48967.296875, + "learning_rate": 0.0008769716088328076, + "loss": 0.6877, + "mean_token_accuracy": 0.8247161574661732, + "num_tokens": 2372291.0, + "step": 40 + }, + { + "entropy": 0.5189430050551891, + "epoch": 0.15810276679841898, + "grad_norm": 20488920.0, + "learning_rate": 0.0008454258675078864, + "loss": 0.6631, + "mean_token_accuracy": 0.8253108873963356, + "num_tokens": 2970656.0, + "step": 50 + }, + { + "entropy": 0.5161575745791197, + "epoch": 0.18972332015810275, + "grad_norm": 11390319.0, + "learning_rate": 0.0008138801261829653, + "loss": 0.5682, + "mean_token_accuracy": 0.8430261947214603, + "num_tokens": 3561312.0, + "step": 60 + }, + { + "entropy": 0.5097578268498182, + "epoch": 0.22134387351778656, + "grad_norm": 5408220.5, + "learning_rate": 0.0007823343848580442, + "loss": 0.598, + "mean_token_accuracy": 0.8383074931800365, + "num_tokens": 4164666.0, + "step": 70 + }, + { + "entropy": 0.561078536324203, + "epoch": 0.25296442687747034, + "grad_norm": 3655628.0, + "learning_rate": 0.000750788643533123, + "loss": 0.5818, + "mean_token_accuracy": 0.8375455126166343, + "num_tokens": 4750115.0, + "step": 80 + }, + { + "entropy": 0.6730448313057422, + "epoch": 0.2845849802371542, + "grad_norm": 65684.3125, + "learning_rate": 0.0007192429022082018, + "loss": 0.6005, + "mean_token_accuracy": 0.8356033861637115, + "num_tokens": 5290377.0, + "step": 90 + }, + { + "entropy": 0.68943473957479, + "epoch": 0.31620553359683795, + "grad_norm": 18166769664.0, + "learning_rate": 0.0006876971608832808, + "loss": 0.6068, + "mean_token_accuracy": 0.8362394802272319, + "num_tokens": 5882068.0, + "step": 100 + }, + { + "entropy": 0.7109859976917505, + "epoch": 0.34782608695652173, + "grad_norm": 4711626.5, + "learning_rate": 0.0006561514195583596, + "loss": 0.6385, + "mean_token_accuracy": 0.829913080483675, + "num_tokens": 6480962.0, + "step": 110 + }, + { + "entropy": 0.6261379970237613, + "epoch": 0.3794466403162055, + "grad_norm": 1412058496.0, + "learning_rate": 0.0006246056782334385, + "loss": 0.5727, + "mean_token_accuracy": 0.8403497040271759, + "num_tokens": 7080708.0, + "step": 120 + }, + { + "entropy": 0.6068891424685716, + "epoch": 0.41106719367588934, + "grad_norm": 3915769.0, + "learning_rate": 0.0005930599369085173, + "loss": 0.5622, + "mean_token_accuracy": 0.8430461063981056, + "num_tokens": 7692408.0, + "step": 130 + }, + { + "entropy": 0.6536903701722622, + "epoch": 0.4426877470355731, + "grad_norm": 1457204.375, + "learning_rate": 0.0005615141955835961, + "loss": 0.6043, + "mean_token_accuracy": 0.833759855479002, + "num_tokens": 8329614.0, + "step": 140 + }, + { + "entropy": 0.646093986183405, + "epoch": 0.4743083003952569, + "grad_norm": 96467.4765625, + "learning_rate": 0.0005299684542586751, + "loss": 0.6169, + "mean_token_accuracy": 0.8327336788177491, + "num_tokens": 8933829.0, + "step": 150 + }, + { + "entropy": 0.5853382866829634, + "epoch": 0.5059288537549407, + "grad_norm": 3098197.5, + "learning_rate": 0.000498422712933754, + "loss": 0.5469, + "mean_token_accuracy": 0.8443389609456062, + "num_tokens": 9522595.0, + "step": 160 + }, + { + "entropy": 0.5903366718441247, + "epoch": 0.5375494071146245, + "grad_norm": 42660388.0, + "learning_rate": 0.0004668769716088328, + "loss": 0.604, + "mean_token_accuracy": 0.8362459398806095, + "num_tokens": 10143758.0, + "step": 170 + }, + { + "entropy": 0.6122400458902121, + "epoch": 0.5691699604743083, + "grad_norm": 757395008.0, + "learning_rate": 0.0004353312302839117, + "loss": 0.6429, + "mean_token_accuracy": 0.8282213471829891, + "num_tokens": 10692662.0, + "step": 180 + }, + { + "entropy": 0.7004607111215592, + "epoch": 0.6007905138339921, + "grad_norm": 316442400.0, + "learning_rate": 0.0004037854889589905, + "loss": 0.6737, + "mean_token_accuracy": 0.8188323535025119, + "num_tokens": 11286778.0, + "step": 190 + }, + { + "entropy": 0.6795283857733011, + "epoch": 0.6324110671936759, + "grad_norm": 19859658752.0, + "learning_rate": 0.0003722397476340694, + "loss": 0.6629, + "mean_token_accuracy": 0.8223141871392727, + "num_tokens": 11910969.0, + "step": 200 + }, + { + "entropy": 0.6172661986202002, + "epoch": 0.6640316205533597, + "grad_norm": 596563.25, + "learning_rate": 0.00034069400630914825, + "loss": 0.6219, + "mean_token_accuracy": 0.8312948845326901, + "num_tokens": 12506898.0, + "step": 210 + }, + { + "entropy": 0.5889476146548986, + "epoch": 0.6956521739130435, + "grad_norm": 38292772.0, + "learning_rate": 0.00030914826498422714, + "loss": 0.6256, + "mean_token_accuracy": 0.833721686899662, + "num_tokens": 13097833.0, + "step": 220 + }, + { + "entropy": 0.5375086467713117, + "epoch": 0.7272727272727273, + "grad_norm": 7002747.5, + "learning_rate": 0.00027760252365930597, + "loss": 0.5423, + "mean_token_accuracy": 0.8461122632026672, + "num_tokens": 13690464.0, + "step": 230 + }, + { + "entropy": 0.5905131205916405, + "epoch": 0.758893280632411, + "grad_norm": 4201593.0, + "learning_rate": 0.00024605678233438486, + "loss": 0.5946, + "mean_token_accuracy": 0.8356382519006729, + "num_tokens": 14269150.0, + "step": 240 + }, + { + "entropy": 0.553895766660571, + "epoch": 0.7905138339920948, + "grad_norm": 7530.97509765625, + "learning_rate": 0.00021451104100946372, + "loss": 0.5512, + "mean_token_accuracy": 0.8458476938307286, + "num_tokens": 14877904.0, + "step": 250 + }, + { + "entropy": 0.6424776270985604, + "epoch": 0.8221343873517787, + "grad_norm": 30025678.0, + "learning_rate": 0.00018296529968454258, + "loss": 0.6752, + "mean_token_accuracy": 0.8269011005759239, + "num_tokens": 15470400.0, + "step": 260 + }, + { + "entropy": 0.6024509601294994, + "epoch": 0.8537549407114624, + "grad_norm": 179891.234375, + "learning_rate": 0.00015141955835962145, + "loss": 0.6077, + "mean_token_accuracy": 0.8359923847019672, + "num_tokens": 16078040.0, + "step": 270 + }, + { + "entropy": 0.6116770006716251, + "epoch": 0.8853754940711462, + "grad_norm": 43860216.0, + "learning_rate": 0.00011987381703470032, + "loss": 0.6142, + "mean_token_accuracy": 0.8328806236386299, + "num_tokens": 16666971.0, + "step": 280 + }, + { + "entropy": 0.593284934386611, + "epoch": 0.9169960474308301, + "grad_norm": 2078294.875, + "learning_rate": 8.832807570977918e-05, + "loss": 0.6079, + "mean_token_accuracy": 0.8367695853114128, + "num_tokens": 17271306.0, + "step": 290 + }, + { + "entropy": 0.5837210457772016, + "epoch": 0.9486166007905138, + "grad_norm": 2131766.0, + "learning_rate": 5.6782334384858046e-05, + "loss": 0.5676, + "mean_token_accuracy": 0.8406473062932491, + "num_tokens": 17878825.0, + "step": 300 + }, + { + "entropy": 0.5644817750900983, + "epoch": 0.9802371541501976, + "grad_norm": 11456195.0, + "learning_rate": 2.5236593059936908e-05, + "loss": 0.5706, + "mean_token_accuracy": 0.8438552170991898, + "num_tokens": 18498693.0, + "step": 310 + } + ], + "logging_steps": 10, + "max_steps": 317, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.6925009010147574e+18, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-317/training_args.bin b/checkpoint-317/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..120f85ab118be2c00862d7334c96dd25039dfeea --- /dev/null +++ b/checkpoint-317/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad5302c07edbe0267e2b38e65245429ed8bff1dce0fb4fc7af4aef2083cf5b +size 5880 diff --git a/checkpoint-50/README.md b/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..235c2e6ff7d5d996be0c01802fa2d7707c43b969 --- /dev/null +++ b/checkpoint-50/README.md @@ -0,0 +1,209 @@ +--- +base_model: zai-org/GLM-4-32B-0414 +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:zai-org/GLM-4-32B-0414 +- lora +- sft +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.17.1 \ No newline at end of file diff --git a/checkpoint-50/adapter_config.json b/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f0ebe86d1363a1171c6cd0dcf67ad18fa37c9f85 --- /dev/null +++ b/checkpoint-50/adapter_config.json @@ -0,0 +1,42 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "zai-org/GLM-4-32B-0414", + "bias": "none", + "corda_config": null, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 256, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "up_proj", + "down_proj", + "o_proj", + "q_proj", + "gate_proj", + "k_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-50/adapter_model.safetensors b/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..07ec940a467dc825c98cfaf61ff416be5bbfef90 --- /dev/null +++ b/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd393cf0b22d511d1526b8ede144ce8e555f7ddd6c6f6c1a1c6ef0e975b139b6 +size 4157688432 diff --git a/checkpoint-50/chat_template.jinja b/checkpoint-50/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..fdd583731de8bdf1830d38a3e34f7cd8f80605dd --- /dev/null +++ b/checkpoint-50/chat_template.jinja @@ -0,0 +1,41 @@ +[gMASK] +{%- if tools -%} +<|system|> +# 可用工具 +{% for tool in tools %} + {%- set function = tool.function if tool.get("function") else tool %} + +## {{ function.name }} + +{{ function | tojson(indent=4, ensure_ascii=False) }} +在调用上述函数时,请使用 Json 格式表示调用的参数。 +{%- endfor %} +{%- endif -%} + +{%- for msg in messages %} + {%- if msg.role == 'system' %} +<|system|> +{{ msg.content }} + {%- endif %} +{%- endfor %} + +{%- for message in messages if message.role != 'system' %} + {%- set role = message['role'] %} + {%- set content = message['content'] %} + {%- set meta = message.get("metadata", "") %} + + {%- if role == 'user' %} +<|user|> +{{ content }} + {%- elif role == 'assistant' and not meta %} +<|assistant|> +{{ content }} + {%- elif role == 'assistant' and meta %} +<|assistant|>{{ meta }} +{{ content }} + {%- elif role == 'observation' %} +<|observation|> +{{ content }} + {%- endif %} +{%- endfor %} +{% if add_generation_prompt %}<|assistant|>{% endif %} \ No newline at end of file diff --git a/checkpoint-50/optimizer.pt b/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ccec7733159552c785fa8548716183b72677c58 --- /dev/null +++ b/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c257e527ce5ba8c2e2327e7167a002505934ebf85035077b43ff79484c157e47 +size 8315744524 diff --git a/checkpoint-50/rng_state.pth b/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93a52228ea16468eaab41b66555ced3eaaea002c --- /dev/null +++ b/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:386fcc8cc1089aade9450d86fb239ea3483f455fd2d78d8378645feecfec9d69 +size 14244 diff --git a/checkpoint-50/scheduler.pt b/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2b5e59c32706c726e69c769dd3d08411fb1395d --- /dev/null +++ b/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0fac3922a836e4f00f87cfe1225d31aa2d017be320ad0d10597e2ab326b3425 +size 1064 diff --git a/checkpoint-50/special_tokens_map.json b/checkpoint-50/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf953c2c8a3c89778c92e54c685942bb1130616 --- /dev/null +++ b/checkpoint-50/special_tokens_map.json @@ -0,0 +1,32 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "eos_token": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/checkpoint-50/tokenizer.json b/checkpoint-50/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1dde37a2715c11628fc84bf571976f9f80eb69 --- /dev/null +++ b/checkpoint-50/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c +size 19966496 diff --git a/checkpoint-50/tokenizer_config.json b/checkpoint-50/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d690b31eb34e3d9741aff67b37e0ecda2604739b --- /dev/null +++ b/checkpoint-50/tokenizer_config.json @@ -0,0 +1,145 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|user|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/checkpoint-50/trainer_state.json b/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a77f26ba3eb4bd019dc6a9d742e44d94d5bb4d80 --- /dev/null +++ b/checkpoint-50/trainer_state.json @@ -0,0 +1,84 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.15810276679841898, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "entropy": 0.5656170375645161, + "epoch": 0.03162055335968379, + "grad_norm": 487949.625, + "learning_rate": 0.000971608832807571, + "loss": 0.598, + "mean_token_accuracy": 0.8383935391902924, + "num_tokens": 611773.0, + "step": 10 + }, + { + "entropy": 0.5844515649601817, + "epoch": 0.06324110671936758, + "grad_norm": 3576.093505859375, + "learning_rate": 0.0009400630914826499, + "loss": 0.6717, + "mean_token_accuracy": 0.8245183542370796, + "num_tokens": 1210060.0, + "step": 20 + }, + { + "entropy": 0.4919601235538721, + "epoch": 0.09486166007905138, + "grad_norm": 462362592.0, + "learning_rate": 0.0009085173501577287, + "loss": 0.6075, + "mean_token_accuracy": 0.8355057552456856, + "num_tokens": 1815732.0, + "step": 30 + }, + { + "entropy": 0.45399096198379996, + "epoch": 0.12648221343873517, + "grad_norm": 48967.296875, + "learning_rate": 0.0008769716088328076, + "loss": 0.6877, + "mean_token_accuracy": 0.8247161574661732, + "num_tokens": 2372291.0, + "step": 40 + }, + { + "entropy": 0.5189430050551891, + "epoch": 0.15810276679841898, + "grad_norm": 20488920.0, + "learning_rate": 0.0008454258675078864, + "loss": 0.6631, + "mean_token_accuracy": 0.8253108873963356, + "num_tokens": 2970656.0, + "step": 50 + } + ], + "logging_steps": 10, + "max_steps": 317, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.823854699123835e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-50/training_args.bin b/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..120f85ab118be2c00862d7334c96dd25039dfeea --- /dev/null +++ b/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad5302c07edbe0267e2b38e65245429ed8bff1dce0fb4fc7af4aef2083cf5b +size 5880 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3cf953c2c8a3c89778c92e54c685942bb1130616 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,32 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "eos_token": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..4d1dde37a2715c11628fc84bf571976f9f80eb69 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ebeac0d8bd7879ead7b43c16b44981f277e47225de2bd7de9ae1a6cc664a8c +size 19966496 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d690b31eb34e3d9741aff67b37e0ecda2604739b --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,145 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|user|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "right", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..120f85ab118be2c00862d7334c96dd25039dfeea --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bad5302c07edbe0267e2b38e65245429ed8bff1dce0fb4fc7af4aef2083cf5b +size 5880