diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..926938ffa72030f0b1ae50de93a836b97648459c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-1252/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1565/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-1878/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-2191/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-2504/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-2817/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-313/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-3130/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-626/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-939/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoint-1252/config.json b/checkpoint-1252/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-1252/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-1252/generation_config.json b/checkpoint-1252/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-1252/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-1252/model.safetensors b/checkpoint-1252/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b546406d066edb153bbb12d29d6a8f91f5eb5e7a --- /dev/null +++ b/checkpoint-1252/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f0525f8ab2ffbe2efdc5812ffa88e20a6c6e55039a84aa91af1f408701f121 +size 2444578688 diff --git a/checkpoint-1252/optimizer.pt b/checkpoint-1252/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..77eeb80db0d4f98736eb73d1da4f28094a4dbe85 --- /dev/null +++ b/checkpoint-1252/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d2a0d279b41dbf52b8e81d0cb9810962d9b442c07d78409de40d4296443da3 +size 4887473903 diff --git a/checkpoint-1252/rng_state.pth b/checkpoint-1252/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..72aa1e16d783515fd90fe22c76a0c8dcfbaf6586 --- /dev/null +++ b/checkpoint-1252/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1aaeef3f9410d218573ec2fad66c7e9598bf5a6f15028d250fda343f351bd7e +size 14244 diff --git a/checkpoint-1252/scheduler.pt b/checkpoint-1252/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c7485afb37e60eb1da499cf85a2e73ba80cdeba --- /dev/null +++ b/checkpoint-1252/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02ce463792c97d1180f23f7f149e16e463ba4f3f98ce5afc01f4a1e6f7e51bb +size 1064 diff --git a/checkpoint-1252/sentencepiece.bpe.model b/checkpoint-1252/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-1252/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-1252/special_tokens_map.json b/checkpoint-1252/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-1252/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-1252/tokenizer.json b/checkpoint-1252/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-1252/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-1252/tokenizer_config.json b/checkpoint-1252/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-1252/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1252/trainer_state.json b/checkpoint-1252/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bac6944edd5b9ad2d420b993b54a0cc063ced708 --- /dev/null +++ b/checkpoint-1252/trainer_state.json @@ -0,0 +1,48 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 1252, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 1.323052167892456, + "learning_rate": 2.10990099009901e-05, + "loss": 0.4026, + "step": 1000 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3834058896310272.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1252/training_args.bin b/checkpoint-1252/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-1252/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-1565/config.json b/checkpoint-1565/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-1565/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-1565/generation_config.json b/checkpoint-1565/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-1565/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-1565/model.safetensors b/checkpoint-1565/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f06ab7131c95b2157ecf7d41c97d8b0f9949ec10 --- /dev/null +++ b/checkpoint-1565/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a285f7e42a8b082a91007f739aad6efbf69eb19ecd08cf8b664f1a1537049390 +size 2444578688 diff --git a/checkpoint-1565/optimizer.pt b/checkpoint-1565/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f755ded95bee2b661efa13967a44ecff710815b --- /dev/null +++ b/checkpoint-1565/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b0dfd9e52bc4f706fafe9405e2615a37736dce0dcdf9653e8e3362c690e957 +size 4887473903 diff --git a/checkpoint-1565/rng_state.pth b/checkpoint-1565/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6270014a647013445ac986f1bfd576ecb9594275 --- /dev/null +++ b/checkpoint-1565/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3993d5a7d4378d375a299f6acc06eb97873d96ac6120c4876cf8f00bfae4a1 +size 14244 diff --git a/checkpoint-1565/scheduler.pt b/checkpoint-1565/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..20e365ed9c2b68d496116f7e181056a7ab30b891 --- /dev/null +++ b/checkpoint-1565/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8a71062b120444294f597a9876fe4f3d1ce572002214ad319857e85da83d7e1 +size 1064 diff --git a/checkpoint-1565/sentencepiece.bpe.model b/checkpoint-1565/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-1565/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-1565/special_tokens_map.json b/checkpoint-1565/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-1565/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-1565/tokenizer.json b/checkpoint-1565/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-1565/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-1565/tokenizer_config.json b/checkpoint-1565/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-1565/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1565/trainer_state.json b/checkpoint-1565/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d303ff8f5270aea483bdffed85afffd2d95b6279 --- /dev/null +++ b/checkpoint-1565/trainer_state.json @@ -0,0 +1,55 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 1565, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 1.323052167892456, + "learning_rate": 2.10990099009901e-05, + "loss": 0.4026, + "step": 1000 + }, + { + "epoch": 4.792332268370607, + "grad_norm": 1.4039280414581299, + "learning_rate": 1.614851485148515e-05, + "loss": 0.2068, + "step": 1500 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4792709065998336.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1565/training_args.bin b/checkpoint-1565/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-1565/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-1878/config.json b/checkpoint-1878/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-1878/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-1878/generation_config.json b/checkpoint-1878/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-1878/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-1878/model.safetensors b/checkpoint-1878/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0b27b654672618276f5db7758baf5f017659936 --- /dev/null +++ b/checkpoint-1878/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc67c7ba4e87413f693653576c198fddf4b12b1d6adde26c0e15f436b6605c6f +size 2444578688 diff --git a/checkpoint-1878/optimizer.pt b/checkpoint-1878/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6a80e7afcb364087cec54877946b6e921b4de64 --- /dev/null +++ b/checkpoint-1878/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55283e83243c881305f1c8cf01e502bc929dd67cd4fec32a27f054ed65844a8b +size 4887473903 diff --git a/checkpoint-1878/rng_state.pth b/checkpoint-1878/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..60e584b69f47ba71e76c7fb43460898ab162459a --- /dev/null +++ b/checkpoint-1878/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab455eade6e331a7e2eb57d6f53c9111dda70f9607b06d84502160522eeaee4f +size 14244 diff --git a/checkpoint-1878/scheduler.pt b/checkpoint-1878/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b3e6b3cca04eaff2d06da662588f3fa708535a0 --- /dev/null +++ b/checkpoint-1878/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c86ee9ec778e6f3e17c52305e8115444951a8b42a0727a5ffbcf33043046f971 +size 1064 diff --git a/checkpoint-1878/sentencepiece.bpe.model b/checkpoint-1878/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-1878/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-1878/special_tokens_map.json b/checkpoint-1878/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-1878/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-1878/tokenizer.json b/checkpoint-1878/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-1878/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-1878/tokenizer_config.json b/checkpoint-1878/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-1878/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-1878/trainer_state.json b/checkpoint-1878/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..68892a77389c170a8cf40aa9764163bb32c81a62 --- /dev/null +++ b/checkpoint-1878/trainer_state.json @@ -0,0 +1,55 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 1878, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 1.323052167892456, + "learning_rate": 2.10990099009901e-05, + "loss": 0.4026, + "step": 1000 + }, + { + "epoch": 4.792332268370607, + "grad_norm": 1.4039280414581299, + "learning_rate": 1.614851485148515e-05, + "loss": 0.2068, + "step": 1500 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5751020621660160.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1878/training_args.bin b/checkpoint-1878/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-1878/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-2191/config.json b/checkpoint-2191/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-2191/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-2191/generation_config.json b/checkpoint-2191/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-2191/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-2191/model.safetensors b/checkpoint-2191/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b7d94ed4c9ab591170d465555e9b27259cf0ac6 --- /dev/null +++ b/checkpoint-2191/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3715fdc8ab27cab8e3326ee9f96ff39c51f67781fa83a4e7185fbce523f0f647 +size 2444578688 diff --git a/checkpoint-2191/optimizer.pt b/checkpoint-2191/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..537936c9d00589510f119e9625ed12cdff17da53 --- /dev/null +++ b/checkpoint-2191/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63538078cfc908a3f2790dae832ba84dc5a072187c63c227db4d55b77335b341 +size 4887473903 diff --git a/checkpoint-2191/rng_state.pth b/checkpoint-2191/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e98e3921383ba5e46659610a485d2a104919ba2f --- /dev/null +++ b/checkpoint-2191/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637e40723128bc2e097af2e13e354071cfb4955daa28dc2101829d4ed8380cd0 +size 14244 diff --git a/checkpoint-2191/scheduler.pt b/checkpoint-2191/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0b95402d8f0f835c24262c510e56c934cba1b78 --- /dev/null +++ b/checkpoint-2191/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10e8b0cb1aec2c0d24d830d97bacf24e577cf7a426abed845491650f16f28bb +size 1064 diff --git a/checkpoint-2191/sentencepiece.bpe.model b/checkpoint-2191/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-2191/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-2191/special_tokens_map.json b/checkpoint-2191/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-2191/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-2191/tokenizer.json b/checkpoint-2191/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-2191/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-2191/tokenizer_config.json b/checkpoint-2191/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-2191/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-2191/trainer_state.json b/checkpoint-2191/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f11341b20f5f593acb71a2047c59b5eb5d32c623 --- /dev/null +++ b/checkpoint-2191/trainer_state.json @@ -0,0 +1,62 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.0, + "eval_steps": 500, + "global_step": 2191, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 1.323052167892456, + "learning_rate": 2.10990099009901e-05, + "loss": 0.4026, + "step": 1000 + }, + { + "epoch": 4.792332268370607, + "grad_norm": 1.4039280414581299, + "learning_rate": 1.614851485148515e-05, + "loss": 0.2068, + "step": 1500 + }, + { + "epoch": 6.389776357827476, + "grad_norm": 0.6008857488632202, + "learning_rate": 1.1198019801980199e-05, + "loss": 0.1062, + "step": 2000 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6712311980752896.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2191/training_args.bin b/checkpoint-2191/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-2191/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-2504/config.json b/checkpoint-2504/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-2504/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-2504/generation_config.json b/checkpoint-2504/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-2504/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-2504/model.safetensors b/checkpoint-2504/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c768d38575d60988906089c4faa0c2b4081e8ab7 --- /dev/null +++ b/checkpoint-2504/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff000278d81dffc7e911acffd07ae3798d23fd89007a030af7c6363e699f614 +size 2444578688 diff --git a/checkpoint-2504/optimizer.pt b/checkpoint-2504/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c700cd55952e17d850c0c889a47e5c86cf2c069 --- /dev/null +++ b/checkpoint-2504/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52b81e3aa2d1fdcd6882dacc3e971e0f44d143c853cb010d26999676c0a0a4c7 +size 4887473903 diff --git a/checkpoint-2504/rng_state.pth b/checkpoint-2504/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..546609cf6994a81fbd4a3aa74e241b5541d0785b --- /dev/null +++ b/checkpoint-2504/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba6d05745d81eb8970d32c105f496a5bb40d0139932c387915805e2d3280c6bf +size 14244 diff --git a/checkpoint-2504/scheduler.pt b/checkpoint-2504/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..82c95251a4e7995f854299f397313d0dd02a9938 --- /dev/null +++ b/checkpoint-2504/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29ce1ee2791ab6e384afa58710ffb43ee9b5de88edf39b5d48677989e14368a2 +size 1064 diff --git a/checkpoint-2504/sentencepiece.bpe.model b/checkpoint-2504/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-2504/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-2504/special_tokens_map.json b/checkpoint-2504/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-2504/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-2504/tokenizer.json b/checkpoint-2504/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-2504/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-2504/tokenizer_config.json b/checkpoint-2504/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-2504/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-2504/trainer_state.json b/checkpoint-2504/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..14ba125da535e4d271e97b7d5cff1081252e3920 --- /dev/null +++ b/checkpoint-2504/trainer_state.json @@ -0,0 +1,69 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.0, + "eval_steps": 500, + "global_step": 2504, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 1.323052167892456, + "learning_rate": 2.10990099009901e-05, + "loss": 0.4026, + "step": 1000 + }, + { + "epoch": 4.792332268370607, + "grad_norm": 1.4039280414581299, + "learning_rate": 1.614851485148515e-05, + "loss": 0.2068, + "step": 1500 + }, + { + "epoch": 6.389776357827476, + "grad_norm": 0.6008857488632202, + "learning_rate": 1.1198019801980199e-05, + "loss": 0.1062, + "step": 2000 + }, + { + "epoch": 7.987220447284345, + "grad_norm": 0.789228081703186, + "learning_rate": 6.247524752475248e-06, + "loss": 0.0555, + "step": 2500 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7670894427635712.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2504/training_args.bin b/checkpoint-2504/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-2504/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-2817/config.json b/checkpoint-2817/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-2817/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-2817/generation_config.json b/checkpoint-2817/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-2817/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-2817/model.safetensors b/checkpoint-2817/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c90a58f1a402602b6dab3240011c690545ab698 --- /dev/null +++ b/checkpoint-2817/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b87e0fa21823996748de2b56384ae085a9e91877f44f78a63e9dfaf58c754e88 +size 2444578688 diff --git a/checkpoint-2817/optimizer.pt b/checkpoint-2817/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..08d9d9ddfbc490a3a8222cc7b265adc92d46078e --- /dev/null +++ b/checkpoint-2817/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7049ab82ec29491273b6039005a99b7d86034ef0d9c89eda9a5eebaac37376 +size 4887473903 diff --git a/checkpoint-2817/rng_state.pth b/checkpoint-2817/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d26ffb0469d619dbb12b26e43eee6bcce119227 --- /dev/null +++ b/checkpoint-2817/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f39e9c65a54d796dd705c3ac2098012a649b7be95eb5302a994bdba6442dc8 +size 14244 diff --git a/checkpoint-2817/scheduler.pt b/checkpoint-2817/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0317e5ccbca29f87732d96f171f3b215e897211 --- /dev/null +++ b/checkpoint-2817/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3dc4e5fc93cc164724dc0adf565e63211f04021ed369396273da790a9b76666 +size 1064 diff --git a/checkpoint-2817/sentencepiece.bpe.model b/checkpoint-2817/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-2817/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-2817/special_tokens_map.json b/checkpoint-2817/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-2817/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-2817/tokenizer.json b/checkpoint-2817/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-2817/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-2817/tokenizer_config.json b/checkpoint-2817/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-2817/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-2817/trainer_state.json b/checkpoint-2817/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..820813672f9bb20547549bff98df640db6f06839 --- /dev/null +++ b/checkpoint-2817/trainer_state.json @@ -0,0 +1,69 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 9.0, + "eval_steps": 500, + "global_step": 2817, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 1.323052167892456, + "learning_rate": 2.10990099009901e-05, + "loss": 0.4026, + "step": 1000 + }, + { + "epoch": 4.792332268370607, + "grad_norm": 1.4039280414581299, + "learning_rate": 1.614851485148515e-05, + "loss": 0.2068, + "step": 1500 + }, + { + "epoch": 6.389776357827476, + "grad_norm": 0.6008857488632202, + "learning_rate": 1.1198019801980199e-05, + "loss": 0.1062, + "step": 2000 + }, + { + "epoch": 7.987220447284345, + "grad_norm": 0.789228081703186, + "learning_rate": 6.247524752475248e-06, + "loss": 0.0555, + "step": 2500 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8630357270986752.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2817/training_args.bin b/checkpoint-2817/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-2817/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-313/config.json b/checkpoint-313/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-313/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-313/generation_config.json b/checkpoint-313/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-313/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-313/model.safetensors b/checkpoint-313/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed177be2b6d0f8f4be8eaa3dc9ed05eb36767d47 --- /dev/null +++ b/checkpoint-313/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:572090900b60e3d1b5bb096bb8097d8635f164cb329b1120ea008ed0d09de8e7 +size 2444578688 diff --git a/checkpoint-313/optimizer.pt b/checkpoint-313/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..602889b176d0c6f27898b9a23cf46fae60769deb --- /dev/null +++ b/checkpoint-313/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:517454890968776122f8baa134e1fd13634427b8e390973e77529346892eee5f +size 4887473903 diff --git a/checkpoint-313/rng_state.pth b/checkpoint-313/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..924a0d267cabe36eb4883f42a2e088c00c443bbc --- /dev/null +++ b/checkpoint-313/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d57c9aaa96fcc774394394493eb5ee93f52435442a5d57394ccd0c3faede9b5c +size 14244 diff --git a/checkpoint-313/scheduler.pt b/checkpoint-313/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..942c73073adb22fbd26ffaf74dd27c4c835eba64 --- /dev/null +++ b/checkpoint-313/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9947e25e4b154fec5385c47f0be8006ab0ae4690aa5795c59e99480d8c4bc06 +size 1064 diff --git a/checkpoint-313/sentencepiece.bpe.model b/checkpoint-313/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-313/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-313/special_tokens_map.json b/checkpoint-313/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-313/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-313/tokenizer.json b/checkpoint-313/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-313/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-313/tokenizer_config.json b/checkpoint-313/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-313/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-313/trainer_state.json b/checkpoint-313/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9d18ea0b71737c538f1672c26715d13ff4597f18 --- /dev/null +++ b/checkpoint-313/trainer_state.json @@ -0,0 +1,33 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 313, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 956957099556864.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-313/training_args.bin b/checkpoint-313/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-313/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-3130/config.json b/checkpoint-3130/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-3130/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-3130/generation_config.json b/checkpoint-3130/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-3130/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-3130/model.safetensors b/checkpoint-3130/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e5c81bd086c8dbaba32b1204f7eb056b7cfa12b --- /dev/null +++ b/checkpoint-3130/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26581a190a90db2a149f605ac07f203b99c3ef969ed2259d6f9edae4f9b2a7f2 +size 2444578688 diff --git a/checkpoint-3130/optimizer.pt b/checkpoint-3130/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5ed0bc6d6fb5dc0ebb4405440a7377691c77020 --- /dev/null +++ b/checkpoint-3130/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eebf4d2fc4be68306a2898fb430e384a455abf1fe0ced42822b2a2df70e85b9 +size 4887473903 diff --git a/checkpoint-3130/rng_state.pth b/checkpoint-3130/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7936a195a38ace8b1af4789ed24d5112f9c926e7 --- /dev/null +++ b/checkpoint-3130/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:375c4ef700422abd726c8208807985531469ab0858a5ee6dbc4ec1236448834b +size 14244 diff --git a/checkpoint-3130/scheduler.pt b/checkpoint-3130/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8706e3cf751ea7fc3d1a828895ad9f06fb5fc50 --- /dev/null +++ b/checkpoint-3130/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:576521f57ce6164abf82c0638a1d0835ebc9679b95cf48c027412ef9fe1fe035 +size 1064 diff --git a/checkpoint-3130/sentencepiece.bpe.model b/checkpoint-3130/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-3130/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-3130/special_tokens_map.json b/checkpoint-3130/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-3130/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-3130/tokenizer.json b/checkpoint-3130/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-3130/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-3130/tokenizer_config.json b/checkpoint-3130/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-3130/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-3130/trainer_state.json b/checkpoint-3130/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ea23d1e7baa0bda59a6b54e7821b82b4cb7d5b9c --- /dev/null +++ b/checkpoint-3130/trainer_state.json @@ -0,0 +1,76 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 3130, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 1.323052167892456, + "learning_rate": 2.10990099009901e-05, + "loss": 0.4026, + "step": 1000 + }, + { + "epoch": 4.792332268370607, + "grad_norm": 1.4039280414581299, + "learning_rate": 1.614851485148515e-05, + "loss": 0.2068, + "step": 1500 + }, + { + "epoch": 6.389776357827476, + "grad_norm": 0.6008857488632202, + "learning_rate": 1.1198019801980199e-05, + "loss": 0.1062, + "step": 2000 + }, + { + "epoch": 7.987220447284345, + "grad_norm": 0.789228081703186, + "learning_rate": 6.247524752475248e-06, + "loss": 0.0555, + "step": 2500 + }, + { + "epoch": 9.584664536741213, + "grad_norm": 0.2787887454032898, + "learning_rate": 1.2970297029702972e-06, + "loss": 0.0336, + "step": 3000 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 9589481500311552.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3130/training_args.bin b/checkpoint-3130/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-3130/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-626/config.json b/checkpoint-626/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-626/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-626/generation_config.json b/checkpoint-626/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-626/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-626/model.safetensors b/checkpoint-626/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5d5fe9f36344c4a3cf054452a5a55aa86d5489a --- /dev/null +++ b/checkpoint-626/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bed131c20cd14d2d45f9719b44732275c65269a18aa6473b51842b806bdf866 +size 2444578688 diff --git a/checkpoint-626/optimizer.pt b/checkpoint-626/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec04048366121d8ee52b27e29746ae14056c206c --- /dev/null +++ b/checkpoint-626/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d49953eb97a3b1b4edc307da7d4ddd0d3201cd841ffcba8d89ef39f06d1061a8 +size 4887473903 diff --git a/checkpoint-626/rng_state.pth b/checkpoint-626/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d15653d03d372251e0d8707cebb110f67dd3f24f --- /dev/null +++ b/checkpoint-626/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:652a9baf52010902046aa8af83824a30c0231716204fb305c979a9129756e231 +size 14244 diff --git a/checkpoint-626/scheduler.pt b/checkpoint-626/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9d429de181e82088eb7dc7feb3b67595d3171b1 --- /dev/null +++ b/checkpoint-626/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3346e1ecf69af61eac82d54c5fc86115279382a4f440817692128ac6afdeeb4d +size 1064 diff --git a/checkpoint-626/sentencepiece.bpe.model b/checkpoint-626/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-626/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-626/special_tokens_map.json b/checkpoint-626/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-626/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-626/tokenizer.json b/checkpoint-626/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-626/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-626/tokenizer_config.json b/checkpoint-626/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-626/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-626/trainer_state.json b/checkpoint-626/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f7679a78db839009850354c7682bf0b7e9d0c423 --- /dev/null +++ b/checkpoint-626/trainer_state.json @@ -0,0 +1,41 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 626, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1916081328881664.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-626/training_args.bin b/checkpoint-626/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-626/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/checkpoint-939/config.json b/checkpoint-939/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/checkpoint-939/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/checkpoint-939/generation_config.json b/checkpoint-939/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/checkpoint-939/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/checkpoint-939/model.safetensors b/checkpoint-939/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2bfab6283e0eb389e991f3853e00e071e6ea46d --- /dev/null +++ b/checkpoint-939/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9694839dcc2469bb701069d2b8ea74fca902be1c38f3787d3ff24e73d26face +size 2444578688 diff --git a/checkpoint-939/optimizer.pt b/checkpoint-939/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..260a5c38f35713d936e63e177c008b7ef707cadb --- /dev/null +++ b/checkpoint-939/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3456a12b933c36725bfec8e6eb0439cf0a514519cd1d0334798d465b1912a07 +size 4887473903 diff --git a/checkpoint-939/rng_state.pth b/checkpoint-939/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d97150c1649eb64c5cb27ffc676a82cea4a32b5 --- /dev/null +++ b/checkpoint-939/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d08114cf581a5ea7b461376deb187d6bed683843bbce50eeda0cfa79238be1e5 +size 14244 diff --git a/checkpoint-939/scheduler.pt b/checkpoint-939/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c787429b0949c274f04c16760afeb7134c9e4fa --- /dev/null +++ b/checkpoint-939/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da727ab9e4d518533045b5030ae13a3162d685874ee8570496a8365d7e9ccbc3 +size 1064 diff --git a/checkpoint-939/sentencepiece.bpe.model b/checkpoint-939/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/checkpoint-939/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/checkpoint-939/special_tokens_map.json b/checkpoint-939/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/checkpoint-939/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/checkpoint-939/tokenizer.json b/checkpoint-939/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/checkpoint-939/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/checkpoint-939/tokenizer_config.json b/checkpoint-939/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/checkpoint-939/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/checkpoint-939/trainer_state.json b/checkpoint-939/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..54a82ebe1f5d4f6b0c39170a401227028a4d61db --- /dev/null +++ b/checkpoint-939/trainer_state.json @@ -0,0 +1,41 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 939, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2874934666985472.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-939/training_args.bin b/checkpoint-939/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92 --- /dev/null +++ b/checkpoint-939/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3 +size 5432 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a --- /dev/null +++ b/config.json @@ -0,0 +1,57 @@ +{ + "_num_labels": 3, + "activation_dropout": 0.0, + "activation_function": "relu", + "add_bias_logits": false, + "add_final_layer_norm": true, + "architectures": [ + "MBartForConditionalGeneration" + ], + "attention_dropout": 0.0, + "bos_token_id": 0, + "classif_dropout": 0.0, + "classifier_dropout": 0.0, + "d_model": 1024, + "decoder_attention_heads": 16, + "decoder_ffn_dim": 4096, + "decoder_layerdrop": 0.0, + "decoder_layers": 12, + "decoder_start_token_id": 2, + "dropout": 0.1, + "early_stopping": null, + "encoder_attention_heads": 16, + "encoder_ffn_dim": 4096, + "encoder_layerdrop": 0.0, + "encoder_layers": 12, + "eos_token_id": 2, + "forced_eos_token_id": 2, + "gradient_checkpointing": false, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1", + "2": "LABEL_2" + }, + "init_std": 0.02, + "is_encoder_decoder": true, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1, + "LABEL_2": 2 + }, + "max_length": null, + "max_position_embeddings": 1024, + "model_type": "mbart", + "normalize_before": true, + "normalize_embedding": true, + "num_beams": null, + "num_hidden_layers": 12, + "output_past": true, + "pad_token_id": 1, + "scale_embedding": true, + "static_position_embeddings": false, + "tokenizer_class": "MBart50Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.52.4", + "use_cache": true, + "vocab_size": 250054 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "_from_model_config": true, + "bos_token_id": 0, + "decoder_start_token_id": 2, + "early_stopping": true, + "eos_token_id": 2, + "forced_bos_token_id": 250025, + "forced_eos_token_id": 2, + "max_length": 200, + "num_beams": 5, + "pad_token_id": 1, + "transformers_version": "4.52.4" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e5c81bd086c8dbaba32b1204f7eb056b7cfa12b --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26581a190a90db2a149f605ac07f203b99c3ef969ed2259d6f9edae4f9b2a7f2 +size 2444578688 diff --git a/sentencepiece.bpe.model b/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,69 @@ +{ + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c +size 17110076 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,529 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "ar_AR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250002": { + "content": "cs_CZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250003": { + "content": "de_DE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250004": { + "content": "en_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250005": { + "content": "es_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250006": { + "content": "et_EE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250007": { + "content": "fi_FI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250008": { + "content": "fr_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250009": { + "content": "gu_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250010": { + "content": "hi_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250011": { + "content": "it_IT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250012": { + "content": "ja_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250013": { + "content": "kk_KZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250014": { + "content": "ko_KR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250015": { + "content": "lt_LT", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250016": { + "content": "lv_LV", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250017": { + "content": "my_MM", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250018": { + "content": "ne_NP", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250019": { + "content": "nl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250020": { + "content": "ro_RO", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250021": { + "content": "ru_RU", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250022": { + "content": "si_LK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250023": { + "content": "tr_TR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250024": { + "content": "vi_VN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250025": { + "content": "zh_CN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250026": { + "content": "af_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250027": { + "content": "az_AZ", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250028": { + "content": "bn_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250029": { + "content": "fa_IR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250030": { + "content": "he_IL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250031": { + "content": "hr_HR", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250032": { + "content": "id_ID", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250033": { + "content": "ka_GE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250034": { + "content": "km_KH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250035": { + "content": "mk_MK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250036": { + "content": "ml_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250037": { + "content": "mn_MN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250038": { + "content": "mr_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250039": { + "content": "pl_PL", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250040": { + "content": "ps_AF", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250041": { + "content": "pt_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250042": { + "content": "sv_SE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250043": { + "content": "sw_KE", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250044": { + "content": "ta_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250045": { + "content": "te_IN", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250046": { + "content": "th_TH", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250047": { + "content": "tl_XX", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250048": { + "content": "uk_UA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250049": { + "content": "ur_PK", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250050": { + "content": "xh_ZA", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250051": { + "content": "gl_ES", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250052": { + "content": "sl_SI", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250053": { + "content": "", + "lstrip": true, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "ar_AR", + "cs_CZ", + "de_DE", + "en_XX", + "es_XX", + "et_EE", + "fi_FI", + "fr_XX", + "gu_IN", + "hi_IN", + "it_IT", + "ja_XX", + "kk_KZ", + "ko_KR", + "lt_LT", + "lv_LV", + "my_MM", + "ne_NP", + "nl_XX", + "ro_RO", + "ru_RU", + "si_LK", + "tr_TR", + "vi_VN", + "zh_CN", + "af_ZA", + "az_AZ", + "bn_IN", + "fa_IR", + "he_IL", + "hr_HR", + "id_ID", + "ka_GE", + "km_KH", + "mk_MK", + "ml_IN", + "mn_MN", + "mr_IN", + "pl_PL", + "ps_AF", + "pt_XX", + "sv_SE", + "sw_KE", + "ta_IN", + "te_IN", + "th_TH", + "tl_XX", + "uk_UA", + "ur_PK", + "xh_ZA", + "gl_ES", + "sl_SI" + ], + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "", + "extra_special_tokens": {}, + "language_codes": "ML50", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "src_lang": "tl_XX", + "tgt_lang": "zh_CN", + "tokenizer_class": "MBart50Tokenizer", + "unk_token": "" +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..572b9f2a7c132cfc0b2cf665456ee6b00aa326cb --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,85 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 10.0, + "eval_steps": 500, + "global_step": 3130, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.5974440894568689, + "grad_norm": 1.9811842441558838, + "learning_rate": 2.604950495049505e-05, + "loss": 2.047, + "step": 500 + }, + { + "epoch": 3.194888178913738, + "grad_norm": 1.323052167892456, + "learning_rate": 2.10990099009901e-05, + "loss": 0.4026, + "step": 1000 + }, + { + "epoch": 4.792332268370607, + "grad_norm": 1.4039280414581299, + "learning_rate": 1.614851485148515e-05, + "loss": 0.2068, + "step": 1500 + }, + { + "epoch": 6.389776357827476, + "grad_norm": 0.6008857488632202, + "learning_rate": 1.1198019801980199e-05, + "loss": 0.1062, + "step": 2000 + }, + { + "epoch": 7.987220447284345, + "grad_norm": 0.789228081703186, + "learning_rate": 6.247524752475248e-06, + "loss": 0.0555, + "step": 2500 + }, + { + "epoch": 9.584664536741213, + "grad_norm": 0.2787887454032898, + "learning_rate": 1.2970297029702972e-06, + "loss": 0.0336, + "step": 3000 + }, + { + "epoch": 10.0, + "step": 3130, + "total_flos": 9589481500311552.0, + "train_loss": 0.4568236516306575, + "train_runtime": 1164.9898, + "train_samples_per_second": 21.477, + "train_steps_per_second": 2.687 + } + ], + "logging_steps": 500, + "max_steps": 3130, + "num_input_tokens_seen": 0, + "num_train_epochs": 10, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 9589481500311552.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}