diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..926938ffa72030f0b1ae50de93a836b97648459c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-1252/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-1565/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-1878/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-2191/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-2504/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-2817/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-313/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-3130/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-626/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-939/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/checkpoint-1252/config.json b/checkpoint-1252/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-1252/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-1252/generation_config.json b/checkpoint-1252/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-1252/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-1252/model.safetensors b/checkpoint-1252/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b546406d066edb153bbb12d29d6a8f91f5eb5e7a
--- /dev/null
+++ b/checkpoint-1252/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67f0525f8ab2ffbe2efdc5812ffa88e20a6c6e55039a84aa91af1f408701f121
+size 2444578688
diff --git a/checkpoint-1252/optimizer.pt b/checkpoint-1252/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..77eeb80db0d4f98736eb73d1da4f28094a4dbe85
--- /dev/null
+++ b/checkpoint-1252/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0d2a0d279b41dbf52b8e81d0cb9810962d9b442c07d78409de40d4296443da3
+size 4887473903
diff --git a/checkpoint-1252/rng_state.pth b/checkpoint-1252/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..72aa1e16d783515fd90fe22c76a0c8dcfbaf6586
--- /dev/null
+++ b/checkpoint-1252/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1aaeef3f9410d218573ec2fad66c7e9598bf5a6f15028d250fda343f351bd7e
+size 14244
diff --git a/checkpoint-1252/scheduler.pt b/checkpoint-1252/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8c7485afb37e60eb1da499cf85a2e73ba80cdeba
--- /dev/null
+++ b/checkpoint-1252/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b02ce463792c97d1180f23f7f149e16e463ba4f3f98ce5afc01f4a1e6f7e51bb
+size 1064
diff --git a/checkpoint-1252/sentencepiece.bpe.model b/checkpoint-1252/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-1252/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-1252/special_tokens_map.json b/checkpoint-1252/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-1252/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-1252/tokenizer.json b/checkpoint-1252/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-1252/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-1252/tokenizer_config.json b/checkpoint-1252/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-1252/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-1252/trainer_state.json b/checkpoint-1252/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..bac6944edd5b9ad2d420b993b54a0cc063ced708
--- /dev/null
+++ b/checkpoint-1252/trainer_state.json
@@ -0,0 +1,48 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 4.0,
+ "eval_steps": 500,
+ "global_step": 1252,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ },
+ {
+ "epoch": 3.194888178913738,
+ "grad_norm": 1.323052167892456,
+ "learning_rate": 2.10990099009901e-05,
+ "loss": 0.4026,
+ "step": 1000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3834058896310272.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-1252/training_args.bin b/checkpoint-1252/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-1252/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-1565/config.json b/checkpoint-1565/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-1565/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-1565/generation_config.json b/checkpoint-1565/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-1565/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-1565/model.safetensors b/checkpoint-1565/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f06ab7131c95b2157ecf7d41c97d8b0f9949ec10
--- /dev/null
+++ b/checkpoint-1565/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a285f7e42a8b082a91007f739aad6efbf69eb19ecd08cf8b664f1a1537049390
+size 2444578688
diff --git a/checkpoint-1565/optimizer.pt b/checkpoint-1565/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f755ded95bee2b661efa13967a44ecff710815b
--- /dev/null
+++ b/checkpoint-1565/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20b0dfd9e52bc4f706fafe9405e2615a37736dce0dcdf9653e8e3362c690e957
+size 4887473903
diff --git a/checkpoint-1565/rng_state.pth b/checkpoint-1565/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..6270014a647013445ac986f1bfd576ecb9594275
--- /dev/null
+++ b/checkpoint-1565/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a3993d5a7d4378d375a299f6acc06eb97873d96ac6120c4876cf8f00bfae4a1
+size 14244
diff --git a/checkpoint-1565/scheduler.pt b/checkpoint-1565/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20e365ed9c2b68d496116f7e181056a7ab30b891
--- /dev/null
+++ b/checkpoint-1565/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8a71062b120444294f597a9876fe4f3d1ce572002214ad319857e85da83d7e1
+size 1064
diff --git a/checkpoint-1565/sentencepiece.bpe.model b/checkpoint-1565/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-1565/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-1565/special_tokens_map.json b/checkpoint-1565/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-1565/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-1565/tokenizer.json b/checkpoint-1565/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-1565/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-1565/tokenizer_config.json b/checkpoint-1565/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-1565/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-1565/trainer_state.json b/checkpoint-1565/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..d303ff8f5270aea483bdffed85afffd2d95b6279
--- /dev/null
+++ b/checkpoint-1565/trainer_state.json
@@ -0,0 +1,55 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 5.0,
+ "eval_steps": 500,
+ "global_step": 1565,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ },
+ {
+ "epoch": 3.194888178913738,
+ "grad_norm": 1.323052167892456,
+ "learning_rate": 2.10990099009901e-05,
+ "loss": 0.4026,
+ "step": 1000
+ },
+ {
+ "epoch": 4.792332268370607,
+ "grad_norm": 1.4039280414581299,
+ "learning_rate": 1.614851485148515e-05,
+ "loss": 0.2068,
+ "step": 1500
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 4792709065998336.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-1565/training_args.bin b/checkpoint-1565/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-1565/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-1878/config.json b/checkpoint-1878/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-1878/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-1878/generation_config.json b/checkpoint-1878/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-1878/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-1878/model.safetensors b/checkpoint-1878/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c0b27b654672618276f5db7758baf5f017659936
--- /dev/null
+++ b/checkpoint-1878/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc67c7ba4e87413f693653576c198fddf4b12b1d6adde26c0e15f436b6605c6f
+size 2444578688
diff --git a/checkpoint-1878/optimizer.pt b/checkpoint-1878/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e6a80e7afcb364087cec54877946b6e921b4de64
--- /dev/null
+++ b/checkpoint-1878/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55283e83243c881305f1c8cf01e502bc929dd67cd4fec32a27f054ed65844a8b
+size 4887473903
diff --git a/checkpoint-1878/rng_state.pth b/checkpoint-1878/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..60e584b69f47ba71e76c7fb43460898ab162459a
--- /dev/null
+++ b/checkpoint-1878/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab455eade6e331a7e2eb57d6f53c9111dda70f9607b06d84502160522eeaee4f
+size 14244
diff --git a/checkpoint-1878/scheduler.pt b/checkpoint-1878/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b3e6b3cca04eaff2d06da662588f3fa708535a0
--- /dev/null
+++ b/checkpoint-1878/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c86ee9ec778e6f3e17c52305e8115444951a8b42a0727a5ffbcf33043046f971
+size 1064
diff --git a/checkpoint-1878/sentencepiece.bpe.model b/checkpoint-1878/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-1878/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-1878/special_tokens_map.json b/checkpoint-1878/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-1878/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-1878/tokenizer.json b/checkpoint-1878/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-1878/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-1878/tokenizer_config.json b/checkpoint-1878/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-1878/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-1878/trainer_state.json b/checkpoint-1878/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..68892a77389c170a8cf40aa9764163bb32c81a62
--- /dev/null
+++ b/checkpoint-1878/trainer_state.json
@@ -0,0 +1,55 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 6.0,
+ "eval_steps": 500,
+ "global_step": 1878,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ },
+ {
+ "epoch": 3.194888178913738,
+ "grad_norm": 1.323052167892456,
+ "learning_rate": 2.10990099009901e-05,
+ "loss": 0.4026,
+ "step": 1000
+ },
+ {
+ "epoch": 4.792332268370607,
+ "grad_norm": 1.4039280414581299,
+ "learning_rate": 1.614851485148515e-05,
+ "loss": 0.2068,
+ "step": 1500
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 5751020621660160.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-1878/training_args.bin b/checkpoint-1878/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-1878/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-2191/config.json b/checkpoint-2191/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-2191/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-2191/generation_config.json b/checkpoint-2191/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-2191/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-2191/model.safetensors b/checkpoint-2191/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1b7d94ed4c9ab591170d465555e9b27259cf0ac6
--- /dev/null
+++ b/checkpoint-2191/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3715fdc8ab27cab8e3326ee9f96ff39c51f67781fa83a4e7185fbce523f0f647
+size 2444578688
diff --git a/checkpoint-2191/optimizer.pt b/checkpoint-2191/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..537936c9d00589510f119e9625ed12cdff17da53
--- /dev/null
+++ b/checkpoint-2191/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63538078cfc908a3f2790dae832ba84dc5a072187c63c227db4d55b77335b341
+size 4887473903
diff --git a/checkpoint-2191/rng_state.pth b/checkpoint-2191/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..e98e3921383ba5e46659610a485d2a104919ba2f
--- /dev/null
+++ b/checkpoint-2191/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:637e40723128bc2e097af2e13e354071cfb4955daa28dc2101829d4ed8380cd0
+size 14244
diff --git a/checkpoint-2191/scheduler.pt b/checkpoint-2191/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0b95402d8f0f835c24262c510e56c934cba1b78
--- /dev/null
+++ b/checkpoint-2191/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a10e8b0cb1aec2c0d24d830d97bacf24e577cf7a426abed845491650f16f28bb
+size 1064
diff --git a/checkpoint-2191/sentencepiece.bpe.model b/checkpoint-2191/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-2191/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-2191/special_tokens_map.json b/checkpoint-2191/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-2191/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-2191/tokenizer.json b/checkpoint-2191/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-2191/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-2191/tokenizer_config.json b/checkpoint-2191/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-2191/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-2191/trainer_state.json b/checkpoint-2191/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f11341b20f5f593acb71a2047c59b5eb5d32c623
--- /dev/null
+++ b/checkpoint-2191/trainer_state.json
@@ -0,0 +1,62 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 7.0,
+ "eval_steps": 500,
+ "global_step": 2191,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ },
+ {
+ "epoch": 3.194888178913738,
+ "grad_norm": 1.323052167892456,
+ "learning_rate": 2.10990099009901e-05,
+ "loss": 0.4026,
+ "step": 1000
+ },
+ {
+ "epoch": 4.792332268370607,
+ "grad_norm": 1.4039280414581299,
+ "learning_rate": 1.614851485148515e-05,
+ "loss": 0.2068,
+ "step": 1500
+ },
+ {
+ "epoch": 6.389776357827476,
+ "grad_norm": 0.6008857488632202,
+ "learning_rate": 1.1198019801980199e-05,
+ "loss": 0.1062,
+ "step": 2000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 6712311980752896.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-2191/training_args.bin b/checkpoint-2191/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-2191/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-2504/config.json b/checkpoint-2504/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-2504/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-2504/generation_config.json b/checkpoint-2504/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-2504/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-2504/model.safetensors b/checkpoint-2504/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c768d38575d60988906089c4faa0c2b4081e8ab7
--- /dev/null
+++ b/checkpoint-2504/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ff000278d81dffc7e911acffd07ae3798d23fd89007a030af7c6363e699f614
+size 2444578688
diff --git a/checkpoint-2504/optimizer.pt b/checkpoint-2504/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7c700cd55952e17d850c0c889a47e5c86cf2c069
--- /dev/null
+++ b/checkpoint-2504/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52b81e3aa2d1fdcd6882dacc3e971e0f44d143c853cb010d26999676c0a0a4c7
+size 4887473903
diff --git a/checkpoint-2504/rng_state.pth b/checkpoint-2504/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..546609cf6994a81fbd4a3aa74e241b5541d0785b
--- /dev/null
+++ b/checkpoint-2504/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba6d05745d81eb8970d32c105f496a5bb40d0139932c387915805e2d3280c6bf
+size 14244
diff --git a/checkpoint-2504/scheduler.pt b/checkpoint-2504/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..82c95251a4e7995f854299f397313d0dd02a9938
--- /dev/null
+++ b/checkpoint-2504/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29ce1ee2791ab6e384afa58710ffb43ee9b5de88edf39b5d48677989e14368a2
+size 1064
diff --git a/checkpoint-2504/sentencepiece.bpe.model b/checkpoint-2504/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-2504/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-2504/special_tokens_map.json b/checkpoint-2504/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-2504/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-2504/tokenizer.json b/checkpoint-2504/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-2504/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-2504/tokenizer_config.json b/checkpoint-2504/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-2504/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-2504/trainer_state.json b/checkpoint-2504/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..14ba125da535e4d271e97b7d5cff1081252e3920
--- /dev/null
+++ b/checkpoint-2504/trainer_state.json
@@ -0,0 +1,69 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 8.0,
+ "eval_steps": 500,
+ "global_step": 2504,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ },
+ {
+ "epoch": 3.194888178913738,
+ "grad_norm": 1.323052167892456,
+ "learning_rate": 2.10990099009901e-05,
+ "loss": 0.4026,
+ "step": 1000
+ },
+ {
+ "epoch": 4.792332268370607,
+ "grad_norm": 1.4039280414581299,
+ "learning_rate": 1.614851485148515e-05,
+ "loss": 0.2068,
+ "step": 1500
+ },
+ {
+ "epoch": 6.389776357827476,
+ "grad_norm": 0.6008857488632202,
+ "learning_rate": 1.1198019801980199e-05,
+ "loss": 0.1062,
+ "step": 2000
+ },
+ {
+ "epoch": 7.987220447284345,
+ "grad_norm": 0.789228081703186,
+ "learning_rate": 6.247524752475248e-06,
+ "loss": 0.0555,
+ "step": 2500
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 7670894427635712.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-2504/training_args.bin b/checkpoint-2504/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-2504/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-2817/config.json b/checkpoint-2817/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-2817/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-2817/generation_config.json b/checkpoint-2817/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-2817/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-2817/model.safetensors b/checkpoint-2817/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0c90a58f1a402602b6dab3240011c690545ab698
--- /dev/null
+++ b/checkpoint-2817/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b87e0fa21823996748de2b56384ae085a9e91877f44f78a63e9dfaf58c754e88
+size 2444578688
diff --git a/checkpoint-2817/optimizer.pt b/checkpoint-2817/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..08d9d9ddfbc490a3a8222cc7b265adc92d46078e
--- /dev/null
+++ b/checkpoint-2817/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb7049ab82ec29491273b6039005a99b7d86034ef0d9c89eda9a5eebaac37376
+size 4887473903
diff --git a/checkpoint-2817/rng_state.pth b/checkpoint-2817/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..5d26ffb0469d619dbb12b26e43eee6bcce119227
--- /dev/null
+++ b/checkpoint-2817/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62f39e9c65a54d796dd705c3ac2098012a649b7be95eb5302a994bdba6442dc8
+size 14244
diff --git a/checkpoint-2817/scheduler.pt b/checkpoint-2817/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b0317e5ccbca29f87732d96f171f3b215e897211
--- /dev/null
+++ b/checkpoint-2817/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3dc4e5fc93cc164724dc0adf565e63211f04021ed369396273da790a9b76666
+size 1064
diff --git a/checkpoint-2817/sentencepiece.bpe.model b/checkpoint-2817/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-2817/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-2817/special_tokens_map.json b/checkpoint-2817/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-2817/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-2817/tokenizer.json b/checkpoint-2817/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-2817/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-2817/tokenizer_config.json b/checkpoint-2817/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-2817/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-2817/trainer_state.json b/checkpoint-2817/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..820813672f9bb20547549bff98df640db6f06839
--- /dev/null
+++ b/checkpoint-2817/trainer_state.json
@@ -0,0 +1,69 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 9.0,
+ "eval_steps": 500,
+ "global_step": 2817,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ },
+ {
+ "epoch": 3.194888178913738,
+ "grad_norm": 1.323052167892456,
+ "learning_rate": 2.10990099009901e-05,
+ "loss": 0.4026,
+ "step": 1000
+ },
+ {
+ "epoch": 4.792332268370607,
+ "grad_norm": 1.4039280414581299,
+ "learning_rate": 1.614851485148515e-05,
+ "loss": 0.2068,
+ "step": 1500
+ },
+ {
+ "epoch": 6.389776357827476,
+ "grad_norm": 0.6008857488632202,
+ "learning_rate": 1.1198019801980199e-05,
+ "loss": 0.1062,
+ "step": 2000
+ },
+ {
+ "epoch": 7.987220447284345,
+ "grad_norm": 0.789228081703186,
+ "learning_rate": 6.247524752475248e-06,
+ "loss": 0.0555,
+ "step": 2500
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 8630357270986752.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-2817/training_args.bin b/checkpoint-2817/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-2817/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-313/config.json b/checkpoint-313/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-313/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-313/generation_config.json b/checkpoint-313/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-313/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-313/model.safetensors b/checkpoint-313/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ed177be2b6d0f8f4be8eaa3dc9ed05eb36767d47
--- /dev/null
+++ b/checkpoint-313/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:572090900b60e3d1b5bb096bb8097d8635f164cb329b1120ea008ed0d09de8e7
+size 2444578688
diff --git a/checkpoint-313/optimizer.pt b/checkpoint-313/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..602889b176d0c6f27898b9a23cf46fae60769deb
--- /dev/null
+++ b/checkpoint-313/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:517454890968776122f8baa134e1fd13634427b8e390973e77529346892eee5f
+size 4887473903
diff --git a/checkpoint-313/rng_state.pth b/checkpoint-313/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..924a0d267cabe36eb4883f42a2e088c00c443bbc
--- /dev/null
+++ b/checkpoint-313/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d57c9aaa96fcc774394394493eb5ee93f52435442a5d57394ccd0c3faede9b5c
+size 14244
diff --git a/checkpoint-313/scheduler.pt b/checkpoint-313/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..942c73073adb22fbd26ffaf74dd27c4c835eba64
--- /dev/null
+++ b/checkpoint-313/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9947e25e4b154fec5385c47f0be8006ab0ae4690aa5795c59e99480d8c4bc06
+size 1064
diff --git a/checkpoint-313/sentencepiece.bpe.model b/checkpoint-313/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-313/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-313/special_tokens_map.json b/checkpoint-313/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-313/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-313/tokenizer.json b/checkpoint-313/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-313/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-313/tokenizer_config.json b/checkpoint-313/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-313/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-313/trainer_state.json b/checkpoint-313/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d18ea0b71737c538f1672c26715d13ff4597f18
--- /dev/null
+++ b/checkpoint-313/trainer_state.json
@@ -0,0 +1,33 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 313,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 956957099556864.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-313/training_args.bin b/checkpoint-313/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-313/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-3130/config.json b/checkpoint-3130/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-3130/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-3130/generation_config.json b/checkpoint-3130/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-3130/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-3130/model.safetensors b/checkpoint-3130/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3e5c81bd086c8dbaba32b1204f7eb056b7cfa12b
--- /dev/null
+++ b/checkpoint-3130/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26581a190a90db2a149f605ac07f203b99c3ef969ed2259d6f9edae4f9b2a7f2
+size 2444578688
diff --git a/checkpoint-3130/optimizer.pt b/checkpoint-3130/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5ed0bc6d6fb5dc0ebb4405440a7377691c77020
--- /dev/null
+++ b/checkpoint-3130/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9eebf4d2fc4be68306a2898fb430e384a455abf1fe0ced42822b2a2df70e85b9
+size 4887473903
diff --git a/checkpoint-3130/rng_state.pth b/checkpoint-3130/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7936a195a38ace8b1af4789ed24d5112f9c926e7
--- /dev/null
+++ b/checkpoint-3130/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:375c4ef700422abd726c8208807985531469ab0858a5ee6dbc4ec1236448834b
+size 14244
diff --git a/checkpoint-3130/scheduler.pt b/checkpoint-3130/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c8706e3cf751ea7fc3d1a828895ad9f06fb5fc50
--- /dev/null
+++ b/checkpoint-3130/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:576521f57ce6164abf82c0638a1d0835ebc9679b95cf48c027412ef9fe1fe035
+size 1064
diff --git a/checkpoint-3130/sentencepiece.bpe.model b/checkpoint-3130/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-3130/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-3130/special_tokens_map.json b/checkpoint-3130/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-3130/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-3130/tokenizer.json b/checkpoint-3130/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-3130/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-3130/tokenizer_config.json b/checkpoint-3130/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-3130/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-3130/trainer_state.json b/checkpoint-3130/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..ea23d1e7baa0bda59a6b54e7821b82b4cb7d5b9c
--- /dev/null
+++ b/checkpoint-3130/trainer_state.json
@@ -0,0 +1,76 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 10.0,
+ "eval_steps": 500,
+ "global_step": 3130,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ },
+ {
+ "epoch": 3.194888178913738,
+ "grad_norm": 1.323052167892456,
+ "learning_rate": 2.10990099009901e-05,
+ "loss": 0.4026,
+ "step": 1000
+ },
+ {
+ "epoch": 4.792332268370607,
+ "grad_norm": 1.4039280414581299,
+ "learning_rate": 1.614851485148515e-05,
+ "loss": 0.2068,
+ "step": 1500
+ },
+ {
+ "epoch": 6.389776357827476,
+ "grad_norm": 0.6008857488632202,
+ "learning_rate": 1.1198019801980199e-05,
+ "loss": 0.1062,
+ "step": 2000
+ },
+ {
+ "epoch": 7.987220447284345,
+ "grad_norm": 0.789228081703186,
+ "learning_rate": 6.247524752475248e-06,
+ "loss": 0.0555,
+ "step": 2500
+ },
+ {
+ "epoch": 9.584664536741213,
+ "grad_norm": 0.2787887454032898,
+ "learning_rate": 1.2970297029702972e-06,
+ "loss": 0.0336,
+ "step": 3000
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 9589481500311552.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-3130/training_args.bin b/checkpoint-3130/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-3130/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-626/config.json b/checkpoint-626/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-626/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-626/generation_config.json b/checkpoint-626/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-626/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-626/model.safetensors b/checkpoint-626/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e5d5fe9f36344c4a3cf054452a5a55aa86d5489a
--- /dev/null
+++ b/checkpoint-626/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bed131c20cd14d2d45f9719b44732275c65269a18aa6473b51842b806bdf866
+size 2444578688
diff --git a/checkpoint-626/optimizer.pt b/checkpoint-626/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec04048366121d8ee52b27e29746ae14056c206c
--- /dev/null
+++ b/checkpoint-626/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d49953eb97a3b1b4edc307da7d4ddd0d3201cd841ffcba8d89ef39f06d1061a8
+size 4887473903
diff --git a/checkpoint-626/rng_state.pth b/checkpoint-626/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..d15653d03d372251e0d8707cebb110f67dd3f24f
--- /dev/null
+++ b/checkpoint-626/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:652a9baf52010902046aa8af83824a30c0231716204fb305c979a9129756e231
+size 14244
diff --git a/checkpoint-626/scheduler.pt b/checkpoint-626/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9d429de181e82088eb7dc7feb3b67595d3171b1
--- /dev/null
+++ b/checkpoint-626/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3346e1ecf69af61eac82d54c5fc86115279382a4f440817692128ac6afdeeb4d
+size 1064
diff --git a/checkpoint-626/sentencepiece.bpe.model b/checkpoint-626/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-626/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-626/special_tokens_map.json b/checkpoint-626/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-626/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-626/tokenizer.json b/checkpoint-626/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-626/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-626/tokenizer_config.json b/checkpoint-626/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-626/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-626/trainer_state.json b/checkpoint-626/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f7679a78db839009850354c7682bf0b7e9d0c423
--- /dev/null
+++ b/checkpoint-626/trainer_state.json
@@ -0,0 +1,41 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 626,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 1916081328881664.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-626/training_args.bin b/checkpoint-626/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-626/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/checkpoint-939/config.json b/checkpoint-939/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/checkpoint-939/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/checkpoint-939/generation_config.json b/checkpoint-939/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/checkpoint-939/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/checkpoint-939/model.safetensors b/checkpoint-939/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a2bfab6283e0eb389e991f3853e00e071e6ea46d
--- /dev/null
+++ b/checkpoint-939/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9694839dcc2469bb701069d2b8ea74fca902be1c38f3787d3ff24e73d26face
+size 2444578688
diff --git a/checkpoint-939/optimizer.pt b/checkpoint-939/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..260a5c38f35713d936e63e177c008b7ef707cadb
--- /dev/null
+++ b/checkpoint-939/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3456a12b933c36725bfec8e6eb0439cf0a514519cd1d0334798d465b1912a07
+size 4887473903
diff --git a/checkpoint-939/rng_state.pth b/checkpoint-939/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..0d97150c1649eb64c5cb27ffc676a82cea4a32b5
--- /dev/null
+++ b/checkpoint-939/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d08114cf581a5ea7b461376deb187d6bed683843bbce50eeda0cfa79238be1e5
+size 14244
diff --git a/checkpoint-939/scheduler.pt b/checkpoint-939/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c787429b0949c274f04c16760afeb7134c9e4fa
--- /dev/null
+++ b/checkpoint-939/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da727ab9e4d518533045b5030ae13a3162d685874ee8570496a8365d7e9ccbc3
+size 1064
diff --git a/checkpoint-939/sentencepiece.bpe.model b/checkpoint-939/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/checkpoint-939/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/checkpoint-939/special_tokens_map.json b/checkpoint-939/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/checkpoint-939/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/checkpoint-939/tokenizer.json b/checkpoint-939/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/checkpoint-939/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/checkpoint-939/tokenizer_config.json b/checkpoint-939/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/checkpoint-939/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/checkpoint-939/trainer_state.json b/checkpoint-939/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..54a82ebe1f5d4f6b0c39170a401227028a4d61db
--- /dev/null
+++ b/checkpoint-939/trainer_state.json
@@ -0,0 +1,41 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 939,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": false
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 2874934666985472.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}
diff --git a/checkpoint-939/training_args.bin b/checkpoint-939/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcac6cedb22e9d4611ac98f6aeb269c14779ef92
--- /dev/null
+++ b/checkpoint-939/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17cce991b747f9aeab0e1f705402f34ea425dd51c5911b700c54382ea3552a3
+size 5432
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f217c62c0b10c246fb7682f9e99031a11b2e782a
--- /dev/null
+++ b/config.json
@@ -0,0 +1,57 @@
+{
+ "_num_labels": 3,
+ "activation_dropout": 0.0,
+ "activation_function": "relu",
+ "add_bias_logits": false,
+ "add_final_layer_norm": true,
+ "architectures": [
+ "MBartForConditionalGeneration"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 0,
+ "classif_dropout": 0.0,
+ "classifier_dropout": 0.0,
+ "d_model": 1024,
+ "decoder_attention_heads": 16,
+ "decoder_ffn_dim": 4096,
+ "decoder_layerdrop": 0.0,
+ "decoder_layers": 12,
+ "decoder_start_token_id": 2,
+ "dropout": 0.1,
+ "early_stopping": null,
+ "encoder_attention_heads": 16,
+ "encoder_ffn_dim": 4096,
+ "encoder_layerdrop": 0.0,
+ "encoder_layers": 12,
+ "eos_token_id": 2,
+ "forced_eos_token_id": 2,
+ "gradient_checkpointing": false,
+ "id2label": {
+ "0": "LABEL_0",
+ "1": "LABEL_1",
+ "2": "LABEL_2"
+ },
+ "init_std": 0.02,
+ "is_encoder_decoder": true,
+ "label2id": {
+ "LABEL_0": 0,
+ "LABEL_1": 1,
+ "LABEL_2": 2
+ },
+ "max_length": null,
+ "max_position_embeddings": 1024,
+ "model_type": "mbart",
+ "normalize_before": true,
+ "normalize_embedding": true,
+ "num_beams": null,
+ "num_hidden_layers": 12,
+ "output_past": true,
+ "pad_token_id": 1,
+ "scale_embedding": true,
+ "static_position_embeddings": false,
+ "tokenizer_class": "MBart50Tokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.52.4",
+ "use_cache": true,
+ "vocab_size": 250054
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..378949a2f02124cebfbdb801959b357399b61075
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,13 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 0,
+ "decoder_start_token_id": 2,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "forced_bos_token_id": 250025,
+ "forced_eos_token_id": 2,
+ "max_length": 200,
+ "num_beams": 5,
+ "pad_token_id": 1,
+ "transformers_version": "4.52.4"
+}
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3e5c81bd086c8dbaba32b1204f7eb056b7cfa12b
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26581a190a90db2a149f605ac07f203b99c3ef969ed2259d6f9edae4f9b2a7f2
+size 2444578688
diff --git a/sentencepiece.bpe.model b/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..92619141640d5fcbb4429807de2248352b0dca79
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,69 @@
+{
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..558068bc5dab1aac7d983ab5ad1d8c62fa655185
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adaea8519d41bcddb4490c282395df876757aa7c2157eb8e153eaaba8093c85c
+size 17110076
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6d0b2d2d52f6012192ea0ef272dd177158e3429f
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,529 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "ar_AR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250002": {
+ "content": "cs_CZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250003": {
+ "content": "de_DE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250004": {
+ "content": "en_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250005": {
+ "content": "es_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250006": {
+ "content": "et_EE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250007": {
+ "content": "fi_FI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250008": {
+ "content": "fr_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250009": {
+ "content": "gu_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250010": {
+ "content": "hi_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250011": {
+ "content": "it_IT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250012": {
+ "content": "ja_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250013": {
+ "content": "kk_KZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250014": {
+ "content": "ko_KR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250015": {
+ "content": "lt_LT",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250016": {
+ "content": "lv_LV",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250017": {
+ "content": "my_MM",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250018": {
+ "content": "ne_NP",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250019": {
+ "content": "nl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250020": {
+ "content": "ro_RO",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250021": {
+ "content": "ru_RU",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250022": {
+ "content": "si_LK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250023": {
+ "content": "tr_TR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250024": {
+ "content": "vi_VN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250025": {
+ "content": "zh_CN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250026": {
+ "content": "af_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250027": {
+ "content": "az_AZ",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250028": {
+ "content": "bn_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250029": {
+ "content": "fa_IR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250030": {
+ "content": "he_IL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250031": {
+ "content": "hr_HR",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250032": {
+ "content": "id_ID",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250033": {
+ "content": "ka_GE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250034": {
+ "content": "km_KH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250035": {
+ "content": "mk_MK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250036": {
+ "content": "ml_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250037": {
+ "content": "mn_MN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250038": {
+ "content": "mr_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250039": {
+ "content": "pl_PL",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250040": {
+ "content": "ps_AF",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250041": {
+ "content": "pt_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250042": {
+ "content": "sv_SE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250043": {
+ "content": "sw_KE",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250044": {
+ "content": "ta_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250045": {
+ "content": "te_IN",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250046": {
+ "content": "th_TH",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250047": {
+ "content": "tl_XX",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250048": {
+ "content": "uk_UA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250049": {
+ "content": "ur_PK",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250050": {
+ "content": "xh_ZA",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250051": {
+ "content": "gl_ES",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250052": {
+ "content": "sl_SI",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250053": {
+ "content": "",
+ "lstrip": true,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "ar_AR",
+ "cs_CZ",
+ "de_DE",
+ "en_XX",
+ "es_XX",
+ "et_EE",
+ "fi_FI",
+ "fr_XX",
+ "gu_IN",
+ "hi_IN",
+ "it_IT",
+ "ja_XX",
+ "kk_KZ",
+ "ko_KR",
+ "lt_LT",
+ "lv_LV",
+ "my_MM",
+ "ne_NP",
+ "nl_XX",
+ "ro_RO",
+ "ru_RU",
+ "si_LK",
+ "tr_TR",
+ "vi_VN",
+ "zh_CN",
+ "af_ZA",
+ "az_AZ",
+ "bn_IN",
+ "fa_IR",
+ "he_IL",
+ "hr_HR",
+ "id_ID",
+ "ka_GE",
+ "km_KH",
+ "mk_MK",
+ "ml_IN",
+ "mn_MN",
+ "mr_IN",
+ "pl_PL",
+ "ps_AF",
+ "pt_XX",
+ "sv_SE",
+ "sw_KE",
+ "ta_IN",
+ "te_IN",
+ "th_TH",
+ "tl_XX",
+ "uk_UA",
+ "ur_PK",
+ "xh_ZA",
+ "gl_ES",
+ "sl_SI"
+ ],
+ "bos_token": "",
+ "clean_up_tokenization_spaces": false,
+ "cls_token": "",
+ "eos_token": "",
+ "extra_special_tokens": {},
+ "language_codes": "ML50",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "src_lang": "tl_XX",
+ "tgt_lang": "zh_CN",
+ "tokenizer_class": "MBart50Tokenizer",
+ "unk_token": ""
+}
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..572b9f2a7c132cfc0b2cf665456ee6b00aa326cb
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,85 @@
+{
+ "best_global_step": null,
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 10.0,
+ "eval_steps": 500,
+ "global_step": 3130,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.5974440894568689,
+ "grad_norm": 1.9811842441558838,
+ "learning_rate": 2.604950495049505e-05,
+ "loss": 2.047,
+ "step": 500
+ },
+ {
+ "epoch": 3.194888178913738,
+ "grad_norm": 1.323052167892456,
+ "learning_rate": 2.10990099009901e-05,
+ "loss": 0.4026,
+ "step": 1000
+ },
+ {
+ "epoch": 4.792332268370607,
+ "grad_norm": 1.4039280414581299,
+ "learning_rate": 1.614851485148515e-05,
+ "loss": 0.2068,
+ "step": 1500
+ },
+ {
+ "epoch": 6.389776357827476,
+ "grad_norm": 0.6008857488632202,
+ "learning_rate": 1.1198019801980199e-05,
+ "loss": 0.1062,
+ "step": 2000
+ },
+ {
+ "epoch": 7.987220447284345,
+ "grad_norm": 0.789228081703186,
+ "learning_rate": 6.247524752475248e-06,
+ "loss": 0.0555,
+ "step": 2500
+ },
+ {
+ "epoch": 9.584664536741213,
+ "grad_norm": 0.2787887454032898,
+ "learning_rate": 1.2970297029702972e-06,
+ "loss": 0.0336,
+ "step": 3000
+ },
+ {
+ "epoch": 10.0,
+ "step": 3130,
+ "total_flos": 9589481500311552.0,
+ "train_loss": 0.4568236516306575,
+ "train_runtime": 1164.9898,
+ "train_samples_per_second": 21.477,
+ "train_steps_per_second": 2.687
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 3130,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 9589481500311552.0,
+ "train_batch_size": 8,
+ "trial_name": null,
+ "trial_params": null
+}