Upload folder using huggingface_hub
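The commit below was created with the Hub upload API. As a rough sketch (not the uploader's actual script), a call of the following shape produces a commit with this message; the repo id and local folder are placeholders:

from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./opus-mt-ko-en-finetuned",          # local Trainer output dir with the checkpoints (placeholder)
    repo_id="your-username/opus-mt-ko-en-finetuned",  # hypothetical target repo
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)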
This view is limited to 50 files because it contains too many changes.
- .gitattributes +14 -0
- checkpoint-10275/config.json +56 -0
- checkpoint-10275/generation_config.json +16 -0
- checkpoint-10275/model.safetensors +3 -0
- checkpoint-10275/optimizer.pt +3 -0
- checkpoint-10275/rng_state.pth +3 -0
- checkpoint-10275/scheduler.pt +3 -0
- checkpoint-10275/source.spm +3 -0
- checkpoint-10275/special_tokens_map.json +5 -0
- checkpoint-10275/target.spm +3 -0
- checkpoint-10275/tokenizer_config.json +39 -0
- checkpoint-10275/trainer_state.json +248 -0
- checkpoint-10275/training_args.bin +3 -0
- checkpoint-10275/vocab.json +0 -0
- checkpoint-12330/config.json +56 -0
- checkpoint-12330/generation_config.json +16 -0
- checkpoint-12330/model.safetensors +3 -0
- checkpoint-12330/optimizer.pt +3 -0
- checkpoint-12330/rng_state.pth +3 -0
- checkpoint-12330/scheduler.pt +3 -0
- checkpoint-12330/source.spm +3 -0
- checkpoint-12330/special_tokens_map.json +5 -0
- checkpoint-12330/target.spm +3 -0
- checkpoint-12330/tokenizer_config.json +39 -0
- checkpoint-12330/trainer_state.json +291 -0
- checkpoint-12330/training_args.bin +3 -0
- checkpoint-12330/vocab.json +0 -0
- checkpoint-2055/config.json +56 -0
- checkpoint-2055/generation_config.json +16 -0
- checkpoint-2055/model.safetensors +3 -0
- checkpoint-2055/optimizer.pt +3 -0
- checkpoint-2055/rng_state.pth +3 -0
- checkpoint-2055/scheduler.pt +3 -0
- checkpoint-2055/source.spm +3 -0
- checkpoint-2055/special_tokens_map.json +5 -0
- checkpoint-2055/target.spm +3 -0
- checkpoint-2055/tokenizer_config.json +39 -0
- checkpoint-2055/trainer_state.json +76 -0
- checkpoint-2055/training_args.bin +3 -0
- checkpoint-2055/vocab.json +0 -0
- checkpoint-4110/config.json +56 -0
- checkpoint-4110/generation_config.json +16 -0
- checkpoint-4110/model.safetensors +3 -0
- checkpoint-4110/optimizer.pt +3 -0
- checkpoint-4110/rng_state.pth +3 -0
- checkpoint-4110/scheduler.pt +3 -0
- checkpoint-4110/source.spm +3 -0
- checkpoint-4110/special_tokens_map.json +5 -0
- checkpoint-4110/target.spm +3 -0
- checkpoint-4110/tokenizer_config.json +39 -0
.gitattributes
CHANGED
@@ -33,3 +33,17 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-10275/source.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-10275/target.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-12330/source.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-12330/target.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-2055/source.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-2055/target.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-4110/source.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-4110/target.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-6165/source.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-6165/target.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-8220/source.spm filter=lfs diff=lfs merge=lfs -text
+checkpoint-8220/target.spm filter=lfs diff=lfs merge=lfs -text
+source.spm filter=lfs diff=lfs merge=lfs -text
+target.spm filter=lfs diff=lfs merge=lfs -text
checkpoint-10275/config.json
ADDED
@@ -0,0 +1,56 @@
+{
+  "_name_or_path": "Helsinki-NLP/opus-mt-ko-en",
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 65000,
+  "decoder_vocab_size": 65001,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "extra_pos_embeddings": 65001,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 65000,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.2",
+  "use_cache": true,
+  "vocab_size": 65001
+}
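The configuration above is unchanged from the Helsinki-NLP/opus-mt-ko-en base model: a 6-layer MarianMT encoder-decoder with d_model 512 and a shared 65,001-entry vocabulary. A minimal sketch of loading such a checkpoint for ko-en inference, assuming the files are available at a local placeholder path:

from transformers import MarianMTModel, MarianTokenizer

ckpt = "path/to/checkpoint-10275"  # placeholder: local copy of this checkpoint
tokenizer = MarianTokenizer.from_pretrained(ckpt)
model = MarianMTModel.from_pretrained(ckpt)

batch = tokenizer(["안녕하세요, 만나서 반갑습니다."], return_tensors="pt", padding=True)
outputs = model.generate(**batch)  # uses the saved generation_config.json defaults
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))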
checkpoint-10275/generation_config.json
ADDED
@@ -0,0 +1,16 @@
+{
+  "bad_words_ids": [
+    [
+      65000
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 65000,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 6,
+  "pad_token_id": 65000,
+  "renormalize_logits": true,
+  "transformers_version": "4.48.2"
+}
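These defaults are what model.generate() picks up automatically: 6-beam search, a 512-token length cap, and the pad token (id 65000) banned from being emitted. Expressed as a transformers GenerationConfig object (a sketch restating the JSON above, not additional settings from the repo):

from transformers import GenerationConfig

gen_config = GenerationConfig(
    bad_words_ids=[[65000]],       # never emit the pad token
    bos_token_id=0,
    decoder_start_token_id=65000,
    eos_token_id=0,
    forced_eos_token_id=0,
    max_length=512,
    num_beams=6,                   # beam search with 6 beams by default
    pad_token_id=65000,
    renormalize_logits=True,
)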
checkpoint-10275/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f4e3f7789d38c105efa2c6e6e04d38e30c63f68011e6683c2d5e8185a72ba58
+size 309965092
checkpoint-10275/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9cd1e1259bafae641c5c4ab25bb14c443c63d628045ff7a2df9f330f266e95d
+size 619563642
checkpoint-10275/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ccb58965489e6c35cc01e2cf60c91337706ca229a2b669da1c88d9dba64aea8
+size 14244
checkpoint-10275/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f81ae6b013192aa31c073f7f95d5ab45856e22196a63e3761f7ccf395a42e1e8
+size 1064
checkpoint-10275/source.spm
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9496f7c2be9aecb84c751ae9f35a875915dde8e3892f652a5c76811ab2a0f49
+size 841805
checkpoint-10275/special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
checkpoint-10275/target.spm
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1531ac8576fe64267dfca0dc33950a6e9a2d3fd9e05346558ad7ea5ee0e65bf
+size 813126
checkpoint-10275/tokenizer_config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "65000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "kor",
+  "sp_model_kwargs": {},
+  "target_lang": "eng",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}
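The tokenizer is a stock MarianTokenizer with separate SentencePiece models for the Korean source (source.spm) and English target (target.spm) and a shared vocabulary (separate_vocabs: false). A small sketch of preparing a ko-en training pair with it, using the same placeholder path as above; text_target routes the English side through the target SentencePiece model and returns labels:

from transformers import MarianTokenizer

tokenizer = MarianTokenizer.from_pretrained("path/to/checkpoint-10275")  # placeholder path

features = tokenizer(
    "오늘 날씨가 좋네요.",                      # Korean source sentence
    text_target="The weather is nice today.",  # English reference translation
    truncation=True,
    max_length=512,
)
print(features["input_ids"][:10], features["labels"][:10])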
checkpoint-10275/trainer_state.json
ADDED
@@ -0,0 +1,248 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 10275,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24330900243309003,
+      "grad_norm": 14.526427268981934,
+      "learning_rate": 1.9195458231954583e-05,
+      "loss": 3.5943,
+      "step": 500
+    },
+    {
+      "epoch": 0.48661800486618007,
+      "grad_norm": 16.526565551757812,
+      "learning_rate": 1.8384428223844285e-05,
+      "loss": 3.1431,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7299270072992701,
+      "grad_norm": 11.656450271606445,
+      "learning_rate": 1.7573398215733984e-05,
+      "loss": 2.9201,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9732360097323601,
+      "grad_norm": 13.380396842956543,
+      "learning_rate": 1.6762368207623682e-05,
+      "loss": 2.7711,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_bleu": 12.810223940577195,
+      "eval_loss": 2.420867681503296,
+      "eval_rouge": {
+        "rouge1": 0.30991633445023503,
+        "rouge2": 0.11181858129315748,
+        "rougeL": 0.3092627118644087,
+        "rougeLsum": 0.30911397265634744
+      },
+      "eval_runtime": 264.2267,
+      "eval_samples_per_second": 33.494,
+      "eval_steps_per_second": 2.097,
+      "step": 2055
+    },
+    {
+      "epoch": 1.2165450121654502,
+      "grad_norm": 13.43653392791748,
+      "learning_rate": 1.5951338199513384e-05,
+      "loss": 2.3949,
+      "step": 2500
+    },
+    {
+      "epoch": 1.4598540145985401,
+      "grad_norm": 13.36623764038086,
+      "learning_rate": 1.5140308191403083e-05,
+      "loss": 2.2468,
+      "step": 3000
+    },
+    {
+      "epoch": 1.7031630170316303,
+      "grad_norm": 12.249001502990723,
+      "learning_rate": 1.4329278183292783e-05,
+      "loss": 2.2239,
+      "step": 3500
+    },
+    {
+      "epoch": 1.94647201946472,
+      "grad_norm": 14.571913719177246,
+      "learning_rate": 1.3518248175182482e-05,
+      "loss": 2.1585,
+      "step": 4000
+    },
+    {
+      "epoch": 2.0,
+      "eval_bleu": 18.428136367860084,
+      "eval_loss": 2.1254029273986816,
+      "eval_rouge": {
+        "rouge1": 0.3476506591337114,
+        "rouge2": 0.13975840731772934,
+        "rougeL": 0.34719576719576883,
+        "rougeLsum": 0.3470716527665696
+      },
+      "eval_runtime": 264.8536,
+      "eval_samples_per_second": 33.415,
+      "eval_steps_per_second": 2.092,
+      "step": 4110
+    },
+    {
+      "epoch": 2.18978102189781,
+      "grad_norm": 12.905760765075684,
+      "learning_rate": 1.2707218167072182e-05,
+      "loss": 1.9148,
+      "step": 4500
+    },
+    {
+      "epoch": 2.4330900243309004,
+      "grad_norm": 13.12757682800293,
+      "learning_rate": 1.1896188158961884e-05,
+      "loss": 1.8616,
+      "step": 5000
+    },
+    {
+      "epoch": 2.67639902676399,
+      "grad_norm": 13.198368072509766,
+      "learning_rate": 1.1085158150851583e-05,
+      "loss": 1.8182,
+      "step": 5500
+    },
+    {
+      "epoch": 2.9197080291970803,
+      "grad_norm": 11.821575164794922,
+      "learning_rate": 1.0274128142741283e-05,
+      "loss": 1.8343,
+      "step": 6000
+    },
+    {
+      "epoch": 3.0,
+      "eval_bleu": 20.618391393580144,
+      "eval_loss": 1.9738637208938599,
+      "eval_rouge": {
+        "rouge1": 0.3669825372367759,
+        "rouge2": 0.153052954891938,
+        "rougeL": 0.3663996747132363,
+        "rougeLsum": 0.36649283390808984
+      },
+      "eval_runtime": 277.0353,
+      "eval_samples_per_second": 31.945,
+      "eval_steps_per_second": 2.0,
+      "step": 6165
+    },
+    {
+      "epoch": 3.1630170316301705,
+      "grad_norm": 9.874231338500977,
+      "learning_rate": 9.463098134630983e-06,
+      "loss": 1.652,
+      "step": 6500
+    },
+    {
+      "epoch": 3.40632603406326,
+      "grad_norm": 13.481595993041992,
+      "learning_rate": 8.652068126520682e-06,
+      "loss": 1.5882,
+      "step": 7000
+    },
+    {
+      "epoch": 3.6496350364963503,
+      "grad_norm": 12.598729133605957,
+      "learning_rate": 7.841038118410382e-06,
+      "loss": 1.5795,
+      "step": 7500
+    },
+    {
+      "epoch": 3.8929440389294405,
+      "grad_norm": 10.078117370605469,
+      "learning_rate": 7.030008110300081e-06,
+      "loss": 1.6081,
+      "step": 8000
+    },
+    {
+      "epoch": 4.0,
+      "eval_bleu": 21.371071937079442,
+      "eval_loss": 1.8978888988494873,
+      "eval_rouge": {
+        "rouge1": 0.3791688230162823,
+        "rouge2": 0.16206698950766746,
+        "rougeL": 0.37860440156203046,
+        "rougeLsum": 0.3785680330341368
+      },
+      "eval_runtime": 270.921,
+      "eval_samples_per_second": 32.666,
+      "eval_steps_per_second": 2.045,
+      "step": 8220
+    },
+    {
+      "epoch": 4.13625304136253,
+      "grad_norm": 13.678194999694824,
+      "learning_rate": 6.2206001622060015e-06,
+      "loss": 1.5118,
+      "step": 8500
+    },
+    {
+      "epoch": 4.37956204379562,
+      "grad_norm": 11.986939430236816,
+      "learning_rate": 5.409570154095703e-06,
+      "loss": 1.445,
+      "step": 9000
+    },
+    {
+      "epoch": 4.622871046228711,
+      "grad_norm": 12.918234825134277,
+      "learning_rate": 4.598540145985402e-06,
+      "loss": 1.447,
+      "step": 9500
+    },
+    {
+      "epoch": 4.866180048661801,
+      "grad_norm": 10.726964950561523,
+      "learning_rate": 3.7875101378751015e-06,
+      "loss": 1.406,
+      "step": 10000
+    },
+    {
+      "epoch": 5.0,
+      "eval_bleu": 22.3831585320865,
+      "eval_loss": 1.859816074371338,
+      "eval_rouge": {
+        "rouge1": 0.3916949600932663,
+        "rouge2": 0.16823271455474847,
+        "rougeL": 0.39121746928526785,
+        "rougeLsum": 0.39116348309568805
+      },
+      "eval_runtime": 277.4168,
+      "eval_samples_per_second": 31.901,
+      "eval_steps_per_second": 1.997,
+      "step": 10275
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 12330,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 859319313629184.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
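This state describes a run of 2,055 optimizer steps per epoch over 6 planned epochs (12,330 steps total), with a train batch size of 16, a log entry every 500 steps, and an evaluation plus a saved checkpoint at every epoch boundary; the logged learning rate decays roughly linearly from about 2e-5 toward zero. A hedged reconstruction of Seq2SeqTrainingArguments consistent with that state (the actual arguments live in training_args.bin and are not readable from this diff):

from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(      # assumed values, inferred from trainer_state.json
    output_dir="./opus-mt-ko-en-finetuned",    # placeholder
    num_train_epochs=6,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,                        # consistent with the logged linear decay
    logging_steps=500,
    eval_strategy="epoch",                     # eval_bleu / eval_rouge logged once per epoch
    save_strategy="epoch",                     # checkpoints at steps 2055, 4110, ..., 12330
    predict_with_generate=True,                # required to score generated translations
)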
checkpoint-10275/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f23aa42f71fb49bdefda103a1093e935cb64d8290342e3b02d230058bfcccbc
+size 5432
checkpoint-10275/vocab.json
ADDED
The diff for this file is too large to render.
checkpoint-12330/config.json
ADDED
@@ -0,0 +1,56 @@
+{
+  "_name_or_path": "Helsinki-NLP/opus-mt-ko-en",
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 65000,
+  "decoder_vocab_size": 65001,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "extra_pos_embeddings": 65001,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 65000,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.2",
+  "use_cache": true,
+  "vocab_size": 65001
+}
checkpoint-12330/generation_config.json
ADDED
@@ -0,0 +1,16 @@
+{
+  "bad_words_ids": [
+    [
+      65000
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 65000,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 6,
+  "pad_token_id": 65000,
+  "renormalize_logits": true,
+  "transformers_version": "4.48.2"
+}
checkpoint-12330/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b026240c8ec3e2f82cad42ffd832821afecfc924fd7dfdf35fa9dcefa9e8f18
+size 309965092
checkpoint-12330/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:197f5138030a4addd203dceaeccca75f703ae995bbadb4516d43442ec2123b0c
+size 619563642
checkpoint-12330/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd47ba0a7e6ce7e5ee851ca82a5aefd6a5a08aec1886890e8730f3e5cb008eee
+size 14244
checkpoint-12330/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9bc590f19d55470cfe8087ce46f286ba7419d17991cd31666230d9d7d3795f09
+size 1064
checkpoint-12330/source.spm
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9496f7c2be9aecb84c751ae9f35a875915dde8e3892f652a5c76811ab2a0f49
+size 841805
checkpoint-12330/special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
checkpoint-12330/target.spm
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1531ac8576fe64267dfca0dc33950a6e9a2d3fd9e05346558ad7ea5ee0e65bf
+size 813126
checkpoint-12330/tokenizer_config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "65000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "kor",
+  "sp_model_kwargs": {},
+  "target_lang": "eng",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}
checkpoint-12330/trainer_state.json
ADDED
@@ -0,0 +1,291 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 6.0,
+  "eval_steps": 500,
+  "global_step": 12330,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24330900243309003,
+      "grad_norm": 14.526427268981934,
+      "learning_rate": 1.9195458231954583e-05,
+      "loss": 3.5943,
+      "step": 500
+    },
+    {
+      "epoch": 0.48661800486618007,
+      "grad_norm": 16.526565551757812,
+      "learning_rate": 1.8384428223844285e-05,
+      "loss": 3.1431,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7299270072992701,
+      "grad_norm": 11.656450271606445,
+      "learning_rate": 1.7573398215733984e-05,
+      "loss": 2.9201,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9732360097323601,
+      "grad_norm": 13.380396842956543,
+      "learning_rate": 1.6762368207623682e-05,
+      "loss": 2.7711,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_bleu": 12.810223940577195,
+      "eval_loss": 2.420867681503296,
+      "eval_rouge": {
+        "rouge1": 0.30991633445023503,
+        "rouge2": 0.11181858129315748,
+        "rougeL": 0.3092627118644087,
+        "rougeLsum": 0.30911397265634744
+      },
+      "eval_runtime": 264.2267,
+      "eval_samples_per_second": 33.494,
+      "eval_steps_per_second": 2.097,
+      "step": 2055
+    },
+    {
+      "epoch": 1.2165450121654502,
+      "grad_norm": 13.43653392791748,
+      "learning_rate": 1.5951338199513384e-05,
+      "loss": 2.3949,
+      "step": 2500
+    },
+    {
+      "epoch": 1.4598540145985401,
+      "grad_norm": 13.36623764038086,
+      "learning_rate": 1.5140308191403083e-05,
+      "loss": 2.2468,
+      "step": 3000
+    },
+    {
+      "epoch": 1.7031630170316303,
+      "grad_norm": 12.249001502990723,
+      "learning_rate": 1.4329278183292783e-05,
+      "loss": 2.2239,
+      "step": 3500
+    },
+    {
+      "epoch": 1.94647201946472,
+      "grad_norm": 14.571913719177246,
+      "learning_rate": 1.3518248175182482e-05,
+      "loss": 2.1585,
+      "step": 4000
+    },
+    {
+      "epoch": 2.0,
+      "eval_bleu": 18.428136367860084,
+      "eval_loss": 2.1254029273986816,
+      "eval_rouge": {
+        "rouge1": 0.3476506591337114,
+        "rouge2": 0.13975840731772934,
+        "rougeL": 0.34719576719576883,
+        "rougeLsum": 0.3470716527665696
+      },
+      "eval_runtime": 264.8536,
+      "eval_samples_per_second": 33.415,
+      "eval_steps_per_second": 2.092,
+      "step": 4110
+    },
+    {
+      "epoch": 2.18978102189781,
+      "grad_norm": 12.905760765075684,
+      "learning_rate": 1.2707218167072182e-05,
+      "loss": 1.9148,
+      "step": 4500
+    },
+    {
+      "epoch": 2.4330900243309004,
+      "grad_norm": 13.12757682800293,
+      "learning_rate": 1.1896188158961884e-05,
+      "loss": 1.8616,
+      "step": 5000
+    },
+    {
+      "epoch": 2.67639902676399,
+      "grad_norm": 13.198368072509766,
+      "learning_rate": 1.1085158150851583e-05,
+      "loss": 1.8182,
+      "step": 5500
+    },
+    {
+      "epoch": 2.9197080291970803,
+      "grad_norm": 11.821575164794922,
+      "learning_rate": 1.0274128142741283e-05,
+      "loss": 1.8343,
+      "step": 6000
+    },
+    {
+      "epoch": 3.0,
+      "eval_bleu": 20.618391393580144,
+      "eval_loss": 1.9738637208938599,
+      "eval_rouge": {
+        "rouge1": 0.3669825372367759,
+        "rouge2": 0.153052954891938,
+        "rougeL": 0.3663996747132363,
+        "rougeLsum": 0.36649283390808984
+      },
+      "eval_runtime": 277.0353,
+      "eval_samples_per_second": 31.945,
+      "eval_steps_per_second": 2.0,
+      "step": 6165
+    },
+    {
+      "epoch": 3.1630170316301705,
+      "grad_norm": 9.874231338500977,
+      "learning_rate": 9.463098134630983e-06,
+      "loss": 1.652,
+      "step": 6500
+    },
+    {
+      "epoch": 3.40632603406326,
+      "grad_norm": 13.481595993041992,
+      "learning_rate": 8.652068126520682e-06,
+      "loss": 1.5882,
+      "step": 7000
+    },
+    {
+      "epoch": 3.6496350364963503,
+      "grad_norm": 12.598729133605957,
+      "learning_rate": 7.841038118410382e-06,
+      "loss": 1.5795,
+      "step": 7500
+    },
+    {
+      "epoch": 3.8929440389294405,
+      "grad_norm": 10.078117370605469,
+      "learning_rate": 7.030008110300081e-06,
+      "loss": 1.6081,
+      "step": 8000
+    },
+    {
+      "epoch": 4.0,
+      "eval_bleu": 21.371071937079442,
+      "eval_loss": 1.8978888988494873,
+      "eval_rouge": {
+        "rouge1": 0.3791688230162823,
+        "rouge2": 0.16206698950766746,
+        "rougeL": 0.37860440156203046,
+        "rougeLsum": 0.3785680330341368
+      },
+      "eval_runtime": 270.921,
+      "eval_samples_per_second": 32.666,
+      "eval_steps_per_second": 2.045,
+      "step": 8220
+    },
+    {
+      "epoch": 4.13625304136253,
+      "grad_norm": 13.678194999694824,
+      "learning_rate": 6.2206001622060015e-06,
+      "loss": 1.5118,
+      "step": 8500
+    },
+    {
+      "epoch": 4.37956204379562,
+      "grad_norm": 11.986939430236816,
+      "learning_rate": 5.409570154095703e-06,
+      "loss": 1.445,
+      "step": 9000
+    },
+    {
+      "epoch": 4.622871046228711,
+      "grad_norm": 12.918234825134277,
+      "learning_rate": 4.598540145985402e-06,
+      "loss": 1.447,
+      "step": 9500
+    },
+    {
+      "epoch": 4.866180048661801,
+      "grad_norm": 10.726964950561523,
+      "learning_rate": 3.7875101378751015e-06,
+      "loss": 1.406,
+      "step": 10000
+    },
+    {
+      "epoch": 5.0,
+      "eval_bleu": 22.3831585320865,
+      "eval_loss": 1.859816074371338,
+      "eval_rouge": {
+        "rouge1": 0.3916949600932663,
+        "rouge2": 0.16823271455474847,
+        "rougeL": 0.39121746928526785,
+        "rougeLsum": 0.39116348309568805
+      },
+      "eval_runtime": 277.4168,
+      "eval_samples_per_second": 31.901,
+      "eval_steps_per_second": 1.997,
+      "step": 10275
+    },
+    {
+      "epoch": 5.109489051094891,
+      "grad_norm": 15.405741691589355,
+      "learning_rate": 2.979724249797243e-06,
+      "loss": 1.4047,
+      "step": 10500
+    },
+    {
+      "epoch": 5.35279805352798,
+      "grad_norm": 10.873234748840332,
+      "learning_rate": 2.1686942416869423e-06,
+      "loss": 1.3599,
+      "step": 11000
+    },
+    {
+      "epoch": 5.59610705596107,
+      "grad_norm": 10.827215194702148,
+      "learning_rate": 1.3576642335766423e-06,
+      "loss": 1.33,
+      "step": 11500
+    },
+    {
+      "epoch": 5.839416058394161,
+      "grad_norm": 12.577991485595703,
+      "learning_rate": 5.466342254663423e-07,
+      "loss": 1.3453,
+      "step": 12000
+    },
+    {
+      "epoch": 6.0,
+      "eval_bleu": 23.393142474562815,
+      "eval_loss": 1.8506520986557007,
+      "eval_rouge": {
+        "rouge1": 0.3935306250560502,
+        "rouge2": 0.16916814635458705,
+        "rougeL": 0.393161689534573,
+        "rougeLsum": 0.3930333602367517
+      },
+      "eval_runtime": 268.6729,
+      "eval_samples_per_second": 32.94,
+      "eval_steps_per_second": 2.062,
+      "step": 12330
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 12330,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1030951237386240.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
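The per-epoch eval_bleu and eval_rouge values above (BLEU climbing from 12.8 to 23.4 and ROUGE-1 from 0.31 to 0.39 over six epochs) come from scoring the model's generated translations against references each epoch. A sketch of a compute_metrics hook with that output shape, assuming the evaluate library's sacrebleu and rouge metrics; the project's actual metric code is not part of this commit:

import numpy as np
import evaluate
from transformers import MarianTokenizer

tokenizer = MarianTokenizer.from_pretrained("path/to/checkpoint-12330")  # placeholder path
bleu = evaluate.load("sacrebleu")
rouge = evaluate.load("rouge")

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)  # restore padded label ids
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    return {
        "bleu": bleu.compute(predictions=decoded_preds,
                             references=[[ref] for ref in decoded_labels])["score"],
        "rouge": rouge.compute(predictions=decoded_preds, references=decoded_labels),
    }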
checkpoint-12330/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f23aa42f71fb49bdefda103a1093e935cb64d8290342e3b02d230058bfcccbc
+size 5432
checkpoint-12330/vocab.json
ADDED
The diff for this file is too large to render.
checkpoint-2055/config.json
ADDED
@@ -0,0 +1,56 @@
+{
+  "_name_or_path": "Helsinki-NLP/opus-mt-ko-en",
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 65000,
+  "decoder_vocab_size": 65001,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "extra_pos_embeddings": 65001,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 65000,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.2",
+  "use_cache": true,
+  "vocab_size": 65001
+}
checkpoint-2055/generation_config.json
ADDED
@@ -0,0 +1,16 @@
+{
+  "bad_words_ids": [
+    [
+      65000
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 65000,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 6,
+  "pad_token_id": 65000,
+  "renormalize_logits": true,
+  "transformers_version": "4.48.2"
+}
checkpoint-2055/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38bdf0d55e3029bb0bdb30889b9d3a6de6b214d908934fcd75b8d5c9a6ea361f
+size 309965092
checkpoint-2055/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4509248cd7da9bce63249bfda68d323a34384fad362ea7b04471e6e37a70fe85
+size 619563642
checkpoint-2055/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f42cdf8930165de9a08694ccec3e966908836cc2bbbe74efe9e31c7ca0498492
+size 14244
checkpoint-2055/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d1a94c4f6c1e886e8b654ad897d78869269c7a42d041cf138af0708606a1ab7
+size 1064
checkpoint-2055/source.spm
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9496f7c2be9aecb84c751ae9f35a875915dde8e3892f652a5c76811ab2a0f49
+size 841805
checkpoint-2055/special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
checkpoint-2055/target.spm
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1531ac8576fe64267dfca0dc33950a6e9a2d3fd9e05346558ad7ea5ee0e65bf
+size 813126
checkpoint-2055/tokenizer_config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "65000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "kor",
+  "sp_model_kwargs": {},
+  "target_lang": "eng",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}
checkpoint-2055/trainer_state.json
ADDED
@@ -0,0 +1,76 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 2055,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24330900243309003,
+      "grad_norm": 14.526427268981934,
+      "learning_rate": 1.9195458231954583e-05,
+      "loss": 3.5943,
+      "step": 500
+    },
+    {
+      "epoch": 0.48661800486618007,
+      "grad_norm": 16.526565551757812,
+      "learning_rate": 1.8384428223844285e-05,
+      "loss": 3.1431,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7299270072992701,
+      "grad_norm": 11.656450271606445,
+      "learning_rate": 1.7573398215733984e-05,
+      "loss": 2.9201,
+      "step": 1500
+    },
+    {
+      "epoch": 0.9732360097323601,
+      "grad_norm": 13.380396842956543,
+      "learning_rate": 1.6762368207623682e-05,
+      "loss": 2.7711,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_bleu": 12.810223940577195,
+      "eval_loss": 2.420867681503296,
+      "eval_rouge": {
+        "rouge1": 0.30991633445023503,
+        "rouge2": 0.11181858129315748,
+        "rougeL": 0.3092627118644087,
+        "rougeLsum": 0.30911397265634744
+      },
+      "eval_runtime": 264.2267,
+      "eval_samples_per_second": 33.494,
+      "eval_steps_per_second": 2.097,
+      "step": 2055
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 12330,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 6,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 172120801738752.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}
checkpoint-2055/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f23aa42f71fb49bdefda103a1093e935cb64d8290342e3b02d230058bfcccbc
+size 5432
checkpoint-2055/vocab.json
ADDED
The diff for this file is too large to render.
checkpoint-4110/config.json
ADDED
@@ -0,0 +1,56 @@
+{
+  "_name_or_path": "Helsinki-NLP/opus-mt-ko-en",
+  "activation_dropout": 0.0,
+  "activation_function": "swish",
+  "add_bias_logits": false,
+  "add_final_layer_norm": false,
+  "architectures": [
+    "MarianMTModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 512,
+  "decoder_attention_heads": 8,
+  "decoder_ffn_dim": 2048,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 6,
+  "decoder_start_token_id": 65000,
+  "decoder_vocab_size": 65001,
+  "dropout": 0.1,
+  "encoder_attention_heads": 8,
+  "encoder_ffn_dim": 2048,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 6,
+  "eos_token_id": 0,
+  "extra_pos_embeddings": 65001,
+  "forced_eos_token_id": 0,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "max_length": null,
+  "max_position_embeddings": 512,
+  "model_type": "marian",
+  "normalize_before": false,
+  "normalize_embedding": false,
+  "num_beams": null,
+  "num_hidden_layers": 6,
+  "pad_token_id": 65000,
+  "scale_embedding": true,
+  "share_encoder_decoder_embeddings": true,
+  "static_position_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.2",
+  "use_cache": true,
+  "vocab_size": 65001
+}
checkpoint-4110/generation_config.json
ADDED
@@ -0,0 +1,16 @@
+{
+  "bad_words_ids": [
+    [
+      65000
+    ]
+  ],
+  "bos_token_id": 0,
+  "decoder_start_token_id": 65000,
+  "eos_token_id": 0,
+  "forced_eos_token_id": 0,
+  "max_length": 512,
+  "num_beams": 6,
+  "pad_token_id": 65000,
+  "renormalize_logits": true,
+  "transformers_version": "4.48.2"
+}
checkpoint-4110/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4845d86a1d166e372636c50ba7afd2dbb8207d0c43a87650c04be7bbbf316db5
+size 309965092
checkpoint-4110/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77ac2fba4b592bb1c58e6561231ccf8d7717aaa78ec3a37f276214c52e1dcda6
+size 619563642
checkpoint-4110/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e591177b805f0c379b730544849b0bea9f7c43f8b57876d46ca1e63747a4f1e
+size 14244
checkpoint-4110/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10dc2a776ee952c1f08a2eca5de9a8899edc33950a9b8759306e974c3bd30dcf
+size 1064
checkpoint-4110/source.spm
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9496f7c2be9aecb84c751ae9f35a875915dde8e3892f652a5c76811ab2a0f49
+size 841805
checkpoint-4110/special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
+{
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
checkpoint-4110/target.spm
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1531ac8576fe64267dfca0dc33950a6e9a2d3fd9e05346558ad7ea5ee0e65bf
+size 813126
checkpoint-4110/tokenizer_config.json
ADDED
@@ -0,0 +1,39 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "65000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "separate_vocabs": false,
+  "source_lang": "kor",
+  "sp_model_kwargs": {},
+  "target_lang": "eng",
+  "tokenizer_class": "MarianTokenizer",
+  "unk_token": "<unk>"
+}