hclent committed on
Commit
46747e3
·
verified ·
1 Parent(s): 8f351f1

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +70 -3
  2. config.json +125 -0
  3. pytorch_model.bin +3 -0
  4. sentencepiece.bpe.model +3 -0
README.md CHANGED
@@ -1,3 +1,70 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ metrics:
4
+ - bleu
5
+ - chrf
6
+ language:
7
+ - eng
8
+ - acf
9
+ - hwc
10
+ - gul
11
+ - icr
12
+ - mbf
13
+ - ktu
14
+ - jam
15
+ - mkn
16
+ - cbk
17
+ - bzj
18
+ - rop
19
+ - pcm
20
+ - srm
21
+ - kri
22
+ - djk
23
+ - tdt
24
+ - mfe
25
+ - hat
26
+ - crs
27
+ - sag
28
+ - pis
29
+ - pap
30
+ - tpi
31
+ - bis
32
+ - srn
33
+ base_model:
34
+ - facebook/mbart-large-50-many-to-many-mmt
35
+ pipeline_tag: text2text-generation
36
+ tags:
37
+ - '#Creoles'
38
+
39
+
40
+ ---
41
+
42
+ CreoleM2M is a machine translation model, which belongs to the **CreoleVal** benchmark.
43
+
44
+ This model was fine-tuned from mBART-50-MT, and supports 26 Creole languages.
45
+
46
+ For instructions and examples of how to run inference with this model, we refer you to the [CreoleVal Github](https://github.com/hclent/CreoleVal/tree/main/nlg/creolem2m).
47
+
48
+ Additional details (i.e., the CreoleM2M training dataset statistics, hyperparameters, and full results) can be found in the [CreoleVal Github's "Appendix"](https://github.com/hclent/CreoleVal/blob/main/Appendix/mt_m2m.md).
49
+
50
+ Otherwise, please see the [CreoleVal paper](https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00682/124256) for full details.
51
+
52
+
53
+ If you find our paper or models helpful, please cite our work:
54
+
55
+ ```
56
+ @article{10.1162/tacl_a_00682,
57
+ author = {Lent, Heather and Tatariya, Kushal and Dabre, Raj and Chen, Yiyi and Fekete, Marcell and Ploeger, Esther and Zhou, Li and Armstrong, Ruth-Ann and Eijansantos, Abee and Malau, Catriona and Heje, Hans Erik and Lavrinovics, Ernests and Kanojia, Diptesh and Belony, Paul and Bollmann, Marcel and Grobol, Loïc and Lhoneux, Miryam de and Hershcovich, Daniel and DeGraff, Michel and Søgaard, Anders and Bjerva, Johannes},
58
+ title = {CreoleVal: Multilingual Multitask Benchmarks for Creoles},
59
+ journal = {Transactions of the Association for Computational Linguistics},
60
+ volume = {12},
61
+ pages = {950-978},
62
+ year = {2024},
63
+ month = {09},
64
+ issn = {2307-387X},
65
+ doi = {10.1162/tacl_a_00682},
66
+ url = {https://doi.org/10.1162/tacl\_a\_00682},
67
+ eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00682/2468651/tacl\_a\_00682.pdf},
68
+ }
69
+ ```
70
+
config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/mbart-large-50-many-to-many-mmt",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.1,
5
+ "activation_function": "relu",
6
+ "adaptor_activation_function": "gelu",
7
+ "adaptor_dropout": 0.1,
8
+ "adaptor_hidden_size": 512,
9
+ "adaptor_init_std": 0.02,
10
+ "adaptor_scaling_factor": 1.0,
11
+ "adaptor_tuning": false,
12
+ "add_bias_logits": false,
13
+ "add_final_layer_norm": true,
14
+ "additional_source_wait_k": -1,
15
+ "alibi_encoding": false,
16
+ "architectures": [
17
+ "MBartForConditionalGeneration"
18
+ ],
19
+ "asymmetric_alibi_encoding": false,
20
+ "attention_dropout": 0.1,
21
+ "bos_token_id": 0,
22
+ "bottleneck_mid_fusion_tokens": 4,
23
+ "classif_dropout": 0.0,
24
+ "classifier_dropout": 0.0,
25
+ "d_model": 1024,
26
+ "decoder_adaptor_tying_config": null,
27
+ "decoder_attention_heads": 16,
28
+ "decoder_ffn_dim": 4096,
29
+ "decoder_layerdrop": 0.0,
30
+ "decoder_layers": 12,
31
+ "decoder_start_token_id": 2,
32
+ "decoder_tying_config": null,
33
+ "deep_adaptor_tuning": false,
34
+ "deep_adaptor_tuning_ffn_only": false,
35
+ "dropout": 0.1,
36
+ "early_stopping": true,
37
+ "embed_low_rank_dim": 0,
38
+ "encoder_adaptor_tying_config": null,
39
+ "encoder_attention_heads": 16,
40
+ "encoder_ffn_dim": 4096,
41
+ "encoder_layerdrop": 0.0,
42
+ "encoder_layers": 12,
43
+ "encoder_tying_config": null,
44
+ "eos_token_id": 2,
45
+ "expert_ffn_size": 128,
46
+ "features_embed_dims": null,
47
+ "features_vocab_sizes": null,
48
+ "forced_eos_token_id": 2,
49
+ "gradient_checkpointing": false,
50
+ "gradient_reversal_for_domain_classifier": false,
51
+ "hypercomplex": false,
52
+ "hypercomplex_n": 2,
53
+ "ia3_adaptors": false,
54
+ "id2label": {
55
+ "0": "LABEL_0",
56
+ "1": "LABEL_1",
57
+ "2": "LABEL_2"
58
+ },
59
+ "init_std": 0.02,
60
+ "initialization_scheme": "static",
61
+ "inititialization_scheme": "static",
62
+ "is_encoder_decoder": true,
63
+ "label2id": {
64
+ "LABEL_0": 0,
65
+ "LABEL_1": 1,
66
+ "LABEL_2": 2
67
+ },
68
+ "layernorm_adaptor_input": false,
69
+ "layernorm_prompt_projection": false,
70
+ "lora_adaptor_rank": 2,
71
+ "lora_adaptors": false,
72
+ "max_length": 200,
73
+ "max_position_embeddings": 1024,
74
+ "mid_fusion_layers": 3,
75
+ "model_type": "mbart",
76
+ "moe_adaptors": false,
77
+ "multi_source": false,
78
+ "multi_source_method": null,
79
+ "multilayer_softmaxing": null,
80
+ "no_embed_norm": false,
81
+ "no_positional_encoding_decoder": false,
82
+ "no_positional_encoding_encoder": false,
83
+ "no_projection_prompt": false,
84
+ "no_scale_attention_embedding": false,
85
+ "normalize_before": true,
86
+ "normalize_embedding": true,
87
+ "num_beams": 5,
88
+ "num_domains_for_domain_classifier": -1,
89
+ "num_experts": 8,
90
+ "num_hidden_layers": 12,
91
+ "num_moe_adaptor_experts": 4,
92
+ "num_prompts": 100,
93
+ "num_sparsify_blocks": 8,
94
+ "output_past": true,
95
+ "pad_token_id": 1,
96
+ "parallel_adaptors": false,
97
+ "positional_encodings": false,
98
+ "postnorm_decoder": false,
99
+ "postnorm_encoder": false,
100
+ "prompt_dropout": 0.1,
101
+ "prompt_init_std": 0.02,
102
+ "prompt_projection_hidden_size": 4096,
103
+ "prompt_tuning": false,
104
+ "recurrent_projections": 1,
105
+ "residual_connection_adaptor": false,
106
+ "residual_connection_prompt": false,
107
+ "rope_encoding": false,
108
+ "scale_embedding": true,
109
+ "softmax_bias_tuning": false,
110
+ "softmax_temperature": 1.0,
111
+ "sparsification_temperature": 3.0,
112
+ "sparsify_attention": false,
113
+ "sparsify_ffn": false,
114
+ "static_position_embeddings": false,
115
+ "target_vocab_size": 0,
116
+ "temperature_calibration": false,
117
+ "tokenizer_class": "MBart50Tokenizer",
118
+ "transformers_version": "4.3.2",
119
+ "unidirectional_encoder": false,
120
+ "use_cache": true,
121
+ "use_moe": false,
122
+ "use_tanh_activation_prompt": false,
123
+ "vocab_size": 250054,
124
+ "wait_k": -1
125
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c23f93570b8834bf79adc2491c21e04e10c4f8957f26c0ca4c05ca87ebc21b3
3
+ size 2444676525
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051