hclent committed on
Commit
46747e3
·
verified ·
1 Parent(s): 8f351f1

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +70 -3
  2. config.json +125 -0
  3. pytorch_model.bin +3 -0
  4. sentencepiece.bpe.model +3 -0
README.md CHANGED
@@ -1,3 +1,70 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ metrics:
4
+ - bleu
5
+ - chrf
6
+ language:
7
+ - eng
8
+ - acf
9
+ - hwc
10
+ - gul
11
+ - icr
12
+ - mbf
13
+ - ktu
14
+ - jam
15
+ - mkn
16
+ - cbk
17
+ - bzj
18
+ - rop
19
+ - pcm
20
+ - srm
21
+ - kri
22
+ - djk
23
+ - tdt
24
+ - mfe
25
+ - hat
26
+ - crs
27
+ - sag
28
+ - pis
29
+ - pap
30
+ - tpi
31
+ - bis
32
+ - srn
33
+ base_model:
34
+ - facebook/mbart-large-50-many-to-many-mmt
35
+ pipeline_tag: text2text-generation
36
+ tags:
37
+ - '#Creoles'
38
+
39
+
40
+ ---
41
+
42
+ CreoleM2M is a machine translation model, which belongs to the **CreoleVal** benchmark.
43
+
44
+ This model was fine-tuned from mBART-50-MT, and supports 26 Creole languages.
45
+
46
+ For instructions and examples of how to run inference with this model, we refer you to the [CreoleVal Github](https://github.com/hclent/CreoleVal/tree/main/nlg/creolem2m).
47
+
48
+ Additional details (i.e., the CreoleM2M training dataset statistics, hyperparameters, and full results) can be found in the [CreoleVal Github's "Appendix"](https://github.com/hclent/CreoleVal/blob/main/Appendix/mt_m2m.md).
49
+
50
+ Otherwise, please see the [CreoleVal paper](https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00682/124256) for full details.
51
+
52
+
53
+ If you find our paper or models helpful, please cite our work:
54
+
55
+ ```
56
+ @article{10.1162/tacl_a_00682,
57
+ author = {Lent, Heather and Tatariya, Kushal and Dabre, Raj and Chen, Yiyi and Fekete, Marcell and Ploeger, Esther and Zhou, Li and Armstrong, Ruth-Ann and Eijansantos, Abee and Malau, Catriona and Heje, Hans Erik and Lavrinovics, Ernests and Kanojia, Diptesh and Belony, Paul and Bollmann, Marcel and Grobol, Loïc and Lhoneux, Miryam de and Hershcovich, Daniel and DeGraff, Michel and Søgaard, Anders and Bjerva, Johannes},
58
+ title = {CreoleVal: Multilingual Multitask Benchmarks for Creoles},
59
+ journal = {Transactions of the Association for Computational Linguistics},
60
+ volume = {12},
61
+ pages = {950-978},
62
+ year = {2024},
63
+ month = {09},
64
+ issn = {2307-387X},
65
+ doi = {10.1162/tacl_a_00682},
66
+ url = {https://doi.org/10.1162/tacl\_a\_00682},
67
+ eprint = {https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl\_a\_00682/2468651/tacl\_a\_00682.pdf},
68
+ }
69
+ ```
70
+
config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/mbart-large-50-many-to-many-mmt",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.1,
5
+ "activation_function": "relu",
6
+ "adaptor_activation_function": "gelu",
7
+ "adaptor_dropout": 0.1,
8
+ "adaptor_hidden_size": 512,
9
+ "adaptor_init_std": 0.02,
10
+ "adaptor_scaling_factor": 1.0,
11
+ "adaptor_tuning": false,
12
+ "add_bias_logits": false,
13
+ "add_final_layer_norm": true,
14
+ "additional_source_wait_k": -1,
15
+ "alibi_encoding": false,
16
+ "architectures": [
17
+ "MBartForConditionalGeneration"
18
+ ],
19
+ "asymmetric_alibi_encoding": false,
20
+ "attention_dropout": 0.1,
21
+ "bos_token_id": 0,
22
+ "bottleneck_mid_fusion_tokens": 4,
23
+ "classif_dropout": 0.0,
24
+ "classifier_dropout": 0.0,
25
+ "d_model": 1024,
26
+ "decoder_adaptor_tying_config": null,
27
+ "decoder_attention_heads": 16,
28
+ "decoder_ffn_dim": 4096,
29
+ "decoder_layerdrop": 0.0,
30
+ "decoder_layers": 12,
31
+ "decoder_start_token_id": 2,
32
+ "decoder_tying_config": null,
33
+ "deep_adaptor_tuning": false,
34
+ "deep_adaptor_tuning_ffn_only": false,
35
+ "dropout": 0.1,
36
+ "early_stopping": true,
37
+ "embed_low_rank_dim": 0,
38
+ "encoder_adaptor_tying_config": null,
39
+ "encoder_attention_heads": 16,
40
+ "encoder_ffn_dim": 4096,
41
+ "encoder_layerdrop": 0.0,
42
+ "encoder_layers": 12,
43
+ "encoder_tying_config": null,
44
+ "eos_token_id": 2,
45
+ "expert_ffn_size": 128,
46
+ "features_embed_dims": null,
47
+ "features_vocab_sizes": null,
48
+ "forced_eos_token_id": 2,
49
+ "gradient_checkpointing": false,
50
+ "gradient_reversal_for_domain_classifier": false,
51
+ "hypercomplex": false,
52
+ "hypercomplex_n": 2,
53
+ "ia3_adaptors": false,
54
+ "id2label": {
55
+ "0": "LABEL_0",
56
+ "1": "LABEL_1",
57
+ "2": "LABEL_2"
58
+ },
59
+ "init_std": 0.02,
60
+ "initialization_scheme": "static",
61
+ "inititialization_scheme": "static",
62
+ "is_encoder_decoder": true,
63
+ "label2id": {
64
+ "LABEL_0": 0,
65
+ "LABEL_1": 1,
66
+ "LABEL_2": 2
67
+ },
68
+ "layernorm_adaptor_input": false,
69
+ "layernorm_prompt_projection": false,
70
+ "lora_adaptor_rank": 2,
71
+ "lora_adaptors": false,
72
+ "max_length": 200,
73
+ "max_position_embeddings": 1024,
74
+ "mid_fusion_layers": 3,
75
+ "model_type": "mbart",
76
+ "moe_adaptors": false,
77
+ "multi_source": false,
78
+ "multi_source_method": null,
79
+ "multilayer_softmaxing": null,
80
+ "no_embed_norm": false,
81
+ "no_positional_encoding_decoder": false,
82
+ "no_positional_encoding_encoder": false,
83
+ "no_projection_prompt": false,
84
+ "no_scale_attention_embedding": false,
85
+ "normalize_before": true,
86
+ "normalize_embedding": true,
87
+ "num_beams": 5,
88
+ "num_domains_for_domain_classifier": -1,
89
+ "num_experts": 8,
90
+ "num_hidden_layers": 12,
91
+ "num_moe_adaptor_experts": 4,
92
+ "num_prompts": 100,
93
+ "num_sparsify_blocks": 8,
94
+ "output_past": true,
95
+ "pad_token_id": 1,
96
+ "parallel_adaptors": false,
97
+ "positional_encodings": false,
98
+ "postnorm_decoder": false,
99
+ "postnorm_encoder": false,
100
+ "prompt_dropout": 0.1,
101
+ "prompt_init_std": 0.02,
102
+ "prompt_projection_hidden_size": 4096,
103
+ "prompt_tuning": false,
104
+ "recurrent_projections": 1,
105
+ "residual_connection_adaptor": false,
106
+ "residual_connection_prompt": false,
107
+ "rope_encoding": false,
108
+ "scale_embedding": true,
109
+ "softmax_bias_tuning": false,
110
+ "softmax_temperature": 1.0,
111
+ "sparsification_temperature": 3.0,
112
+ "sparsify_attention": false,
113
+ "sparsify_ffn": false,
114
+ "static_position_embeddings": false,
115
+ "target_vocab_size": 0,
116
+ "temperature_calibration": false,
117
+ "tokenizer_class": "MBart50Tokenizer",
118
+ "transformers_version": "4.3.2",
119
+ "unidirectional_encoder": false,
120
+ "use_cache": true,
121
+ "use_moe": false,
122
+ "use_tanh_activation_prompt": false,
123
+ "vocab_size": 250054,
124
+ "wait_k": -1
125
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c23f93570b8834bf79adc2491c21e04e10c4f8957f26c0ca4c05ca87ebc21b3
3
+ size 2444676525
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051