Easonwangzk committed 29 days ago

Commit

432ed18

verified ·

1 Parent(s): d23beb3

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +3 -0
README.md +206 -0
adapter_config.json +41 -0
adapter_model.safetensors +3 -0
checkpoint-4375/README.md +206 -0
checkpoint-4375/adapter_config.json +41 -0
checkpoint-4375/adapter_model.safetensors +3 -0
checkpoint-4375/optimizer.pt +3 -0
checkpoint-4375/rng_state.pth +3 -0
checkpoint-4375/scheduler.pt +3 -0
checkpoint-4375/trainer_state.json +653 -0
checkpoint-4375/training_args.bin +3 -0
cls/README.md +206 -0
cls/adapter_config.json +38 -0
cls/adapter_model.safetensors +3 -0
cls/checkpoint-4375/model.safetensors +3 -0
cls/checkpoint-4375/optimizer.pt +3 -0
cls/checkpoint-4375/rng_state.pth +3 -0
cls/checkpoint-4375/scheduler.pt +3 -0
cls/checkpoint-4375/trainer_state.json +653 -0
cls/checkpoint-4375/training_args.bin +3 -0
cls/classifier_head.pt +3 -0
cls/id2label.json +22 -0
cls/label2id.json +22 -0
cls/sentencepiece.bpe.model +3 -0
cls/special_tokens_map.json +15 -0
cls/tokenizer.json +3 -0
cls/tokenizer_config.json +55 -0
id2label.json +22 -0
label2id.json +22 -0
mean/README.md +206 -0
mean/adapter_config.json +38 -0
mean/adapter_model.safetensors +3 -0
mean/checkpoint-4375/model.safetensors +3 -0
mean/checkpoint-4375/optimizer.pt +3 -0
mean/checkpoint-4375/rng_state.pth +3 -0
mean/checkpoint-4375/scheduler.pt +3 -0
mean/checkpoint-4375/trainer_state.json +653 -0
mean/checkpoint-4375/training_args.bin +3 -0
mean/classifier_head.pt +3 -0
mean/id2label.json +22 -0
mean/label2id.json +22 -0
mean/sentencepiece.bpe.model +3 -0
mean/special_tokens_map.json +15 -0
mean/tokenizer.json +3 -0
mean/tokenizer_config.json +55 -0
sentencepiece.bpe.model +3 -0
special_tokens_map.json +15 -0
tokenizer.json +3 -0
tokenizer_config.json +55 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cls/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+mean/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,206 @@

+---
+base_model: xlm-roberta-base
+library_name: peft
+tags:
+- base_model:adapter:xlm-roberta-base
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "xlm-roberta-base",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "key",
+    "query",
+    "value"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:415a2fc81835d4a32a0f4a0130dc4d7b3e54538057b8c505bbb81b7633f7cc4a
+size 4761648

checkpoint-4375/README.md ADDED Viewed

	@@ -0,0 +1,206 @@

+---
+base_model: xlm-roberta-base
+library_name: peft
+tags:
+- base_model:adapter:xlm-roberta-base
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

checkpoint-4375/adapter_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "xlm-roberta-base",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "classifier",
+    "score"
+  ],
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "key",
+    "query",
+    "value"
+  ],
+  "target_parameters": null,
+  "task_type": "SEQ_CLS",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

checkpoint-4375/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:415a2fc81835d4a32a0f4a0130dc4d7b3e54538057b8c505bbb81b7633f7cc4a
+size 4761648

checkpoint-4375/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:66d6726175fb892fccada29b5502d3264e53b6b2374fd8341dd077ebc3285a3f
+size 9567371

checkpoint-4375/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad5ca0f8bcf195c3eecf0d29502529a9b5292d77ddbc0d61c129ec324025167b
+size 14645

checkpoint-4375/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a993a449c7a40aa61a80703106703284ce9b6fb450a8bd64d07585e0fc640e07
+size 1465

checkpoint-4375/trainer_state.json ADDED Viewed

	@@ -0,0 +1,653 @@

+{
+  "best_global_step": 4375,
+  "best_metric": 0.9967,
+  "best_model_checkpoint": "lora-xlmr-langid/checkpoint-4375",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 4375,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.011428571428571429,
+      "grad_norm": 2.9476499557495117,
+      "learning_rate": 3.7262357414448674e-05,
+      "loss": 3.0186,
+      "step": 50
+    },
+    {
+      "epoch": 0.022857142857142857,
+      "grad_norm": 3.132594108581543,
+      "learning_rate": 7.52851711026616e-05,
+      "loss": 3.0152,
+      "step": 100
+    },
+    {
+      "epoch": 0.03428571428571429,
+      "grad_norm": 5.743668079376221,
+      "learning_rate": 0.00011330798479087452,
+      "loss": 2.8481,
+      "step": 150
+    },
+    {
+      "epoch": 0.045714285714285714,
+      "grad_norm": 6.232862949371338,
+      "learning_rate": 0.00015133079847908746,
+      "loss": 1.9112,
+      "step": 200
+    },
+    {
+      "epoch": 0.05714285714285714,
+      "grad_norm": 4.115293025970459,
+      "learning_rate": 0.0001893536121673004,
+      "loss": 0.7416,
+      "step": 250
+    },
+    {
+      "epoch": 0.06857142857142857,
+      "grad_norm": 2.165242910385132,
+      "learning_rate": 0.00019996217828993133,
+      "loss": 0.3754,
+      "step": 300
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.7203718423843384,
+      "learning_rate": 0.0001997842234752531,
+      "loss": 0.1814,
+      "step": 350
+    },
+    {
+      "epoch": 0.09142857142857143,
+      "grad_norm": 0.9917824864387512,
+      "learning_rate": 0.00019946067495120418,
+      "loss": 0.1232,
+      "step": 400
+    },
+    {
+      "epoch": 0.10285714285714286,
+      "grad_norm": 4.663485050201416,
+      "learning_rate": 0.0001989920048027309,
+      "loss": 0.068,
+      "step": 450
+    },
+    {
+      "epoch": 0.11428571428571428,
+      "grad_norm": 0.9876736402511597,
+      "learning_rate": 0.00019837889685963127,
+      "loss": 0.0654,
+      "step": 500
+    },
+    {
+      "epoch": 0.12571428571428572,
+      "grad_norm": 0.21394599974155426,
+      "learning_rate": 0.00019762224569878898,
+      "loss": 0.069,
+      "step": 550
+    },
+    {
+      "epoch": 0.13714285714285715,
+      "grad_norm": 0.7479887008666992,
+      "learning_rate": 0.00019672315533890932,
+      "loss": 0.0534,
+      "step": 600
+    },
+    {
+      "epoch": 0.14857142857142858,
+      "grad_norm": 0.12780117988586426,
+      "learning_rate": 0.00019568293762966147,
+      "loss": 0.0264,
+      "step": 650
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.13217857480049133,
+      "learning_rate": 0.0001945031103375777,
+      "loss": 0.0596,
+      "step": 700
+    },
+    {
+      "epoch": 0.17142857142857143,
+      "grad_norm": 3.1933023929595947,
+      "learning_rate": 0.00019318539493150242,
+      "loss": 0.0447,
+      "step": 750
+    },
+    {
+      "epoch": 0.18285714285714286,
+      "grad_norm": 0.5493296980857849,
+      "learning_rate": 0.0001917317140708218,
+      "loss": 0.0474,
+      "step": 800
+    },
+    {
+      "epoch": 0.19428571428571428,
+      "grad_norm": 0.10272103548049927,
+      "learning_rate": 0.00019014418880013975,
+      "loss": 0.0539,
+      "step": 850
+    },
+    {
+      "epoch": 0.2057142857142857,
+      "grad_norm": 0.21979080140590668,
+      "learning_rate": 0.00018842513545449223,
+      "loss": 0.0203,
+      "step": 900
+    },
+    {
+      "epoch": 0.21714285714285714,
+      "grad_norm": 0.19337095320224762,
+      "learning_rate": 0.00018657706227961677,
+      "loss": 0.0343,
+      "step": 950
+    },
+    {
+      "epoch": 0.22857142857142856,
+      "grad_norm": 1.1383503675460815,
+      "learning_rate": 0.00018460266577220732,
+      "loss": 0.0229,
+      "step": 1000
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.08038519322872162,
+      "learning_rate": 0.00018250482674549547,
+      "loss": 0.0196,
+      "step": 1050
+    },
+    {
+      "epoch": 0.25142857142857145,
+      "grad_norm": 1.3838239908218384,
+      "learning_rate": 0.00018028660612589743,
+      "loss": 0.0228,
+      "step": 1100
+    },
+    {
+      "epoch": 0.26285714285714284,
+      "grad_norm": 10.174747467041016,
+      "learning_rate": 0.00017795124048686103,
+      "loss": 0.0424,
+      "step": 1150
+    },
+    {
+      "epoch": 0.2742857142857143,
+      "grad_norm": 0.14027394354343414,
+      "learning_rate": 0.00017550213732642837,
+      "loss": 0.0374,
+      "step": 1200
+    },
+    {
+      "epoch": 0.2857142857142857,
+      "grad_norm": 0.03382161259651184,
+      "learning_rate": 0.00017294287009540494,
+      "loss": 0.0166,
+      "step": 1250
+    },
+    {
+      "epoch": 0.29714285714285715,
+      "grad_norm": 0.07205367088317871,
+      "learning_rate": 0.00017027717298338977,
+      "loss": 0.0316,
+      "step": 1300
+    },
+    {
+      "epoch": 0.30857142857142855,
+      "grad_norm": 0.4876558482646942,
+      "learning_rate": 0.0001675089354702732,
+      "loss": 0.0474,
+      "step": 1350
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.0657462626695633,
+      "learning_rate": 0.0001646421966511539,
+      "loss": 0.0137,
+      "step": 1400
+    },
+    {
+      "epoch": 0.3314285714285714,
+      "grad_norm": 0.043785881251096725,
+      "learning_rate": 0.00016168113934295362,
+      "loss": 0.0438,
+      "step": 1450
+    },
+    {
+      "epoch": 0.34285714285714286,
+      "grad_norm": 0.1223607212305069,
+      "learning_rate": 0.0001586300839813298,
+      "loss": 0.0285,
+      "step": 1500
+    },
+    {
+      "epoch": 0.35428571428571426,
+      "grad_norm": 0.036955248564481735,
+      "learning_rate": 0.00015549348231679093,
+      "loss": 0.0193,
+      "step": 1550
+    },
+    {
+      "epoch": 0.3657142857142857,
+      "grad_norm": 0.10787559300661087,
+      "learning_rate": 0.00015227591091921205,
+      "loss": 0.0258,
+      "step": 1600
+    },
+    {
+      "epoch": 0.37714285714285717,
+      "grad_norm": 0.05653952807188034,
+      "learning_rate": 0.00014898206450022813,
+      "loss": 0.0287,
+      "step": 1650
+    },
+    {
+      "epoch": 0.38857142857142857,
+      "grad_norm": 0.4728432297706604,
+      "learning_rate": 0.00014561674906324873,
+      "loss": 0.0357,
+      "step": 1700
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.03136986121535301,
+      "learning_rate": 0.00014218487489108813,
+      "loss": 0.0173,
+      "step": 1750
+    },
+    {
+      "epoch": 0.4114285714285714,
+      "grad_norm": 0.019133005291223526,
+      "learning_rate": 0.00013869144938144325,
+      "loss": 0.014,
+      "step": 1800
+    },
+    {
+      "epoch": 0.4228571428571429,
+      "grad_norm": 0.025615772232413292,
+      "learning_rate": 0.00013514156974067242,
+      "loss": 0.0248,
+      "step": 1850
+    },
+    {
+      "epoch": 0.4342857142857143,
+      "grad_norm": 0.12288248538970947,
+      "learning_rate": 0.00013154041554653577,
+      "loss": 0.0421,
+      "step": 1900
+    },
+    {
+      "epoch": 0.44571428571428573,
+      "grad_norm": 0.22975103557109833,
+      "learning_rate": 0.00012789324119074852,
+      "loss": 0.0489,
+      "step": 1950
+    },
+    {
+      "epoch": 0.45714285714285713,
+      "grad_norm": 0.04648689553141594,
+      "learning_rate": 0.00012420536821237444,
+      "loss": 0.0178,
+      "step": 2000
+    },
+    {
+      "epoch": 0.4685714285714286,
+      "grad_norm": 0.034475695341825485,
+      "learning_rate": 0.00012048217753324587,
+      "loss": 0.045,
+      "step": 2050
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.05342303588986397,
+      "learning_rate": 0.00011672910160673858,
+      "loss": 0.01,
+      "step": 2100
+    },
+    {
+      "epoch": 0.49142857142857144,
+      "grad_norm": 0.07373230904340744,
+      "learning_rate": 0.00011295161649135815,
+      "loss": 0.0243,
+      "step": 2150
+    },
+    {
+      "epoch": 0.5028571428571429,
+      "grad_norm": 0.11881459504365921,
+      "learning_rate": 0.00010915523386070277,
+      "loss": 0.0287,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5142857142857142,
+      "grad_norm": 0.16949842870235443,
+      "learning_rate": 0.0001053454929614603,
+      "loss": 0.024,
+      "step": 2250
+    },
+    {
+      "epoch": 0.5257142857142857,
+      "grad_norm": 0.19728004932403564,
+      "learning_rate": 0.00010152795253117406,
+      "loss": 0.0097,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5371428571428571,
+      "grad_norm": 3.5783822536468506,
+      "learning_rate": 9.770818268756971e-05,
+      "loss": 0.0379,
+      "step": 2350
+    },
+    {
+      "epoch": 0.5485714285714286,
+      "grad_norm": 0.03188510239124298,
+      "learning_rate": 9.389175680127735e-05,
+      "loss": 0.0368,
+      "step": 2400
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.06983346492052078,
+      "learning_rate": 9.008424336380778e-05,
+      "loss": 0.0271,
+      "step": 2450
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 4.264764308929443,
+      "learning_rate": 8.62911978626472e-05,
+      "loss": 0.0226,
+      "step": 2500
+    },
+    {
+      "epoch": 0.5828571428571429,
+      "grad_norm": 0.02377461828291416,
+      "learning_rate": 8.251815467532628e-05,
+      "loss": 0.0111,
+      "step": 2550
+    },
+    {
+      "epoch": 0.5942857142857143,
+      "grad_norm": 0.05363013595342636,
+      "learning_rate": 7.877061899429066e-05,
+      "loss": 0.0223,
+      "step": 2600
+    },
+    {
+      "epoch": 0.6057142857142858,
+      "grad_norm": 0.5701267719268799,
+      "learning_rate": 7.505405879435429e-05,
+      "loss": 0.0219,
+      "step": 2650
+    },
+    {
+      "epoch": 0.6171428571428571,
+      "grad_norm": 0.030285466462373734,
+      "learning_rate": 7.137389685445726e-05,
+      "loss": 0.0307,
+      "step": 2700
+    },
+    {
+      "epoch": 0.6285714285714286,
+      "grad_norm": 0.10280855000019073,
+      "learning_rate": 6.773550284536764e-05,
+      "loss": 0.0201,
+      "step": 2750
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.017326869070529938,
+      "learning_rate": 6.414418549487308e-05,
+      "loss": 0.0367,
+      "step": 2800
+    },
+    {
+      "epoch": 0.6514285714285715,
+      "grad_norm": 0.039858993142843246,
+      "learning_rate": 6.060518484189344e-05,
+      "loss": 0.0302,
+      "step": 2850
+    },
+    {
+      "epoch": 0.6628571428571428,
+      "grad_norm": 0.19618330895900726,
+      "learning_rate": 5.712366459081577e-05,
+      "loss": 0.0035,
+      "step": 2900
+    },
+    {
+      "epoch": 0.6742857142857143,
+      "grad_norm": 0.10122616589069366,
+      "learning_rate": 5.37047045772089e-05,
+      "loss": 0.0301,
+      "step": 2950
+    },
+    {
+      "epoch": 0.6857142857142857,
+      "grad_norm": 0.28158578276634216,
+      "learning_rate": 5.035329335590868e-05,
+      "loss": 0.0183,
+      "step": 3000
+    },
+    {
+      "epoch": 0.6971428571428572,
+      "grad_norm": 1.7570079565048218,
+      "learning_rate": 4.707432092229059e-05,
+      "loss": 0.0124,
+      "step": 3050
+    },
+    {
+      "epoch": 0.7085714285714285,
+      "grad_norm": 0.3031100630760193,
+      "learning_rate": 4.387257157734841e-05,
+      "loss": 0.0145,
+      "step": 3100
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 0.014142443425953388,
+      "learning_rate": 4.0752716946990246e-05,
+      "loss": 0.0147,
+      "step": 3150
+    },
+    {
+      "epoch": 0.7314285714285714,
+      "grad_norm": 0.02221490442752838,
+      "learning_rate": 3.7719309165737013e-05,
+      "loss": 0.0085,
+      "step": 3200
+    },
+    {
+      "epoch": 0.7428571428571429,
+      "grad_norm": 0.03386814147233963,
+      "learning_rate": 3.477677423476935e-05,
+      "loss": 0.0178,
+      "step": 3250
+    },
+    {
+      "epoch": 0.7542857142857143,
+      "grad_norm": 0.020248012617230415,
+      "learning_rate": 3.19294055640135e-05,
+      "loss": 0.0325,
+      "step": 3300
+    },
+    {
+      "epoch": 0.7657142857142857,
+      "grad_norm": 0.03207193687558174,
+      "learning_rate": 2.9181357707689438e-05,
+      "loss": 0.0148,
+      "step": 3350
+    },
+    {
+      "epoch": 0.7771428571428571,
+      "grad_norm": 0.024402625858783722,
+      "learning_rate": 2.6536640302461034e-05,
+      "loss": 0.047,
+      "step": 3400
+    },
+    {
+      "epoch": 0.7885714285714286,
+      "grad_norm": 0.10441145300865173,
+      "learning_rate": 2.399911221703377e-05,
+      "loss": 0.0134,
+      "step": 3450
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.03832864388823509,
+      "learning_rate": 2.1572475921735357e-05,
+      "loss": 0.0107,
+      "step": 3500
+    },
+    {
+      "epoch": 0.8114285714285714,
+      "grad_norm": 0.01320917159318924,
+      "learning_rate": 1.9260272086295082e-05,
+      "loss": 0.008,
+      "step": 3550
+    },
+    {
+      "epoch": 0.8228571428571428,
+      "grad_norm": 0.14558428525924683,
+      "learning_rate": 1.706587441370433e-05,
+      "loss": 0.03,
+      "step": 3600
+    },
+    {
+      "epoch": 0.8342857142857143,
+      "grad_norm": 0.08396098017692566,
+      "learning_rate": 1.499248471769531e-05,
+      "loss": 0.0131,
+      "step": 3650
+    },
+    {
+      "epoch": 0.8457142857142858,
+      "grad_norm": 0.027459578588604927,
+      "learning_rate": 1.304312825102142e-05,
+      "loss": 0.0118,
+      "step": 3700
+    },
+    {
+      "epoch": 0.8571428571428571,
+      "grad_norm": 0.22308436036109924,
+      "learning_rate": 1.1220649291354902e-05,
+      "loss": 0.0033,
+      "step": 3750
+    },
+    {
+      "epoch": 0.8685714285714285,
+      "grad_norm": 0.01619495451450348,
+      "learning_rate": 9.527706991242502e-06,
+      "loss": 0.0185,
+      "step": 3800
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.0167935099452734,
+      "learning_rate": 7.966771498174963e-06,
+      "loss": 0.023,
+      "step": 3850
+    },
+    {
+      "epoch": 0.8914285714285715,
+      "grad_norm": 0.018898479640483856,
+      "learning_rate": 6.540120350430423e-06,
+      "loss": 0.0261,
+      "step": 3900
+    },
+    {
+      "epoch": 0.9028571428571428,
+      "grad_norm": 0.04563748463988304,
+      "learning_rate": 5.24983515395161e-06,
+      "loss": 0.0103,
+      "step": 3950
+    },
+    {
+      "epoch": 0.9142857142857143,
+      "grad_norm": 0.050685442984104156,
+      "learning_rate": 4.097798545104914e-06,
+      "loss": 0.005,
+      "step": 4000
+    },
+    {
+      "epoch": 0.9257142857142857,
+      "grad_norm": 0.13636836409568787,
+      "learning_rate": 3.0856914437528805e-06,
+      "loss": 0.0326,
+      "step": 4050
+    },
+    {
+      "epoch": 0.9371428571428572,
+      "grad_norm": 0.035096440464258194,
+      "learning_rate": 2.2149906006486364e-06,
+      "loss": 0.0196,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9485714285714286,
+      "grad_norm": 6.301705837249756,
+      "learning_rate": 1.4869664427303088e-06,
+      "loss": 0.0169,
+      "step": 4150
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.012595863081514835,
+      "learning_rate": 9.026812194594448e-07,
+      "loss": 0.0119,
+      "step": 4200
+    },
+    {
+      "epoch": 0.9714285714285714,
+      "grad_norm": 0.05827270448207855,
+      "learning_rate": 4.629874529084477e-07,
+      "loss": 0.011,
+      "step": 4250
+    },
+    {
+      "epoch": 0.9828571428571429,
+      "grad_norm": 0.012354250065982342,
+      "learning_rate": 1.6852669385787334e-07,
+      "loss": 0.0267,
+      "step": 4300
+    },
+    {
+      "epoch": 0.9942857142857143,
+      "grad_norm": 0.017665507271885872,
+      "learning_rate": 1.9728585719092086e-08,
+      "loss": 0.0364,
+      "step": 4350
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9967,
+      "eval_f1_macro": 0.9967018731563405,
+      "eval_loss": 0.014994239434599876,
+      "eval_runtime": 7.0745,
+      "eval_samples_per_second": 1413.524,
+      "eval_steps_per_second": 44.243,
+      "step": 4375
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 4375,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2293744193283840.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-4375/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:061be453b71cc5f0316ab060db00b5b0ed085a09e132f09349edfeaf3c76dfc0
+size 5841

cls/README.md ADDED Viewed

	@@ -0,0 +1,206 @@

+---
+base_model: xlm-roberta-base
+library_name: peft
+tags:
+- base_model:adapter:xlm-roberta-base
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

cls/adapter_config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "xlm-roberta-base",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "key",
+    "query",
+    "value"
+  ],
+  "target_parameters": null,
+  "task_type": "FEATURE_EXTRACTION",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

cls/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b0069df277693580cb1e9f2174db688694eef6282d376cb3e75724474e5e92a6
+size 3548696

cls/checkpoint-4375/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9637f0a60a0eb82366143b9573cfd1624e0e9b65dd7a386e736f69ae0891e2db
+size 559727136

cls/checkpoint-4375/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79695aaa1294db864d2ae12d0c3c57e187239591e32d4257830a45e2b5c77384
+size 7264779

cls/checkpoint-4375/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd939a99316e4f9a37f179832cf4a3356001e3909c993b3e827da5cd4a4617ba
+size 14645

cls/checkpoint-4375/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a993a449c7a40aa61a80703106703284ce9b6fb450a8bd64d07585e0fc640e07
+size 1465

cls/checkpoint-4375/trainer_state.json ADDED Viewed

	@@ -0,0 +1,653 @@

+{
+  "best_global_step": 4375,
+  "best_metric": 0.9965,
+  "best_model_checkpoint": "lora-xlmr-langid/cls/checkpoint-4375",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 4375,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.011428571428571429,
+      "grad_norm": 3.6967201232910156,
+      "learning_rate": 3.7262357414448674e-05,
+      "loss": 3.0631,
+      "step": 50
+    },
+    {
+      "epoch": 0.022857142857142857,
+      "grad_norm": 4.938446521759033,
+      "learning_rate": 7.52851711026616e-05,
+      "loss": 3.0133,
+      "step": 100
+    },
+    {
+      "epoch": 0.03428571428571429,
+      "grad_norm": 6.178889274597168,
+      "learning_rate": 0.00011330798479087452,
+      "loss": 2.8852,
+      "step": 150
+    },
+    {
+      "epoch": 0.045714285714285714,
+      "grad_norm": 8.431418418884277,
+      "learning_rate": 0.00015133079847908746,
+      "loss": 2.3558,
+      "step": 200
+    },
+    {
+      "epoch": 0.05714285714285714,
+      "grad_norm": 8.70128345489502,
+      "learning_rate": 0.0001893536121673004,
+      "loss": 1.5706,
+      "step": 250
+    },
+    {
+      "epoch": 0.06857142857142857,
+      "grad_norm": 5.243009567260742,
+      "learning_rate": 0.00019996217828993133,
+      "loss": 0.8102,
+      "step": 300
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 5.443891525268555,
+      "learning_rate": 0.0001997842234752531,
+      "loss": 0.4393,
+      "step": 350
+    },
+    {
+      "epoch": 0.09142857142857143,
+      "grad_norm": 11.013250350952148,
+      "learning_rate": 0.00019946067495120418,
+      "loss": 0.2872,
+      "step": 400
+    },
+    {
+      "epoch": 0.10285714285714286,
+      "grad_norm": 7.354420185089111,
+      "learning_rate": 0.0001989920048027309,
+      "loss": 0.165,
+      "step": 450
+    },
+    {
+      "epoch": 0.11428571428571428,
+      "grad_norm": 0.8610571622848511,
+      "learning_rate": 0.00019837889685963127,
+      "loss": 0.1022,
+      "step": 500
+    },
+    {
+      "epoch": 0.12571428571428572,
+      "grad_norm": 1.2767540216445923,
+      "learning_rate": 0.00019762224569878898,
+      "loss": 0.0783,
+      "step": 550
+    },
+    {
+      "epoch": 0.13714285714285715,
+      "grad_norm": 0.9847678542137146,
+      "learning_rate": 0.00019672315533890932,
+      "loss": 0.062,
+      "step": 600
+    },
+    {
+      "epoch": 0.14857142857142858,
+      "grad_norm": 0.5554406046867371,
+      "learning_rate": 0.00019568293762966147,
+      "loss": 0.031,
+      "step": 650
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.37308812141418457,
+      "learning_rate": 0.0001945031103375777,
+      "loss": 0.0307,
+      "step": 700
+    },
+    {
+      "epoch": 0.17142857142857143,
+      "grad_norm": 0.682551920413971,
+      "learning_rate": 0.00019318539493150242,
+      "loss": 0.0328,
+      "step": 750
+    },
+    {
+      "epoch": 0.18285714285714286,
+      "grad_norm": 3.072620153427124,
+      "learning_rate": 0.0001917317140708218,
+      "loss": 0.0472,
+      "step": 800
+    },
+    {
+      "epoch": 0.19428571428571428,
+      "grad_norm": 0.26799216866493225,
+      "learning_rate": 0.00019014418880013975,
+      "loss": 0.0483,
+      "step": 850
+    },
+    {
+      "epoch": 0.2057142857142857,
+      "grad_norm": 0.7617077827453613,
+      "learning_rate": 0.00018842513545449223,
+      "loss": 0.0282,
+      "step": 900
+    },
+    {
+      "epoch": 0.21714285714285714,
+      "grad_norm": 0.3497171401977539,
+      "learning_rate": 0.00018657706227961677,
+      "loss": 0.0368,
+      "step": 950
+    },
+    {
+      "epoch": 0.22857142857142856,
+      "grad_norm": 2.047266960144043,
+      "learning_rate": 0.00018460266577220732,
+      "loss": 0.0235,
+      "step": 1000
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.046612028032541275,
+      "learning_rate": 0.00018250482674549547,
+      "loss": 0.0113,
+      "step": 1050
+    },
+    {
+      "epoch": 0.25142857142857145,
+      "grad_norm": 3.3522257804870605,
+      "learning_rate": 0.00018028660612589743,
+      "loss": 0.0271,
+      "step": 1100
+    },
+    {
+      "epoch": 0.26285714285714284,
+      "grad_norm": 9.730717658996582,
+      "learning_rate": 0.00017795124048686103,
+      "loss": 0.0381,
+      "step": 1150
+    },
+    {
+      "epoch": 0.2742857142857143,
+      "grad_norm": 0.21655498445034027,
+      "learning_rate": 0.00017550213732642837,
+      "loss": 0.0265,
+      "step": 1200
+    },
+    {
+      "epoch": 0.2857142857142857,
+      "grad_norm": 0.018837904557585716,
+      "learning_rate": 0.00017294287009540494,
+      "loss": 0.0079,
+      "step": 1250
+    },
+    {
+      "epoch": 0.29714285714285715,
+      "grad_norm": 0.02092825062572956,
+      "learning_rate": 0.00017027717298338977,
+      "loss": 0.0199,
+      "step": 1300
+    },
+    {
+      "epoch": 0.30857142857142855,
+      "grad_norm": 0.24861940741539001,
+      "learning_rate": 0.0001675089354702732,
+      "loss": 0.0503,
+      "step": 1350
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.02130724862217903,
+      "learning_rate": 0.0001646421966511539,
+      "loss": 0.0187,
+      "step": 1400
+    },
+    {
+      "epoch": 0.3314285714285714,
+      "grad_norm": 0.07263777405023575,
+      "learning_rate": 0.00016168113934295362,
+      "loss": 0.033,
+      "step": 1450
+    },
+    {
+      "epoch": 0.34285714285714286,
+      "grad_norm": 0.025235984474420547,
+      "learning_rate": 0.0001586300839813298,
+      "loss": 0.0064,
+      "step": 1500
+    },
+    {
+      "epoch": 0.35428571428571426,
+      "grad_norm": 0.022818434983491898,
+      "learning_rate": 0.00015549348231679093,
+      "loss": 0.0111,
+      "step": 1550
+    },
+    {
+      "epoch": 0.3657142857142857,
+      "grad_norm": 0.6987139582633972,
+      "learning_rate": 0.00015227591091921205,
+      "loss": 0.0215,
+      "step": 1600
+    },
+    {
+      "epoch": 0.37714285714285717,
+      "grad_norm": 2.869483470916748,
+      "learning_rate": 0.00014898206450022813,
+      "loss": 0.0416,
+      "step": 1650
+    },
+    {
+      "epoch": 0.38857142857142857,
+      "grad_norm": 0.12740445137023926,
+      "learning_rate": 0.00014561674906324873,
+      "loss": 0.014,
+      "step": 1700
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.015665501356124878,
+      "learning_rate": 0.00014218487489108813,
+      "loss": 0.0084,
+      "step": 1750
+    },
+    {
+      "epoch": 0.4114285714285714,
+      "grad_norm": 0.019675016403198242,
+      "learning_rate": 0.00013869144938144325,
+      "loss": 0.0038,
+      "step": 1800
+    },
+    {
+      "epoch": 0.4228571428571429,
+      "grad_norm": 0.3949635922908783,
+      "learning_rate": 0.00013514156974067242,
+      "loss": 0.0212,
+      "step": 1850
+    },
+    {
+      "epoch": 0.4342857142857143,
+      "grad_norm": 0.04113984480500221,
+      "learning_rate": 0.00013154041554653577,
+      "loss": 0.0182,
+      "step": 1900
+    },
+    {
+      "epoch": 0.44571428571428573,
+      "grad_norm": 0.055694226175546646,
+      "learning_rate": 0.00012789324119074852,
+      "loss": 0.0408,
+      "step": 1950
+    },
+    {
+      "epoch": 0.45714285714285713,
+      "grad_norm": 0.01574717089533806,
+      "learning_rate": 0.00012420536821237444,
+      "loss": 0.016,
+      "step": 2000
+    },
+    {
+      "epoch": 0.4685714285714286,
+      "grad_norm": 0.03843434900045395,
+      "learning_rate": 0.00012048217753324587,
+      "loss": 0.0369,
+      "step": 2050
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.01596643030643463,
+      "learning_rate": 0.00011672910160673858,
+      "loss": 0.0147,
+      "step": 2100
+    },
+    {
+      "epoch": 0.49142857142857144,
+      "grad_norm": 0.025914940983057022,
+      "learning_rate": 0.00011295161649135815,
+      "loss": 0.0257,
+      "step": 2150
+    },
+    {
+      "epoch": 0.5028571428571429,
+      "grad_norm": 0.0455513596534729,
+      "learning_rate": 0.00010915523386070277,
+      "loss": 0.0213,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5142857142857142,
+      "grad_norm": 1.0212668180465698,
+      "learning_rate": 0.0001053454929614603,
+      "loss": 0.0121,
+      "step": 2250
+    },
+    {
+      "epoch": 0.5257142857142857,
+      "grad_norm": 0.07384993880987167,
+      "learning_rate": 0.00010152795253117406,
+      "loss": 0.0013,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5371428571428571,
+      "grad_norm": 0.04635027050971985,
+      "learning_rate": 9.770818268756971e-05,
+      "loss": 0.0427,
+      "step": 2350
+    },
+    {
+      "epoch": 0.5485714285714286,
+      "grad_norm": 6.902871131896973,
+      "learning_rate": 9.389175680127735e-05,
+      "loss": 0.0351,
+      "step": 2400
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.1742580235004425,
+      "learning_rate": 9.008424336380778e-05,
+      "loss": 0.0199,
+      "step": 2450
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 4.179978370666504,
+      "learning_rate": 8.62911978626472e-05,
+      "loss": 0.0125,
+      "step": 2500
+    },
+    {
+      "epoch": 0.5828571428571429,
+      "grad_norm": 0.01033821888267994,
+      "learning_rate": 8.251815467532628e-05,
+      "loss": 0.0038,
+      "step": 2550
+    },
+    {
+      "epoch": 0.5942857142857143,
+      "grad_norm": 0.017704356461763382,
+      "learning_rate": 7.877061899429066e-05,
+      "loss": 0.0388,
+      "step": 2600
+    },
+    {
+      "epoch": 0.6057142857142858,
+      "grad_norm": 0.06891408562660217,
+      "learning_rate": 7.505405879435429e-05,
+      "loss": 0.0179,
+      "step": 2650
+    },
+    {
+      "epoch": 0.6171428571428571,
+      "grad_norm": 0.06603990495204926,
+      "learning_rate": 7.137389685445726e-05,
+      "loss": 0.0229,
+      "step": 2700
+    },
+    {
+      "epoch": 0.6285714285714286,
+      "grad_norm": 0.029912831261754036,
+      "learning_rate": 6.773550284536764e-05,
+      "loss": 0.0073,
+      "step": 2750
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.014299588277935982,
+      "learning_rate": 6.414418549487308e-05,
+      "loss": 0.0354,
+      "step": 2800
+    },
+    {
+      "epoch": 0.6514285714285715,
+      "grad_norm": 0.03639346361160278,
+      "learning_rate": 6.060518484189344e-05,
+      "loss": 0.0292,
+      "step": 2850
+    },
+    {
+      "epoch": 0.6628571428571428,
+      "grad_norm": 0.3018437325954437,
+      "learning_rate": 5.712366459081577e-05,
+      "loss": 0.0051,
+      "step": 2900
+    },
+    {
+      "epoch": 0.6742857142857143,
+      "grad_norm": 0.030778272077441216,
+      "learning_rate": 5.37047045772089e-05,
+      "loss": 0.0173,
+      "step": 2950
+    },
+    {
+      "epoch": 0.6857142857142857,
+      "grad_norm": 0.017592955380678177,
+      "learning_rate": 5.035329335590868e-05,
+      "loss": 0.0204,
+      "step": 3000
+    },
+    {
+      "epoch": 0.6971428571428572,
+      "grad_norm": 0.017659608274698257,
+      "learning_rate": 4.707432092229059e-05,
+      "loss": 0.0391,
+      "step": 3050
+    },
+    {
+      "epoch": 0.7085714285714285,
+      "grad_norm": 0.0674019530415535,
+      "learning_rate": 4.387257157734841e-05,
+      "loss": 0.0111,
+      "step": 3100
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 0.007148749195039272,
+      "learning_rate": 4.0752716946990246e-05,
+      "loss": 0.0083,
+      "step": 3150
+    },
+    {
+      "epoch": 0.7314285714285714,
+      "grad_norm": 0.00936940684914589,
+      "learning_rate": 3.7719309165737013e-05,
+      "loss": 0.0078,
+      "step": 3200
+    },
+    {
+      "epoch": 0.7428571428571429,
+      "grad_norm": 0.023398227989673615,
+      "learning_rate": 3.477677423476935e-05,
+      "loss": 0.0073,
+      "step": 3250
+    },
+    {
+      "epoch": 0.7542857142857143,
+      "grad_norm": 0.036872465163469315,
+      "learning_rate": 3.19294055640135e-05,
+      "loss": 0.03,
+      "step": 3300
+    },
+    {
+      "epoch": 0.7657142857142857,
+      "grad_norm": 0.00688199233263731,
+      "learning_rate": 2.9181357707689438e-05,
+      "loss": 0.0076,
+      "step": 3350
+    },
+    {
+      "epoch": 0.7771428571428571,
+      "grad_norm": 0.05069967731833458,
+      "learning_rate": 2.6536640302461034e-05,
+      "loss": 0.0366,
+      "step": 3400
+    },
+    {
+      "epoch": 0.7885714285714286,
+      "grad_norm": 0.002097500255331397,
+      "learning_rate": 2.399911221703377e-05,
+      "loss": 0.0152,
+      "step": 3450
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.008384926244616508,
+      "learning_rate": 2.1572475921735357e-05,
+      "loss": 0.0045,
+      "step": 3500
+    },
+    {
+      "epoch": 0.8114285714285714,
+      "grad_norm": 0.005785002373158932,
+      "learning_rate": 1.9260272086295082e-05,
+      "loss": 0.0133,
+      "step": 3550
+    },
+    {
+      "epoch": 0.8228571428571428,
+      "grad_norm": 0.031658366322517395,
+      "learning_rate": 1.706587441370433e-05,
+      "loss": 0.0339,
+      "step": 3600
+    },
+    {
+      "epoch": 0.8342857142857143,
+      "grad_norm": 0.03882748261094093,
+      "learning_rate": 1.499248471769531e-05,
+      "loss": 0.0086,
+      "step": 3650
+    },
+    {
+      "epoch": 0.8457142857142858,
+      "grad_norm": 0.007800533901900053,
+      "learning_rate": 1.304312825102142e-05,
+      "loss": 0.0147,
+      "step": 3700
+    },
+    {
+      "epoch": 0.8571428571428571,
+      "grad_norm": 0.059970512986183167,
+      "learning_rate": 1.1220649291354902e-05,
+      "loss": 0.0093,
+      "step": 3750
+    },
+    {
+      "epoch": 0.8685714285714285,
+      "grad_norm": 0.014666451141238213,
+      "learning_rate": 9.527706991242502e-06,
+      "loss": 0.0295,
+      "step": 3800
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.04973109811544418,
+      "learning_rate": 7.966771498174963e-06,
+      "loss": 0.0182,
+      "step": 3850
+    },
+    {
+      "epoch": 0.8914285714285715,
+      "grad_norm": 0.00469050882384181,
+      "learning_rate": 6.540120350430423e-06,
+      "loss": 0.0121,
+      "step": 3900
+    },
+    {
+      "epoch": 0.9028571428571428,
+      "grad_norm": 0.004425337538123131,
+      "learning_rate": 5.24983515395161e-06,
+      "loss": 0.0178,
+      "step": 3950
+    },
+    {
+      "epoch": 0.9142857142857143,
+      "grad_norm": 0.006870228797197342,
+      "learning_rate": 4.097798545104914e-06,
+      "loss": 0.0192,
+      "step": 4000
+    },
+    {
+      "epoch": 0.9257142857142857,
+      "grad_norm": 0.006712966598570347,
+      "learning_rate": 3.0856914437528805e-06,
+      "loss": 0.0219,
+      "step": 4050
+    },
+    {
+      "epoch": 0.9371428571428572,
+      "grad_norm": 0.08353295922279358,
+      "learning_rate": 2.2149906006486364e-06,
+      "loss": 0.0176,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9485714285714286,
+      "grad_norm": 5.768489360809326,
+      "learning_rate": 1.4869664427303088e-06,
+      "loss": 0.0146,
+      "step": 4150
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.008484387770295143,
+      "learning_rate": 9.026812194594448e-07,
+      "loss": 0.0144,
+      "step": 4200
+    },
+    {
+      "epoch": 0.9714285714285714,
+      "grad_norm": 0.0066833593882620335,
+      "learning_rate": 4.629874529084477e-07,
+      "loss": 0.0126,
+      "step": 4250
+    },
+    {
+      "epoch": 0.9828571428571429,
+      "grad_norm": 0.0016708762850612402,
+      "learning_rate": 1.6852669385787334e-07,
+      "loss": 0.0312,
+      "step": 4300
+    },
+    {
+      "epoch": 0.9942857142857143,
+      "grad_norm": 0.05290694534778595,
+      "learning_rate": 1.9728585719092086e-08,
+      "loss": 0.0151,
+      "step": 4350
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9965,
+      "eval_f1_macro": 0.9965041499596807,
+      "eval_loss": 0.014111927710473537,
+      "eval_runtime": 7.3568,
+      "eval_samples_per_second": 1359.295,
+      "eval_steps_per_second": 42.546,
+      "step": 4375
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 4375,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

cls/checkpoint-4375/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4075c4dd5f76985a66b5b8244a5a3a67d98141d7a13e095e3d3741f036a674f4
+size 5841

cls/classifier_head.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e01577e3d7fb321b18e6436893b9b36458a7a55d7a382a9fc1ad659a21d50d68
+size 63589

cls/id2label.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "0": "ar",
+  "1": "bg",
+  "2": "de",
+  "3": "el",
+  "4": "en",
+  "5": "es",
+  "6": "fr",
+  "7": "hi",
+  "8": "it",
+  "9": "ja",
+  "10": "nl",
+  "11": "pl",
+  "12": "pt",
+  "13": "ru",
+  "14": "sw",
+  "15": "th",
+  "16": "tr",
+  "17": "ur",
+  "18": "vi",
+  "19": "zh"
+}

cls/label2id.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "ar": 0,
+  "bg": 1,
+  "de": 2,
+  "el": 3,
+  "en": 4,
+  "es": 5,
+  "fr": 6,
+  "hi": 7,
+  "it": 8,
+  "ja": 9,
+  "nl": 10,
+  "pl": 11,
+  "pt": 12,
+  "ru": 13,
+  "sw": 14,
+  "th": 15,
+  "tr": 16,
+  "ur": 17,
+  "vi": 18,
+  "zh": 19
+}

cls/sentencepiece.bpe.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051

cls/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

cls/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea09a711f7adcb7e3bc41b614e59b829fc98e7b50b94d273d029315524364069
+size 17082831

cls/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
+}

id2label.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "0": "ar",
+  "1": "bg",
+  "2": "de",
+  "3": "el",
+  "4": "en",
+  "5": "es",
+  "6": "fr",
+  "7": "hi",
+  "8": "it",
+  "9": "ja",
+  "10": "nl",
+  "11": "pl",
+  "12": "pt",
+  "13": "ru",
+  "14": "sw",
+  "15": "th",
+  "16": "tr",
+  "17": "ur",
+  "18": "vi",
+  "19": "zh"
+}

label2id.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "ar": 0,
+  "bg": 1,
+  "de": 2,
+  "el": 3,
+  "en": 4,
+  "es": 5,
+  "fr": 6,
+  "hi": 7,
+  "it": 8,
+  "ja": 9,
+  "nl": 10,
+  "pl": 11,
+  "pt": 12,
+  "ru": 13,
+  "sw": 14,
+  "th": 15,
+  "tr": 16,
+  "ur": 17,
+  "vi": 18,
+  "zh": 19
+}

mean/README.md ADDED Viewed

	@@ -0,0 +1,206 @@

+---
+base_model: xlm-roberta-base
+library_name: peft
+tags:
+- base_model:adapter:xlm-roberta-base
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.17.1

mean/adapter_config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "xlm-roberta-base",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "key",
+    "query",
+    "value"
+  ],
+  "target_parameters": null,
+  "task_type": "FEATURE_EXTRACTION",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

mean/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c3259b74e83045754bee25a18ab00c16e5ea082f1c73200058fadf143a2bb99
+size 3548696

mean/checkpoint-4375/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05574c04d0a1632ae27e5c247a92868cfd8a59adde5ba200a1c58b6efc5cbcf6
+size 559727136

mean/checkpoint-4375/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b469bc554749b2d52c882e1c0f43a5512c17d0f39f6179643840dccd149a14f1
+size 7264779

mean/checkpoint-4375/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd939a99316e4f9a37f179832cf4a3356001e3909c993b3e827da5cd4a4617ba
+size 14645

mean/checkpoint-4375/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a993a449c7a40aa61a80703106703284ce9b6fb450a8bd64d07585e0fc640e07
+size 1465

mean/checkpoint-4375/trainer_state.json ADDED Viewed

	@@ -0,0 +1,653 @@

+{
+  "best_global_step": 4375,
+  "best_metric": 0.997,
+  "best_model_checkpoint": "lora-xlmr-langid/mean/checkpoint-4375",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 4375,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.011428571428571429,
+      "grad_norm": 3.5266411304473877,
+      "learning_rate": 3.7262357414448674e-05,
+      "loss": 3.0645,
+      "step": 50
+    },
+    {
+      "epoch": 0.022857142857142857,
+      "grad_norm": 2.9135210514068604,
+      "learning_rate": 7.52851711026616e-05,
+      "loss": 3.0412,
+      "step": 100
+    },
+    {
+      "epoch": 0.03428571428571429,
+      "grad_norm": 5.023406505584717,
+      "learning_rate": 0.00011330798479087452,
+      "loss": 2.6251,
+      "step": 150
+    },
+    {
+      "epoch": 0.045714285714285714,
+      "grad_norm": 4.2361741065979,
+      "learning_rate": 0.00015133079847908746,
+      "loss": 1.3511,
+      "step": 200
+    },
+    {
+      "epoch": 0.05714285714285714,
+      "grad_norm": 8.629075050354004,
+      "learning_rate": 0.0001893536121673004,
+      "loss": 0.4282,
+      "step": 250
+    },
+    {
+      "epoch": 0.06857142857142857,
+      "grad_norm": 0.5085486769676208,
+      "learning_rate": 0.00019996217828993133,
+      "loss": 0.1367,
+      "step": 300
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.5619511604309082,
+      "learning_rate": 0.0001997842234752531,
+      "loss": 0.0675,
+      "step": 350
+    },
+    {
+      "epoch": 0.09142857142857143,
+      "grad_norm": 0.11627336591482162,
+      "learning_rate": 0.00019946067495120418,
+      "loss": 0.0584,
+      "step": 400
+    },
+    {
+      "epoch": 0.10285714285714286,
+      "grad_norm": 0.20873290300369263,
+      "learning_rate": 0.0001989920048027309,
+      "loss": 0.042,
+      "step": 450
+    },
+    {
+      "epoch": 0.11428571428571428,
+      "grad_norm": 0.1875362992286682,
+      "learning_rate": 0.00019837889685963127,
+      "loss": 0.0376,
+      "step": 500
+    },
+    {
+      "epoch": 0.12571428571428572,
+      "grad_norm": 0.10094111412763596,
+      "learning_rate": 0.00019762224569878898,
+      "loss": 0.0462,
+      "step": 550
+    },
+    {
+      "epoch": 0.13714285714285715,
+      "grad_norm": 0.0968400090932846,
+      "learning_rate": 0.00019672315533890932,
+      "loss": 0.032,
+      "step": 600
+    },
+    {
+      "epoch": 0.14857142857142858,
+      "grad_norm": 0.020933035761117935,
+      "learning_rate": 0.00019568293762966147,
+      "loss": 0.0169,
+      "step": 650
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.06590937077999115,
+      "learning_rate": 0.0001945031103375777,
+      "loss": 0.0207,
+      "step": 700
+    },
+    {
+      "epoch": 0.17142857142857143,
+      "grad_norm": 0.18071456253528595,
+      "learning_rate": 0.00019318539493150242,
+      "loss": 0.0266,
+      "step": 750
+    },
+    {
+      "epoch": 0.18285714285714286,
+      "grad_norm": 1.208040475845337,
+      "learning_rate": 0.0001917317140708218,
+      "loss": 0.0453,
+      "step": 800
+    },
+    {
+      "epoch": 0.19428571428571428,
+      "grad_norm": 0.03780468925833702,
+      "learning_rate": 0.00019014418880013975,
+      "loss": 0.0402,
+      "step": 850
+    },
+    {
+      "epoch": 0.2057142857142857,
+      "grad_norm": 0.15351633727550507,
+      "learning_rate": 0.00018842513545449223,
+      "loss": 0.0179,
+      "step": 900
+    },
+    {
+      "epoch": 0.21714285714285714,
+      "grad_norm": 0.1763002723455429,
+      "learning_rate": 0.00018657706227961677,
+      "loss": 0.0256,
+      "step": 950
+    },
+    {
+      "epoch": 0.22857142857142856,
+      "grad_norm": 0.4840329587459564,
+      "learning_rate": 0.00018460266577220732,
+      "loss": 0.0177,
+      "step": 1000
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 0.03627489507198334,
+      "learning_rate": 0.00018250482674549547,
+      "loss": 0.0048,
+      "step": 1050
+    },
+    {
+      "epoch": 0.25142857142857145,
+      "grad_norm": 0.39385655522346497,
+      "learning_rate": 0.00018028660612589743,
+      "loss": 0.037,
+      "step": 1100
+    },
+    {
+      "epoch": 0.26285714285714284,
+      "grad_norm": 6.667974948883057,
+      "learning_rate": 0.00017795124048686103,
+      "loss": 0.033,
+      "step": 1150
+    },
+    {
+      "epoch": 0.2742857142857143,
+      "grad_norm": 0.4796125888824463,
+      "learning_rate": 0.00017550213732642837,
+      "loss": 0.0197,
+      "step": 1200
+    },
+    {
+      "epoch": 0.2857142857142857,
+      "grad_norm": 0.022151026874780655,
+      "learning_rate": 0.00017294287009540494,
+      "loss": 0.0117,
+      "step": 1250
+    },
+    {
+      "epoch": 0.29714285714285715,
+      "grad_norm": 0.024426177144050598,
+      "learning_rate": 0.00017027717298338977,
+      "loss": 0.0258,
+      "step": 1300
+    },
+    {
+      "epoch": 0.30857142857142855,
+      "grad_norm": 0.008404894731938839,
+      "learning_rate": 0.0001675089354702732,
+      "loss": 0.0315,
+      "step": 1350
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 0.019231267273426056,
+      "learning_rate": 0.0001646421966511539,
+      "loss": 0.0067,
+      "step": 1400
+    },
+    {
+      "epoch": 0.3314285714285714,
+      "grad_norm": 0.02111838385462761,
+      "learning_rate": 0.00016168113934295362,
+      "loss": 0.0346,
+      "step": 1450
+    },
+    {
+      "epoch": 0.34285714285714286,
+      "grad_norm": 0.017707131803035736,
+      "learning_rate": 0.0001586300839813298,
+      "loss": 0.0066,
+      "step": 1500
+    },
+    {
+      "epoch": 0.35428571428571426,
+      "grad_norm": 0.01487037818878889,
+      "learning_rate": 0.00015549348231679093,
+      "loss": 0.0078,
+      "step": 1550
+    },
+    {
+      "epoch": 0.3657142857142857,
+      "grad_norm": 0.013854872435331345,
+      "learning_rate": 0.00015227591091921205,
+      "loss": 0.019,
+      "step": 1600
+    },
+    {
+      "epoch": 0.37714285714285717,
+      "grad_norm": 2.5561580657958984,
+      "learning_rate": 0.00014898206450022813,
+      "loss": 0.0295,
+      "step": 1650
+    },
+    {
+      "epoch": 0.38857142857142857,
+      "grad_norm": 0.023739265277981758,
+      "learning_rate": 0.00014561674906324873,
+      "loss": 0.0254,
+      "step": 1700
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 0.008893780410289764,
+      "learning_rate": 0.00014218487489108813,
+      "loss": 0.0093,
+      "step": 1750
+    },
+    {
+      "epoch": 0.4114285714285714,
+      "grad_norm": 0.025123456493020058,
+      "learning_rate": 0.00013869144938144325,
+      "loss": 0.0182,
+      "step": 1800
+    },
+    {
+      "epoch": 0.4228571428571429,
+      "grad_norm": 0.016919748857617378,
+      "learning_rate": 0.00013514156974067242,
+      "loss": 0.0232,
+      "step": 1850
+    },
+    {
+      "epoch": 0.4342857142857143,
+      "grad_norm": 0.21599197387695312,
+      "learning_rate": 0.00013154041554653577,
+      "loss": 0.0307,
+      "step": 1900
+    },
+    {
+      "epoch": 0.44571428571428573,
+      "grad_norm": 3.532423973083496,
+      "learning_rate": 0.00012789324119074852,
+      "loss": 0.0244,
+      "step": 1950
+    },
+    {
+      "epoch": 0.45714285714285713,
+      "grad_norm": 0.007677409332245588,
+      "learning_rate": 0.00012420536821237444,
+      "loss": 0.0233,
+      "step": 2000
+    },
+    {
+      "epoch": 0.4685714285714286,
+      "grad_norm": 0.020959220826625824,
+      "learning_rate": 0.00012048217753324587,
+      "loss": 0.0234,
+      "step": 2050
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 0.006289786193519831,
+      "learning_rate": 0.00011672910160673858,
+      "loss": 0.0073,
+      "step": 2100
+    },
+    {
+      "epoch": 0.49142857142857144,
+      "grad_norm": 0.012328894808888435,
+      "learning_rate": 0.00011295161649135815,
+      "loss": 0.0308,
+      "step": 2150
+    },
+    {
+      "epoch": 0.5028571428571429,
+      "grad_norm": 0.0096198795363307,
+      "learning_rate": 0.00010915523386070277,
+      "loss": 0.0251,
+      "step": 2200
+    },
+    {
+      "epoch": 0.5142857142857142,
+      "grad_norm": 0.08846427500247955,
+      "learning_rate": 0.0001053454929614603,
+      "loss": 0.0325,
+      "step": 2250
+    },
+    {
+      "epoch": 0.5257142857142857,
+      "grad_norm": 0.024032501503825188,
+      "learning_rate": 0.00010152795253117406,
+      "loss": 0.0011,
+      "step": 2300
+    },
+    {
+      "epoch": 0.5371428571428571,
+      "grad_norm": 0.017903102561831474,
+      "learning_rate": 9.770818268756971e-05,
+      "loss": 0.026,
+      "step": 2350
+    },
+    {
+      "epoch": 0.5485714285714286,
+      "grad_norm": 11.608915328979492,
+      "learning_rate": 9.389175680127735e-05,
+      "loss": 0.0203,
+      "step": 2400
+    },
+    {
+      "epoch": 0.56,
+      "grad_norm": 0.017623024061322212,
+      "learning_rate": 9.008424336380778e-05,
+      "loss": 0.0248,
+      "step": 2450
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 4.2325592041015625,
+      "learning_rate": 8.62911978626472e-05,
+      "loss": 0.0193,
+      "step": 2500
+    },
+    {
+      "epoch": 0.5828571428571429,
+      "grad_norm": 0.008484977297484875,
+      "learning_rate": 8.251815467532628e-05,
+      "loss": 0.0057,
+      "step": 2550
+    },
+    {
+      "epoch": 0.5942857142857143,
+      "grad_norm": 0.10779959708452225,
+      "learning_rate": 7.877061899429066e-05,
+      "loss": 0.0316,
+      "step": 2600
+    },
+    {
+      "epoch": 0.6057142857142858,
+      "grad_norm": 0.01012630295008421,
+      "learning_rate": 7.505405879435429e-05,
+      "loss": 0.0216,
+      "step": 2650
+    },
+    {
+      "epoch": 0.6171428571428571,
+      "grad_norm": 0.00607542647048831,
+      "learning_rate": 7.137389685445726e-05,
+      "loss": 0.0169,
+      "step": 2700
+    },
+    {
+      "epoch": 0.6285714285714286,
+      "grad_norm": 0.022452019155025482,
+      "learning_rate": 6.773550284536764e-05,
+      "loss": 0.0103,
+      "step": 2750
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.029570262879133224,
+      "learning_rate": 6.414418549487308e-05,
+      "loss": 0.0344,
+      "step": 2800
+    },
+    {
+      "epoch": 0.6514285714285715,
+      "grad_norm": 0.009840002283453941,
+      "learning_rate": 6.060518484189344e-05,
+      "loss": 0.0237,
+      "step": 2850
+    },
+    {
+      "epoch": 0.6628571428571428,
+      "grad_norm": 0.5386189818382263,
+      "learning_rate": 5.712366459081577e-05,
+      "loss": 0.0017,
+      "step": 2900
+    },
+    {
+      "epoch": 0.6742857142857143,
+      "grad_norm": 0.004062721040099859,
+      "learning_rate": 5.37047045772089e-05,
+      "loss": 0.0133,
+      "step": 2950
+    },
+    {
+      "epoch": 0.6857142857142857,
+      "grad_norm": 0.005474657751619816,
+      "learning_rate": 5.035329335590868e-05,
+      "loss": 0.0176,
+      "step": 3000
+    },
+    {
+      "epoch": 0.6971428571428572,
+      "grad_norm": 0.04338672012090683,
+      "learning_rate": 4.707432092229059e-05,
+      "loss": 0.0202,
+      "step": 3050
+    },
+    {
+      "epoch": 0.7085714285714285,
+      "grad_norm": 0.11501504480838776,
+      "learning_rate": 4.387257157734841e-05,
+      "loss": 0.003,
+      "step": 3100
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 0.004171635489910841,
+      "learning_rate": 4.0752716946990246e-05,
+      "loss": 0.0076,
+      "step": 3150
+    },
+    {
+      "epoch": 0.7314285714285714,
+      "grad_norm": 0.015187480486929417,
+      "learning_rate": 3.7719309165737013e-05,
+      "loss": 0.0091,
+      "step": 3200
+    },
+    {
+      "epoch": 0.7428571428571429,
+      "grad_norm": 0.029909875243902206,
+      "learning_rate": 3.477677423476935e-05,
+      "loss": 0.008,
+      "step": 3250
+    },
+    {
+      "epoch": 0.7542857142857143,
+      "grad_norm": 0.0026129253674298525,
+      "learning_rate": 3.19294055640135e-05,
+      "loss": 0.0216,
+      "step": 3300
+    },
+    {
+      "epoch": 0.7657142857142857,
+      "grad_norm": 0.0037045152857899666,
+      "learning_rate": 2.9181357707689438e-05,
+      "loss": 0.0051,
+      "step": 3350
+    },
+    {
+      "epoch": 0.7771428571428571,
+      "grad_norm": 0.02281978540122509,
+      "learning_rate": 2.6536640302461034e-05,
+      "loss": 0.0312,
+      "step": 3400
+    },
+    {
+      "epoch": 0.7885714285714286,
+      "grad_norm": 0.006208827719092369,
+      "learning_rate": 2.399911221703377e-05,
+      "loss": 0.0128,
+      "step": 3450
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.0057523371651768684,
+      "learning_rate": 2.1572475921735357e-05,
+      "loss": 0.0052,
+      "step": 3500
+    },
+    {
+      "epoch": 0.8114285714285714,
+      "grad_norm": 0.0012299221707507968,
+      "learning_rate": 1.9260272086295082e-05,
+      "loss": 0.0077,
+      "step": 3550
+    },
+    {
+      "epoch": 0.8228571428571428,
+      "grad_norm": 0.03079100325703621,
+      "learning_rate": 1.706587441370433e-05,
+      "loss": 0.0216,
+      "step": 3600
+    },
+    {
+      "epoch": 0.8342857142857143,
+      "grad_norm": 0.0038934126496315002,
+      "learning_rate": 1.499248471769531e-05,
+      "loss": 0.0092,
+      "step": 3650
+    },
+    {
+      "epoch": 0.8457142857142858,
+      "grad_norm": 0.0019553981255739927,
+      "learning_rate": 1.304312825102142e-05,
+      "loss": 0.0076,
+      "step": 3700
+    },
+    {
+      "epoch": 0.8571428571428571,
+      "grad_norm": 0.7177829742431641,
+      "learning_rate": 1.1220649291354902e-05,
+      "loss": 0.001,
+      "step": 3750
+    },
+    {
+      "epoch": 0.8685714285714285,
+      "grad_norm": 0.0023476951755583286,
+      "learning_rate": 9.527706991242502e-06,
+      "loss": 0.0267,
+      "step": 3800
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 0.006838622502982616,
+      "learning_rate": 7.966771498174963e-06,
+      "loss": 0.0186,
+      "step": 3850
+    },
+    {
+      "epoch": 0.8914285714285715,
+      "grad_norm": 0.0074235862120985985,
+      "learning_rate": 6.540120350430423e-06,
+      "loss": 0.013,
+      "step": 3900
+    },
+    {
+      "epoch": 0.9028571428571428,
+      "grad_norm": 0.0050347852520644665,
+      "learning_rate": 5.24983515395161e-06,
+      "loss": 0.006,
+      "step": 3950
+    },
+    {
+      "epoch": 0.9142857142857143,
+      "grad_norm": 0.0023033509496599436,
+      "learning_rate": 4.097798545104914e-06,
+      "loss": 0.0084,
+      "step": 4000
+    },
+    {
+      "epoch": 0.9257142857142857,
+      "grad_norm": 0.0027543501928448677,
+      "learning_rate": 3.0856914437528805e-06,
+      "loss": 0.0185,
+      "step": 4050
+    },
+    {
+      "epoch": 0.9371428571428572,
+      "grad_norm": 0.010095668025314808,
+      "learning_rate": 2.2149906006486364e-06,
+      "loss": 0.0133,
+      "step": 4100
+    },
+    {
+      "epoch": 0.9485714285714286,
+      "grad_norm": 7.557308673858643,
+      "learning_rate": 1.4869664427303088e-06,
+      "loss": 0.0162,
+      "step": 4150
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 0.0023619842249900103,
+      "learning_rate": 9.026812194594448e-07,
+      "loss": 0.006,
+      "step": 4200
+    },
+    {
+      "epoch": 0.9714285714285714,
+      "grad_norm": 0.0030775663908571005,
+      "learning_rate": 4.629874529084477e-07,
+      "loss": 0.005,
+      "step": 4250
+    },
+    {
+      "epoch": 0.9828571428571429,
+      "grad_norm": 0.001358982059173286,
+      "learning_rate": 1.6852669385787334e-07,
+      "loss": 0.0352,
+      "step": 4300
+    },
+    {
+      "epoch": 0.9942857142857143,
+      "grad_norm": 0.0023301932960748672,
+      "learning_rate": 1.9728585719092086e-08,
+      "loss": 0.0115,
+      "step": 4350
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.997,
+      "eval_f1_macro": 0.996997148310402,
+      "eval_loss": 0.01342015340924263,
+      "eval_runtime": 7.328,
+      "eval_samples_per_second": 1364.629,
+      "eval_steps_per_second": 42.713,
+      "step": 4375
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 4375,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

mean/checkpoint-4375/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bb4e594c01a307f96c5e4776130ac5e50f436bb186d6f394d8c81895e4289e9
+size 5841

mean/classifier_head.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:58cf068c2c5b769df377d36f2ae24cc4c0b005be68e5f00ebcaff2e21f655814
+size 63589

mean/id2label.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "0": "ar",
+  "1": "bg",
+  "2": "de",
+  "3": "el",
+  "4": "en",
+  "5": "es",
+  "6": "fr",
+  "7": "hi",
+  "8": "it",
+  "9": "ja",
+  "10": "nl",
+  "11": "pl",
+  "12": "pt",
+  "13": "ru",
+  "14": "sw",
+  "15": "th",
+  "16": "tr",
+  "17": "ur",
+  "18": "vi",
+  "19": "zh"
+}

mean/label2id.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+  "ar": 0,
+  "bg": 1,
+  "de": 2,
+  "el": 3,
+  "en": 4,
+  "es": 5,
+  "fr": 6,
+  "hi": 7,
+  "it": 8,
+  "ja": 9,
+  "nl": 10,
+  "pl": 11,
+  "pt": 12,
+  "ru": 13,
+  "sw": 14,
+  "th": 15,
+  "tr": 16,
+  "ur": 17,
+  "vi": 18,
+  "zh": 19
+}

mean/sentencepiece.bpe.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051

mean/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

mean/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea09a711f7adcb7e3bc41b614e59b829fc98e7b50b94d273d029315524364069
+size 17082831

mean/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
+}

sentencepiece.bpe.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "bos_token": "<s>",
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "unk_token": "<unk>"
+}

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea09a711f7adcb7e3bc41b614e59b829fc98e7b50b94d273d029315524364069
+size 17082831

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
+}