VOKulus committed on Apr 9

Commit

50f5dc7

verified ·

1 Parent(s): 97d3ecb

Upload folder using huggingface_hub

Browse files

Files changed (19) hide show

README.md +64 -0
checkpoint-2858/config.json +30 -0
checkpoint-2858/model.safetensors +3 -0
checkpoint-2858/optimizer.pt +3 -0
checkpoint-2858/rng_state.pth +3 -0
checkpoint-2858/scheduler.pt +3 -0
checkpoint-2858/trainer_state.json +860 -0
checkpoint-2858/training_args.bin +3 -0
config.json +30 -0
merges.txt +0 -0
model.safetensors +3 -0
runs/Apr09_07-21-59_d72aa199956d/events.out.tfevents.1744183320.d72aa199956d.7773.0 +2 -2
runs/Apr09_07-21-59_d72aa199956d/events.out.tfevents.1744183846.d72aa199956d.7773.1 +3 -0
special_tokens_map.json +51 -0
tokenizer.json +0 -0
tokenizer_config.json +60 -0
training_args.bin +3 -0
training_params.json +32 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,64 @@

+---
+library_name: transformers
+tags:
+- autotrain
+- question-answering
+base_model: deepset/roberta-base-squad2
+widget:
+- text: "Who loves AutoTrain?"
+  context: "Everyone loves AutoTrain"
+datasets:
+- VOKulus/test
+---
+# Model Trained Using AutoTrain
+- Problem type: Extractive Question Answering
+## Validation Metrics
+loss: 6.235438195290044e-05
+exact_match: 99.7703
+f1: 99.8851
+runtime: 18.3183
+samples_per_second: 77.627
+steps_per_second: 9.717
+epoch: 2.0
+## Usage
+```python
+import torch
+from transformers import AutoModelForQuestionAnswering, AutoTokenizer
+model = AutoModelForQuestionAnswering.from_pretrained(...)
+tokenizer = AutoTokenizer.from_pretrained(...)
+from transformers import BertTokenizer, BertForQuestionAnswering
+question, text = "Who loves AutoTrain?", "Everyone loves AutoTrain"
+inputs = tokenizer(question, text, return_tensors='pt')
+start_positions = torch.tensor([1])
+end_positions = torch.tensor([3])
+outputs = model(**inputs, start_positions=start_positions, end_positions=end_positions)
+loss = outputs.loss
+start_scores = outputs.start_logits
+end_scores = outputs.end_logits
+```

checkpoint-2858/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_name_or_path": "deepset/roberta-base-squad2",
+  "architectures": [
+    "RobertaForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "language": "english",
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "name": "Roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

checkpoint-2858/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d9d1bde8b7624d84887bf84e5395e3cc1556658d2a7677a1b32e7734e09fa24
+size 496250232

checkpoint-2858/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bb819b0991628d959e4c7393f6085c36476a7f8645c2bbe0ad2d10ef177fc9f
+size 992619066

checkpoint-2858/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b68cf39d51ae0fda37757295ed75e9048e8a51b6fcb64a1285662054773cb22
+size 14244

checkpoint-2858/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0c2282cfd37433b089629f81059c258eb782f84a53354c61b9d9dbe616f7d530
+size 1064

checkpoint-2858/trainer_state.json ADDED Viewed

	@@ -0,0 +1,860 @@

+{
+  "best_metric": 6.235438195290044e-05,
+  "best_model_checkpoint": "my-model-test-roberta/checkpoint-2858",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 2858,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.01749475157452764,
+      "grad_norm": 130.36558532714844,
+      "learning_rate": 1.3986013986013987e-06,
+      "loss": 5.7756,
+      "step": 25
+    },
+    {
+      "epoch": 0.03498950314905528,
+      "grad_norm": 65.01083374023438,
+      "learning_rate": 3.0769230769230774e-06,
+      "loss": 3.2408,
+      "step": 50
+    },
+    {
+      "epoch": 0.052484254723582924,
+      "grad_norm": 54.41019058227539,
+      "learning_rate": 4.8251748251748255e-06,
+      "loss": 1.8933,
+      "step": 75
+    },
+    {
+      "epoch": 0.06997900629811056,
+      "grad_norm": 66.41553497314453,
+      "learning_rate": 6.573426573426574e-06,
+      "loss": 1.2565,
+      "step": 100
+    },
+    {
+      "epoch": 0.08747375787263821,
+      "grad_norm": 32.627281188964844,
+      "learning_rate": 8.321678321678323e-06,
+      "loss": 0.7743,
+      "step": 125
+    },
+    {
+      "epoch": 0.10496850944716585,
+      "grad_norm": 46.808109283447266,
+      "learning_rate": 1.0069930069930071e-05,
+      "loss": 0.4214,
+      "step": 150
+    },
+    {
+      "epoch": 0.1224632610216935,
+      "grad_norm": 77.44200897216797,
+      "learning_rate": 1.181818181818182e-05,
+      "loss": 0.4034,
+      "step": 175
+    },
+    {
+      "epoch": 0.13995801259622112,
+      "grad_norm": 52.945068359375,
+      "learning_rate": 1.3566433566433568e-05,
+      "loss": 0.2332,
+      "step": 200
+    },
+    {
+      "epoch": 0.15745276417074877,
+      "grad_norm": 0.948405921459198,
+      "learning_rate": 1.5314685314685317e-05,
+      "loss": 0.1798,
+      "step": 225
+    },
+    {
+      "epoch": 0.17494751574527642,
+      "grad_norm": 0.12931326031684875,
+      "learning_rate": 1.7062937062937065e-05,
+      "loss": 0.0596,
+      "step": 250
+    },
+    {
+      "epoch": 0.19244226731980407,
+      "grad_norm": 0.5479409098625183,
+      "learning_rate": 1.881118881118881e-05,
+      "loss": 0.0956,
+      "step": 275
+    },
+    {
+      "epoch": 0.2099370188943317,
+      "grad_norm": 163.63729858398438,
+      "learning_rate": 1.9937791601866253e-05,
+      "loss": 0.2539,
+      "step": 300
+    },
+    {
+      "epoch": 0.22743177046885935,
+      "grad_norm": 0.04818764701485634,
+      "learning_rate": 1.974339035769829e-05,
+      "loss": 0.116,
+      "step": 325
+    },
+    {
+      "epoch": 0.244926522043387,
+      "grad_norm": 0.11932364106178284,
+      "learning_rate": 1.954898911353033e-05,
+      "loss": 0.0437,
+      "step": 350
+    },
+    {
+      "epoch": 0.2624212736179146,
+      "grad_norm": 0.0045976778492331505,
+      "learning_rate": 1.9354587869362366e-05,
+      "loss": 0.0842,
+      "step": 375
+    },
+    {
+      "epoch": 0.27991602519244224,
+      "grad_norm": 0.051815927028656006,
+      "learning_rate": 1.9160186625194403e-05,
+      "loss": 0.0512,
+      "step": 400
+    },
+    {
+      "epoch": 0.2974107767669699,
+      "grad_norm": 0.02530599944293499,
+      "learning_rate": 1.896578538102644e-05,
+      "loss": 0.0019,
+      "step": 425
+    },
+    {
+      "epoch": 0.31490552834149754,
+      "grad_norm": 0.14514470100402832,
+      "learning_rate": 1.877138413685848e-05,
+      "loss": 0.0056,
+      "step": 450
+    },
+    {
+      "epoch": 0.33240027991602517,
+      "grad_norm": 303.62939453125,
+      "learning_rate": 1.8576982892690513e-05,
+      "loss": 0.1492,
+      "step": 475
+    },
+    {
+      "epoch": 0.34989503149055284,
+      "grad_norm": 0.003696146886795759,
+      "learning_rate": 1.8382581648522554e-05,
+      "loss": 0.0045,
+      "step": 500
+    },
+    {
+      "epoch": 0.36738978306508047,
+      "grad_norm": 20.425518035888672,
+      "learning_rate": 1.818818040435459e-05,
+      "loss": 0.0463,
+      "step": 525
+    },
+    {
+      "epoch": 0.38488453463960814,
+      "grad_norm": 0.09205462783575058,
+      "learning_rate": 1.7993779160186625e-05,
+      "loss": 0.0017,
+      "step": 550
+    },
+    {
+      "epoch": 0.40237928621413577,
+      "grad_norm": 0.038981515914201736,
+      "learning_rate": 1.7799377916018663e-05,
+      "loss": 0.0606,
+      "step": 575
+    },
+    {
+      "epoch": 0.4198740377886634,
+      "grad_norm": 0.1848757266998291,
+      "learning_rate": 1.76049766718507e-05,
+      "loss": 0.0559,
+      "step": 600
+    },
+    {
+      "epoch": 0.43736878936319107,
+      "grad_norm": 0.0069680167362093925,
+      "learning_rate": 1.7410575427682738e-05,
+      "loss": 0.0523,
+      "step": 625
+    },
+    {
+      "epoch": 0.4548635409377187,
+      "grad_norm": 0.011184507980942726,
+      "learning_rate": 1.7216174183514775e-05,
+      "loss": 0.0003,
+      "step": 650
+    },
+    {
+      "epoch": 0.4723582925122463,
+      "grad_norm": 0.011598587967455387,
+      "learning_rate": 1.7021772939346813e-05,
+      "loss": 0.0901,
+      "step": 675
+    },
+    {
+      "epoch": 0.489853044086774,
+      "grad_norm": 0.7256177067756653,
+      "learning_rate": 1.682737169517885e-05,
+      "loss": 0.0006,
+      "step": 700
+    },
+    {
+      "epoch": 0.5073477956613016,
+      "grad_norm": 0.002232016297057271,
+      "learning_rate": 1.6632970451010888e-05,
+      "loss": 0.0017,
+      "step": 725
+    },
+    {
+      "epoch": 0.5248425472358292,
+      "grad_norm": 0.0009716423810459673,
+      "learning_rate": 1.6438569206842926e-05,
+      "loss": 0.0654,
+      "step": 750
+    },
+    {
+      "epoch": 0.5423372988103569,
+      "grad_norm": 2.8868448734283447,
+      "learning_rate": 1.6244167962674963e-05,
+      "loss": 0.0003,
+      "step": 775
+    },
+    {
+      "epoch": 0.5598320503848845,
+      "grad_norm": 0.002249341458082199,
+      "learning_rate": 1.6049766718507e-05,
+      "loss": 0.0002,
+      "step": 800
+    },
+    {
+      "epoch": 0.5773268019594122,
+      "grad_norm": 0.0035393834114074707,
+      "learning_rate": 1.5855365474339038e-05,
+      "loss": 0.0006,
+      "step": 825
+    },
+    {
+      "epoch": 0.5948215535339398,
+      "grad_norm": 0.007113989442586899,
+      "learning_rate": 1.5660964230171072e-05,
+      "loss": 0.0002,
+      "step": 850
+    },
+    {
+      "epoch": 0.6123163051084675,
+      "grad_norm": 0.813864529132843,
+      "learning_rate": 1.546656298600311e-05,
+      "loss": 0.0004,
+      "step": 875
+    },
+    {
+      "epoch": 0.6298110566829951,
+      "grad_norm": 0.04794127866625786,
+      "learning_rate": 1.527216174183515e-05,
+      "loss": 0.0003,
+      "step": 900
+    },
+    {
+      "epoch": 0.6473058082575227,
+      "grad_norm": 0.0020588026382029057,
+      "learning_rate": 1.5077760497667187e-05,
+      "loss": 0.0333,
+      "step": 925
+    },
+    {
+      "epoch": 0.6648005598320503,
+      "grad_norm": 4.8790507316589355,
+      "learning_rate": 1.4883359253499223e-05,
+      "loss": 0.0038,
+      "step": 950
+    },
+    {
+      "epoch": 0.6822953114065781,
+      "grad_norm": 0.5302098989486694,
+      "learning_rate": 1.468895800933126e-05,
+      "loss": 0.0062,
+      "step": 975
+    },
+    {
+      "epoch": 0.6997900629811057,
+      "grad_norm": 0.0019412849796935916,
+      "learning_rate": 1.44945567651633e-05,
+      "loss": 0.0001,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7172848145556333,
+      "grad_norm": 0.00042624305933713913,
+      "learning_rate": 1.4300155520995335e-05,
+      "loss": 0.0002,
+      "step": 1025
+    },
+    {
+      "epoch": 0.7347795661301609,
+      "grad_norm": 0.002252366626635194,
+      "learning_rate": 1.4105754276827373e-05,
+      "loss": 0.0,
+      "step": 1050
+    },
+    {
+      "epoch": 0.7522743177046886,
+      "grad_norm": 0.0027475322131067514,
+      "learning_rate": 1.3911353032659409e-05,
+      "loss": 0.0566,
+      "step": 1075
+    },
+    {
+      "epoch": 0.7697690692792163,
+      "grad_norm": 0.009604093618690968,
+      "learning_rate": 1.3716951788491448e-05,
+      "loss": 0.002,
+      "step": 1100
+    },
+    {
+      "epoch": 0.7872638208537439,
+      "grad_norm": 0.0056050559505820274,
+      "learning_rate": 1.3522550544323485e-05,
+      "loss": 0.0131,
+      "step": 1125
+    },
+    {
+      "epoch": 0.8047585724282715,
+      "grad_norm": 0.0009983275085687637,
+      "learning_rate": 1.3328149300155521e-05,
+      "loss": 0.0055,
+      "step": 1150
+    },
+    {
+      "epoch": 0.8222533240027992,
+      "grad_norm": 0.000412652239901945,
+      "learning_rate": 1.3133748055987559e-05,
+      "loss": 0.0352,
+      "step": 1175
+    },
+    {
+      "epoch": 0.8397480755773268,
+      "grad_norm": 0.002874561119824648,
+      "learning_rate": 1.2939346811819598e-05,
+      "loss": 0.0006,
+      "step": 1200
+    },
+    {
+      "epoch": 0.8572428271518544,
+      "grad_norm": 0.01263987272977829,
+      "learning_rate": 1.2744945567651634e-05,
+      "loss": 0.0378,
+      "step": 1225
+    },
+    {
+      "epoch": 0.8747375787263821,
+      "grad_norm": 0.007837435230612755,
+      "learning_rate": 1.2550544323483671e-05,
+      "loss": 0.0001,
+      "step": 1250
+    },
+    {
+      "epoch": 0.8922323303009098,
+      "grad_norm": 0.0008695307769812644,
+      "learning_rate": 1.2356143079315707e-05,
+      "loss": 0.0011,
+      "step": 1275
+    },
+    {
+      "epoch": 0.9097270818754374,
+      "grad_norm": 0.0004545428091660142,
+      "learning_rate": 1.2161741835147746e-05,
+      "loss": 0.0001,
+      "step": 1300
+    },
+    {
+      "epoch": 0.927221833449965,
+      "grad_norm": 0.014842044562101364,
+      "learning_rate": 1.1967340590979784e-05,
+      "loss": 0.0,
+      "step": 1325
+    },
+    {
+      "epoch": 0.9447165850244926,
+      "grad_norm": 0.008039949461817741,
+      "learning_rate": 1.177293934681182e-05,
+      "loss": 0.0017,
+      "step": 1350
+    },
+    {
+      "epoch": 0.9622113365990203,
+      "grad_norm": 0.0005223533953540027,
+      "learning_rate": 1.1578538102643857e-05,
+      "loss": 0.0004,
+      "step": 1375
+    },
+    {
+      "epoch": 0.979706088173548,
+      "grad_norm": 0.0010761632584035397,
+      "learning_rate": 1.1384136858475897e-05,
+      "loss": 0.0001,
+      "step": 1400
+    },
+    {
+      "epoch": 0.9972008397480756,
+      "grad_norm": 0.0003285344282630831,
+      "learning_rate": 1.1189735614307932e-05,
+      "loss": 0.0,
+      "step": 1425
+    },
+    {
+      "epoch": 1.0,
+      "eval_exact_match": 100.0,
+      "eval_f1": 100.0,
+      "eval_loss": 0.005018405616283417,
+      "eval_runtime": 17.7026,
+      "eval_samples_per_second": 80.327,
+      "eval_steps_per_second": 10.055,
+      "step": 1429
+    },
+    {
+      "epoch": 1.0146955913226032,
+      "grad_norm": 0.0008568214834667742,
+      "learning_rate": 1.099533437013997e-05,
+      "loss": 0.0,
+      "step": 1450
+    },
+    {
+      "epoch": 1.0321903428971309,
+      "grad_norm": 0.0027714003808796406,
+      "learning_rate": 1.0800933125972006e-05,
+      "loss": 0.0,
+      "step": 1475
+    },
+    {
+      "epoch": 1.0496850944716585,
+      "grad_norm": 0.0010522945085540414,
+      "learning_rate": 1.0606531881804045e-05,
+      "loss": 0.0,
+      "step": 1500
+    },
+    {
+      "epoch": 1.067179846046186,
+      "grad_norm": 0.0003821647842414677,
+      "learning_rate": 1.0412130637636083e-05,
+      "loss": 0.0,
+      "step": 1525
+    },
+    {
+      "epoch": 1.0846745976207137,
+      "grad_norm": 0.0006792128551751375,
+      "learning_rate": 1.0217729393468118e-05,
+      "loss": 0.0,
+      "step": 1550
+    },
+    {
+      "epoch": 1.1021693491952413,
+      "grad_norm": 0.0005681074107997119,
+      "learning_rate": 1.0023328149300156e-05,
+      "loss": 0.0,
+      "step": 1575
+    },
+    {
+      "epoch": 1.119664100769769,
+      "grad_norm": 0.001575466594658792,
+      "learning_rate": 9.828926905132194e-06,
+      "loss": 0.02,
+      "step": 1600
+    },
+    {
+      "epoch": 1.1371588523442968,
+      "grad_norm": 0.0008907430456019938,
+      "learning_rate": 9.634525660964231e-06,
+      "loss": 0.0015,
+      "step": 1625
+    },
+    {
+      "epoch": 1.1546536039188244,
+      "grad_norm": 0.3030645549297333,
+      "learning_rate": 9.440124416796269e-06,
+      "loss": 0.0,
+      "step": 1650
+    },
+    {
+      "epoch": 1.172148355493352,
+      "grad_norm": 0.000742213916964829,
+      "learning_rate": 9.245723172628306e-06,
+      "loss": 0.0617,
+      "step": 1675
+    },
+    {
+      "epoch": 1.1896431070678797,
+      "grad_norm": 0.0004069434362463653,
+      "learning_rate": 9.051321928460342e-06,
+      "loss": 0.0001,
+      "step": 1700
+    },
+    {
+      "epoch": 1.2071378586424073,
+      "grad_norm": 0.0017081464175134897,
+      "learning_rate": 8.856920684292381e-06,
+      "loss": 0.0,
+      "step": 1725
+    },
+    {
+      "epoch": 1.224632610216935,
+      "grad_norm": 0.0006449994398280978,
+      "learning_rate": 8.662519440124417e-06,
+      "loss": 0.0,
+      "step": 1750
+    },
+    {
+      "epoch": 1.2421273617914625,
+      "grad_norm": 1.1132986545562744,
+      "learning_rate": 8.468118195956455e-06,
+      "loss": 0.0151,
+      "step": 1775
+    },
+    {
+      "epoch": 1.2596221133659902,
+      "grad_norm": 0.0004918717895634472,
+      "learning_rate": 8.273716951788492e-06,
+      "loss": 0.0012,
+      "step": 1800
+    },
+    {
+      "epoch": 1.2771168649405178,
+      "grad_norm": 0.0007090566796250641,
+      "learning_rate": 8.07931570762053e-06,
+      "loss": 0.0136,
+      "step": 1825
+    },
+    {
+      "epoch": 1.2946116165150454,
+      "grad_norm": 0.020147522911429405,
+      "learning_rate": 7.884914463452567e-06,
+      "loss": 0.0002,
+      "step": 1850
+    },
+    {
+      "epoch": 1.312106368089573,
+      "grad_norm": 0.0021832261700183153,
+      "learning_rate": 7.690513219284605e-06,
+      "loss": 0.0001,
+      "step": 1875
+    },
+    {
+      "epoch": 1.3296011196641007,
+      "grad_norm": 0.028366833925247192,
+      "learning_rate": 7.496111975116641e-06,
+      "loss": 0.0,
+      "step": 1900
+    },
+    {
+      "epoch": 1.3470958712386283,
+      "grad_norm": 0.0010503004305064678,
+      "learning_rate": 7.301710730948679e-06,
+      "loss": 0.0007,
+      "step": 1925
+    },
+    {
+      "epoch": 1.3645906228131561,
+      "grad_norm": 0.008805891498923302,
+      "learning_rate": 7.107309486780716e-06,
+      "loss": 0.033,
+      "step": 1950
+    },
+    {
+      "epoch": 1.3820853743876838,
+      "grad_norm": 0.021400198340415955,
+      "learning_rate": 6.912908242612753e-06,
+      "loss": 0.0,
+      "step": 1975
+    },
+    {
+      "epoch": 1.3995801259622114,
+      "grad_norm": 0.0005948548787273467,
+      "learning_rate": 6.71850699844479e-06,
+      "loss": 0.0,
+      "step": 2000
+    },
+    {
+      "epoch": 1.417074877536739,
+      "grad_norm": 0.0006943101761862636,
+      "learning_rate": 6.524105754276828e-06,
+      "loss": 0.0,
+      "step": 2025
+    },
+    {
+      "epoch": 1.4345696291112666,
+      "grad_norm": 0.0013550578150898218,
+      "learning_rate": 6.329704510108865e-06,
+      "loss": 0.0,
+      "step": 2050
+    },
+    {
+      "epoch": 1.4520643806857942,
+      "grad_norm": 0.0002896255755331367,
+      "learning_rate": 6.135303265940903e-06,
+      "loss": 0.0,
+      "step": 2075
+    },
+    {
+      "epoch": 1.4695591322603219,
+      "grad_norm": 0.0011648598592728376,
+      "learning_rate": 5.940902021772939e-06,
+      "loss": 0.0001,
+      "step": 2100
+    },
+    {
+      "epoch": 1.4870538838348495,
+      "grad_norm": 0.020712416619062424,
+      "learning_rate": 5.746500777604978e-06,
+      "loss": 0.0,
+      "step": 2125
+    },
+    {
+      "epoch": 1.5045486354093773,
+      "grad_norm": 0.0005796013865619898,
+      "learning_rate": 5.5520995334370144e-06,
+      "loss": 0.0005,
+      "step": 2150
+    },
+    {
+      "epoch": 1.522043386983905,
+      "grad_norm": 0.014175205491483212,
+      "learning_rate": 5.357698289269052e-06,
+      "loss": 0.0015,
+      "step": 2175
+    },
+    {
+      "epoch": 1.5395381385584326,
+      "grad_norm": 7.142549991607666,
+      "learning_rate": 5.163297045101089e-06,
+      "loss": 0.0215,
+      "step": 2200
+    },
+    {
+      "epoch": 1.5570328901329602,
+      "grad_norm": 0.0004311289812903851,
+      "learning_rate": 4.968895800933126e-06,
+      "loss": 0.0,
+      "step": 2225
+    },
+    {
+      "epoch": 1.5745276417074878,
+      "grad_norm": 0.0007753855898045003,
+      "learning_rate": 4.774494556765164e-06,
+      "loss": 0.0,
+      "step": 2250
+    },
+    {
+      "epoch": 1.5920223932820154,
+      "grad_norm": 0.0002963803126476705,
+      "learning_rate": 4.5800933125972005e-06,
+      "loss": 0.0,
+      "step": 2275
+    },
+    {
+      "epoch": 1.609517144856543,
+      "grad_norm": 0.0010399603052064776,
+      "learning_rate": 4.385692068429238e-06,
+      "loss": 0.0,
+      "step": 2300
+    },
+    {
+      "epoch": 1.6270118964310707,
+      "grad_norm": 0.000952723843511194,
+      "learning_rate": 4.1912908242612755e-06,
+      "loss": 0.0,
+      "step": 2325
+    },
+    {
+      "epoch": 1.6445066480055983,
+      "grad_norm": 0.00023090622562449425,
+      "learning_rate": 3.996889580093313e-06,
+      "loss": 0.0,
+      "step": 2350
+    },
+    {
+      "epoch": 1.662001399580126,
+      "grad_norm": 0.008954511024057865,
+      "learning_rate": 3.80248833592535e-06,
+      "loss": 0.0003,
+      "step": 2375
+    },
+    {
+      "epoch": 1.6794961511546536,
+      "grad_norm": 0.0010821650503203273,
+      "learning_rate": 3.6080870917573873e-06,
+      "loss": 0.0,
+      "step": 2400
+    },
+    {
+      "epoch": 1.6969909027291812,
+      "grad_norm": 0.0006221202784217894,
+      "learning_rate": 3.413685847589425e-06,
+      "loss": 0.0214,
+      "step": 2425
+    },
+    {
+      "epoch": 1.7144856543037088,
+      "grad_norm": 0.004466580227017403,
+      "learning_rate": 3.219284603421462e-06,
+      "loss": 0.0,
+      "step": 2450
+    },
+    {
+      "epoch": 1.7319804058782364,
+      "grad_norm": 0.002296778140589595,
+      "learning_rate": 3.024883359253499e-06,
+      "loss": 0.0015,
+      "step": 2475
+    },
+    {
+      "epoch": 1.749475157452764,
+      "grad_norm": 0.00047575862845405936,
+      "learning_rate": 2.8304821150855366e-06,
+      "loss": 0.0,
+      "step": 2500
+    },
+    {
+      "epoch": 1.7669699090272917,
+      "grad_norm": 0.0030999884475022554,
+      "learning_rate": 2.6360808709175738e-06,
+      "loss": 0.0,
+      "step": 2525
+    },
+    {
+      "epoch": 1.7844646606018193,
+      "grad_norm": 0.04565088450908661,
+      "learning_rate": 2.4416796267496113e-06,
+      "loss": 0.0003,
+      "step": 2550
+    },
+    {
+      "epoch": 1.8019594121763471,
+      "grad_norm": 0.003935549408197403,
+      "learning_rate": 2.247278382581649e-06,
+      "loss": 0.0,
+      "step": 2575
+    },
+    {
+      "epoch": 1.8194541637508748,
+      "grad_norm": 0.0007768659852445126,
+      "learning_rate": 2.052877138413686e-06,
+      "loss": 0.0001,
+      "step": 2600
+    },
+    {
+      "epoch": 1.8369489153254024,
+      "grad_norm": 9.082518577575684,
+      "learning_rate": 1.8584758942457235e-06,
+      "loss": 0.0007,
+      "step": 2625
+    },
+    {
+      "epoch": 1.85444366689993,
+      "grad_norm": 0.00017582898726686835,
+      "learning_rate": 1.6640746500777608e-06,
+      "loss": 0.0,
+      "step": 2650
+    },
+    {
+      "epoch": 1.8719384184744576,
+      "grad_norm": 0.0005721878260374069,
+      "learning_rate": 1.4696734059097982e-06,
+      "loss": 0.0,
+      "step": 2675
+    },
+    {
+      "epoch": 1.8894331700489853,
+      "grad_norm": 0.0014160927385091782,
+      "learning_rate": 1.2752721617418353e-06,
+      "loss": 0.0,
+      "step": 2700
+    },
+    {
+      "epoch": 1.906927921623513,
+      "grad_norm": 0.0003491580719128251,
+      "learning_rate": 1.0808709175738726e-06,
+      "loss": 0.0,
+      "step": 2725
+    },
+    {
+      "epoch": 1.9244226731980407,
+      "grad_norm": 0.00031783856684342027,
+      "learning_rate": 8.864696734059098e-07,
+      "loss": 0.0,
+      "step": 2750
+    },
+    {
+      "epoch": 1.9419174247725683,
+      "grad_norm": 0.0011913523776456714,
+      "learning_rate": 6.920684292379472e-07,
+      "loss": 0.0001,
+      "step": 2775
+    },
+    {
+      "epoch": 1.959412176347096,
+      "grad_norm": 0.0027918724808841944,
+      "learning_rate": 4.976671850699845e-07,
+      "loss": 0.0,
+      "step": 2800
+    },
+    {
+      "epoch": 1.9769069279216236,
+      "grad_norm": 0.002089190762490034,
+      "learning_rate": 3.032659409020218e-07,
+      "loss": 0.0037,
+      "step": 2825
+    },
+    {
+      "epoch": 1.9944016794961512,
+      "grad_norm": 0.0003496000135783106,
+      "learning_rate": 1.088646967340591e-07,
+      "loss": 0.0,
+      "step": 2850
+    },
+    {
+      "epoch": 2.0,
+      "eval_exact_match": 99.7703,
+      "eval_f1": 99.8851,
+      "eval_loss": 6.235438195290044e-05,
+      "eval_runtime": 18.4334,
+      "eval_samples_per_second": 77.143,
+      "eval_steps_per_second": 9.656,
+      "step": 2858
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 2858,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 5,
+        "early_stopping_threshold": 0.01
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 1
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2986621929492480.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2858/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e10234dc1d4393c789f6886e8c45d4fa1c50db25477b6f263223c0983b048889
+size 5368

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_name_or_path": "deepset/roberta-base-squad2",
+  "architectures": [
+    "RobertaForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "language": "english",
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "name": "Roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.0",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d9d1bde8b7624d84887bf84e5395e3cc1556658d2a7677a1b32e7734e09fa24
+size 496250232

runs/Apr09_07-21-59_d72aa199956d/events.out.tfevents.1744183320.d72aa199956d.7773.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:354d4503941f38c5d3d535ba68c23ba9742432dbf0255551b50590e0fcbccbeb
-size 4184

 version https://git-lfs.github.com/spec/v1
+oid sha256:43cb6364399072b4e123fbbbce9a9776d97d2ee2c70d7a49a97db2367d6ab856
+size 30289

runs/Apr09_07-21-59_d72aa199956d/events.out.tfevents.1744183846.d72aa199956d.7773.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b204507b6743200391c0ea5130bb5c20da17f7186637e6e4b2d4868c56707705
+size 460

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "<s>",
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "errors": "replace",
+  "extra_special_tokens": {},
+  "full_tokenizer_file": null,
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e10234dc1d4393c789f6886e8c45d4fa1c50db25477b6f263223c0983b048889
+size 5368

training_params.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+    "data_path": "VOKulus/test",
+    "model": "deepset/roberta-base-squad2",
+    "lr": 2e-05,
+    "epochs": 2,
+    "max_seq_length": 512,
+    "max_doc_stride": 128,
+    "batch_size": 4,
+    "warmup_ratio": 0.1,
+    "gradient_accumulation": 1,
+    "optimizer": "adamw_torch",
+    "scheduler": "linear",
+    "weight_decay": 0.0,
+    "max_grad_norm": 1.0,
+    "seed": 42,
+    "train_split": "train",
+    "valid_split": "validation",
+    "text_column": "context",
+    "question_column": "question",
+    "answer_column": "answer",
+    "logging_steps": -1,
+    "project_name": "my-model-test-roberta",
+    "auto_find_batch_size": false,
+    "mixed_precision": "fp16",
+    "save_total_limit": 1,
+    "push_to_hub": true,
+    "eval_strategy": "epoch",
+    "username": "VOKulus",
+    "log": "tensorboard",
+    "early_stopping_patience": 5,
+    "early_stopping_threshold": 0.01
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff