Ehsanl
/

e5-large-v2-bertje-old-syn-filt_2ng_lr_1e5

@@ -6,7 +6,7 @@
   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "cls_token_id": 1,
-  "dtype": "bfloat16",
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 1024,

   "attention_probs_dropout_prob": 0.1,
   "classifier_dropout": null,
   "cls_token_id": 1,
+  "dtype": "float32",
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 1024,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6716fa52c0e723b5cfc225046ec38b517e6b1f2e35410569578f6319b602d369
-size 690656

 version https://git-lfs.github.com/spec/v1
+oid sha256:845ceb8734c02da9b708649efbf028c4c9e2dbba279d99bc0a3f8b60ff43dbc2
+size 1338773320

trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7507360157016683,
   "eval_steps": 500,
-  "global_step": 765,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -540,6 +540,181 @@
       "learning_rate": 1e-05,
       "loss": 2.7141,
       "step": 760
     }
   ],
   "logging_steps": 10,
@@ -554,7 +729,7 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 1019,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1e-05,
       "loss": 2.7141,
       "step": 760
+    },
+    {
+      "epoch": 0.7556427870461236,
+      "grad_norm": 3178157.25,
+      "learning_rate": 1e-05,
+      "loss": 0.9265,
+      "step": 770
+    },
+    {
+      "epoch": 0.7654563297350343,
+      "grad_norm": 3.6791253089904785,
+      "learning_rate": 1e-05,
+      "loss": 1.8104,
+      "step": 780
+    },
+    {
+      "epoch": 0.7752698724239451,
+      "grad_norm": 4302724.5,
+      "learning_rate": 1e-05,
+      "loss": 1.4787,
+      "step": 790
+    },
+    {
+      "epoch": 0.7850834151128557,
+      "grad_norm": 1720963.75,
+      "learning_rate": 1e-05,
+      "loss": 2.1176,
+      "step": 800
+    },
+    {
+      "epoch": 0.7948969578017664,
+      "grad_norm": 1612358.875,
+      "learning_rate": 1e-05,
+      "loss": 1.2736,
+      "step": 810
+    },
+    {
+      "epoch": 0.8047105004906772,
+      "grad_norm": 1152146.25,
+      "learning_rate": 1e-05,
+      "loss": 1.5657,
+      "step": 820
+    },
+    {
+      "epoch": 0.8145240431795878,
+      "grad_norm": 3.5905027389526367,
+      "learning_rate": 1e-05,
+      "loss": 2.6198,
+      "step": 830
+    },
+    {
+      "epoch": 0.8243375858684985,
+      "grad_norm": 736680.8125,
+      "learning_rate": 1e-05,
+      "loss": 0.9112,
+      "step": 840
+    },
+    {
+      "epoch": 0.8341511285574092,
+      "grad_norm": 2.9653732776641846,
+      "learning_rate": 1e-05,
+      "loss": 2.3842,
+      "step": 850
+    },
+    {
+      "epoch": 0.8439646712463199,
+      "grad_norm": 12.001425743103027,
+      "learning_rate": 1e-05,
+      "loss": 2.3966,
+      "step": 860
+    },
+    {
+      "epoch": 0.8537782139352306,
+      "grad_norm": 2124122.25,
+      "learning_rate": 1e-05,
+      "loss": 1.3734,
+      "step": 870
+    },
+    {
+      "epoch": 0.8635917566241413,
+      "grad_norm": 6534144.0,
+      "learning_rate": 1e-05,
+      "loss": 1.3486,
+      "step": 880
+    },
+    {
+      "epoch": 0.873405299313052,
+      "grad_norm": 3.6779091358184814,
+      "learning_rate": 1e-05,
+      "loss": 0.949,
+      "step": 890
+    },
+    {
+      "epoch": 0.8832188420019627,
+      "grad_norm": 1221940.0,
+      "learning_rate": 1e-05,
+      "loss": 2.6138,
+      "step": 900
+    },
+    {
+      "epoch": 0.8930323846908734,
+      "grad_norm": 1095478.5,
+      "learning_rate": 1e-05,
+      "loss": 1.4675,
+      "step": 910
+    },
+    {
+      "epoch": 0.9028459273797841,
+      "grad_norm": 548933.875,
+      "learning_rate": 1e-05,
+      "loss": 2.8343,
+      "step": 920
+    },
+    {
+      "epoch": 0.9126594700686947,
+      "grad_norm": 13.783559799194336,
+      "learning_rate": 1e-05,
+      "loss": 2.1122,
+      "step": 930
+    },
+    {
+      "epoch": 0.9224730127576055,
+      "grad_norm": 13.174997329711914,
+      "learning_rate": 1e-05,
+      "loss": 2.4962,
+      "step": 940
+    },
+    {
+      "epoch": 0.9322865554465162,
+      "grad_norm": 10.191123962402344,
+      "learning_rate": 1e-05,
+      "loss": 2.2086,
+      "step": 950
+    },
+    {
+      "epoch": 0.9421000981354269,
+      "grad_norm": 3.606752872467041,
+      "learning_rate": 1e-05,
+      "loss": 1.323,
+      "step": 960
+    },
+    {
+      "epoch": 0.9519136408243376,
+      "grad_norm": 2473294.0,
+      "learning_rate": 1e-05,
+      "loss": 1.0528,
+      "step": 970
+    },
+    {
+      "epoch": 0.9617271835132483,
+      "grad_norm": 2.848081588745117,
+      "learning_rate": 1e-05,
+      "loss": 1.5576,
+      "step": 980
+    },
+    {
+      "epoch": 0.971540726202159,
+      "grad_norm": 3.5542256832122803,
+      "learning_rate": 1e-05,
+      "loss": 1.8997,
+      "step": 990
+    },
+    {
+      "epoch": 0.9813542688910697,
+      "grad_norm": 1991637.375,
+      "learning_rate": 1e-05,
+      "loss": 2.5923,
+      "step": 1000
+    },
+    {
+      "epoch": 0.9911678115799804,
+      "grad_norm": 21.8354434967041,
+      "learning_rate": 1e-05,
+      "loss": 2.0656,
+      "step": 1010
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }