Model save

Browse files

Files changed (9) hide show

README.md +21 -14
all_results.json +17 -17
model.safetensors +1 -1
runs/Jun15_00-40-18_92b2e0e6fb20/events.out.tfevents.1749948301.92b2e0e6fb20.2194.9 +3 -0
runs/Jun15_00-47-20_92b2e0e6fb20/events.out.tfevents.1749948441.92b2e0e6fb20.2194.10 +3 -0
test_results.json +13 -13
train_results.json +5 -5
trainer_state.json +383 -24
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -16,15 +16,15 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [maximuspowers/bert-philosophy-adapted](https://huggingface.co/maximuspowers/bert-philosophy-adapted) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7200
-- Exact Match Accuracy: 0.2
-- Macro Precision: 0.1583
-- Macro Recall: 0.0909
-- Macro F1: 0.1152
-- Micro Precision: 0.8571
-- Micro Recall: 0.2105
-- Micro F1: 0.3380
-- Hamming Loss: 0.0691
 ## Model description
@@ -44,11 +44,11 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
-- train_batch_size: 16
-- eval_batch_size: 16
 - seed: 42
 - gradient_accumulation_steps: 2
-- total_train_batch_size: 32
 - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 100
@@ -59,8 +59,15 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss | Exact Match Accuracy | Macro Precision | Macro Recall | Macro F1 | Micro Precision | Micro Recall | Micro F1 | Hamming Loss |
 |:-------------:|:-----:|:----:|:---------------:|:--------------------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|:--------:|:------------:|
-| 0.811         | 25.0  | 250  | 0.7701          | 0.1                  | 0.1092          | 0.0615       | 0.0784   | 0.875           | 0.1228       | 0.2154   | 0.075        |
-| 0.58          | 50.0  | 500  | 0.7200          | 0.2                  | 0.1583          | 0.0909       | 0.1152   | 0.8571          | 0.2105       | 0.3380   | 0.0691       |
 ### Framework versions

 This model is a fine-tuned version of [maximuspowers/bert-philosophy-adapted](https://huggingface.co/maximuspowers/bert-philosophy-adapted) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.8156
+- Exact Match Accuracy: 0.275
+- Macro Precision: 0.1574
+- Macro Recall: 0.1134
+- Macro F1: 0.1298
+- Micro Precision: 0.8421
+- Micro Recall: 0.2807
+- Micro F1: 0.4211
+- Hamming Loss: 0.0647
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 2e-05
+- train_batch_size: 8
+- eval_batch_size: 8
 - seed: 42
 - gradient_accumulation_steps: 2
+- total_train_batch_size: 16
 - optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 100
 | Training Loss | Epoch | Step | Validation Loss | Exact Match Accuracy | Macro Precision | Macro Recall | Macro F1 | Micro Precision | Micro Recall | Micro F1 | Hamming Loss |
 |:-------------:|:-----:|:----:|:---------------:|:--------------------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|:--------:|:------------:|
+| 1.7889        | 5.0   | 100  | 1.0021          | 0.0                  | 0.0             | 0.0          | 0.0      | 0.0             | 0.0          | 0.0      | 0.0853       |
+| 1.156         | 10.0  | 200  | 0.8631          | 0.0                  | 0.0             | 0.0          | 0.0      | 0.0             | 0.0          | 0.0      | 0.0838       |
+| 0.8775        | 15.0  | 300  | 0.9324          | 0.05                 | 0.0588          | 0.0267       | 0.0368   | 1.0             | 0.0877       | 0.1613   | 0.0765       |
+| 0.7747        | 20.0  | 400  | 0.7537          | 0.1                  | 0.1092          | 0.0615       | 0.0784   | 0.875           | 0.1228       | 0.2154   | 0.075        |
+| 0.7074        | 25.0  | 500  | 0.8191          | 0.175                | 0.1487          | 0.0845       | 0.1056   | 0.7857          | 0.1930       | 0.3099   | 0.0721       |
+| 0.6281        | 30.0  | 600  | 0.8507          | 0.275                | 0.1574          | 0.1134       | 0.1298   | 0.8421          | 0.2807       | 0.4211   | 0.0647       |
+| 0.5506        | 35.0  | 700  | 0.7439          | 0.25                 | 0.1563          | 0.1075       | 0.1256   | 0.8333          | 0.2632       | 0.4      | 0.0662       |
+| 0.5091        | 40.0  | 800  | 0.7972          | 0.275                | 0.1574          | 0.1134       | 0.1298   | 0.8421          | 0.2807       | 0.4211   | 0.0647       |
+| 0.5038        | 45.0  | 900  | 0.8156          | 0.275                | 0.1574          | 0.1134       | 0.1298   | 0.8421          | 0.2807       | 0.4211   | 0.0647       |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,20 +1,20 @@
 {
-    "epoch": 50.0,
-    "eval_exact_match_accuracy": 0.2,
-    "eval_hamming_loss": 0.075,
-    "eval_loss": 0.8420153856277466,
-    "eval_macro_f1": 0.09192664920219099,
-    "eval_macro_precision": 0.09243697478991597,
-    "eval_macro_recall": 0.09215686274509804,
-    "eval_micro_f1": 0.4,
-    "eval_micro_precision": 0.6071428571428571,
-    "eval_micro_recall": 0.2982456140350877,
-    "eval_runtime": 0.2221,
-    "eval_samples_per_second": 180.125,
-    "eval_steps_per_second": 13.509,
     "total_flos": 0.0,
-    "train_loss": 1.1355848159790038,
-    "train_runtime": 246.5817,
-    "train_samples_per_second": 64.076,
-    "train_steps_per_second": 2.028
 }

 {
+    "epoch": 45.0,
+    "eval_exact_match_accuracy": 0.375,
+    "eval_hamming_loss": 0.052941176470588235,
+    "eval_loss": 0.5750908255577087,
+    "eval_macro_f1": 0.13746934180370715,
+    "eval_macro_precision": 0.17058823529411765,
+    "eval_macro_recall": 0.12058823529411763,
+    "eval_micro_f1": 0.55,
+    "eval_micro_precision": 0.9565217391304348,
+    "eval_micro_recall": 0.38596491228070173,
+    "eval_runtime": 0.2248,
+    "eval_samples_per_second": 177.928,
+    "eval_steps_per_second": 13.345,
     "total_flos": 0.0,
+    "train_loss": 0.9705644819471572,
+    "train_runtime": 232.6541,
+    "train_samples_per_second": 67.912,
+    "train_steps_per_second": 4.298
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d090d5e2966f2091768ffc79be690bc37433eae61a697aae158a43c8f2c1826
 size 441154988

 version https://git-lfs.github.com/spec/v1
+oid sha256:890f9065c802bc97c554e035af9eaa8ef8da20f13c0f284d224585cdb51a36aa
 size 441154988

runs/Jun15_00-40-18_92b2e0e6fb20/events.out.tfevents.1749948301.92b2e0e6fb20.2194.9 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:106c9524987fb915bca106b13f7d5b464b289c45f4975ddfcbcff0c2f11c817f
+size 3837

runs/Jun15_00-47-20_92b2e0e6fb20/events.out.tfevents.1749948441.92b2e0e6fb20.2194.10 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b7d3f157a6e34681655d66b8adcdcc4d0909527514c24f24be959d00ecde095
+size 57078

test_results.json CHANGED Viewed

@@ -1,15 +1,15 @@
 {
-    "epoch": 5.0,
-    "eval_exact_match_accuracy": 0.2,
-    "eval_hamming_loss": 0.075,
-    "eval_loss": 0.8420153856277466,
-    "eval_macro_f1": 0.09192664920219099,
-    "eval_macro_precision": 0.09243697478991597,
-    "eval_macro_recall": 0.09215686274509804,
-    "eval_micro_f1": 0.4,
-    "eval_micro_precision": 0.6071428571428571,
-    "eval_micro_recall": 0.2982456140350877,
-    "eval_runtime": 0.2221,
-    "eval_samples_per_second": 180.125,
-    "eval_steps_per_second": 13.509
 }

 {
+    "epoch": 50.0,
+    "eval_exact_match_accuracy": 0.375,
+    "eval_hamming_loss": 0.052941176470588235,
+    "eval_loss": 0.5750908255577087,
+    "eval_macro_f1": 0.13746934180370715,
+    "eval_macro_precision": 0.17058823529411765,
+    "eval_macro_recall": 0.12058823529411763,
+    "eval_micro_f1": 0.55,
+    "eval_micro_precision": 0.9565217391304348,
+    "eval_micro_recall": 0.38596491228070173,
+    "eval_runtime": 0.2248,
+    "eval_samples_per_second": 177.928,
+    "eval_steps_per_second": 13.345
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 50.0,
     "total_flos": 0.0,
-    "train_loss": 1.1355848159790038,
-    "train_runtime": 246.5817,
-    "train_samples_per_second": 64.076,
-    "train_steps_per_second": 2.028
 }

 {
+    "epoch": 45.0,
     "total_flos": 0.0,
+    "train_loss": 0.9705644819471572,
+    "train_runtime": 232.6541,
+    "train_samples_per_second": 67.912,
+    "train_steps_per_second": 4.298
 }

trainer_state.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-  "best_global_step": null,
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 5.0,
   "eval_steps": 250,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12,46 +12,405 @@
     {
       "epoch": 0,
       "step": 0,
-      "train/classification_loss": 0.6251798272132874,
-      "train/contrastive_loss": 1.386080265045166,
-      "train/negative_loss": 1.1070373058319092,
       "train/num_negatives": 190,
       "train/num_positives": 50,
-      "train/positive_loss": 0.27904292941093445,
-      "train/total_loss": 0.9023958444595337
     },
     {
       "epoch": 0,
       "step": 0,
-      "train/classification_loss": 0.6276130080223083,
-      "train/contrastive_loss": 1.6681630611419678,
-      "train/negative_loss": 1.269258737564087,
       "train/num_negatives": 192,
       "train/num_positives": 48,
-      "train/positive_loss": 0.39890438318252563,
-      "train/total_loss": 0.9612456560134888
     },
     {
       "epoch": 5.0,
-      "grad_norm": 10.065888404846191,
-      "learning_rate": 9.800000000000001e-06,
-      "loss": 1.6828,
       "step": 50
     },
     {
       "epoch": 5.0,
       "step": 50,
       "total_flos": 0.0,
-      "train_loss": 1.6828109741210937,
-      "train_runtime": 29.5351,
-      "train_samples_per_second": 53.496,
-      "train_steps_per_second": 1.693
     }
   ],
   "logging_steps": 50,
-  "max_steps": 50,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
   "save_steps": 500,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {

 {
+  "best_global_step": 500,
+  "best_metric": 0.3380281690140845,
+  "best_model_checkpoint": "./bert-philosophy-classifier/checkpoint-500",
+  "epoch": 50.0,
   "eval_steps": 250,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
     {
       "epoch": 0,
       "step": 0,
+      "train/classification_loss": 0.679158627986908,
+      "train/contrastive_loss": 9.516982078552246,
+      "train/negative_loss": 9.516908645629883,
       "train/num_negatives": 190,
       "train/num_positives": 50,
+      "train/positive_loss": 7.310241926461458e-05,
+      "train/total_loss": 2.582555055618286
     },
     {
       "epoch": 0,
       "step": 0,
+      "train/classification_loss": 0.6693864464759827,
+      "train/contrastive_loss": 9.331222534179688,
+      "train/negative_loss": 9.331130981445312,
       "train/num_negatives": 192,
       "train/num_positives": 48,
+      "train/positive_loss": 9.195055463351309e-05,
+      "train/total_loss": 2.535630941390991
     },
     {
       "epoch": 5.0,
+      "grad_norm": 17.358003616333008,
+      "learning_rate": 9.600000000000001e-06,
+      "loss": 3.6257,
       "step": 50
     },
     {
       "epoch": 5.0,
       "step": 50,
+      "train/classification_loss": 0.6362661719322205,
+      "train/contrastive_loss": 1.4868279695510864,
+      "train/negative_loss": 1.3825407028198242,
+      "train/num_negatives": 170,
+      "train/num_positives": 66,
+      "train/positive_loss": 0.10428724437952042,
+      "train/total_loss": 0.9336317777633667
+    },
+    {
+      "epoch": 5.0,
+      "step": 50,
+      "train/classification_loss": 0.6401901245117188,
+      "train/contrastive_loss": 1.6219159364700317,
+      "train/negative_loss": 1.440779447555542,
+      "train/num_negatives": 186,
+      "train/num_positives": 54,
+      "train/positive_loss": 0.18113651871681213,
+      "train/total_loss": 0.964573323726654
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 20.2652645111084,
+      "learning_rate": 1.9600000000000002e-05,
+      "loss": 1.6163,
+      "step": 100
+    },
+    {
+      "epoch": 10.0,
+      "step": 100,
+      "train/classification_loss": 0.44592994451522827,
+      "train/contrastive_loss": 0.9996287822723389,
+      "train/negative_loss": 0.8505972027778625,
+      "train/num_negatives": 190,
+      "train/num_positives": 50,
+      "train/positive_loss": 0.1490315943956375,
+      "train/total_loss": 0.645855724811554
+    },
+    {
+      "epoch": 10.0,
+      "step": 100,
+      "train/classification_loss": 0.4469062089920044,
+      "train/contrastive_loss": 1.1934728622436523,
+      "train/negative_loss": 0.9309344291687012,
+      "train/num_negatives": 206,
+      "train/num_positives": 30,
+      "train/positive_loss": 0.26253849267959595,
+      "train/total_loss": 0.685600757598877
+    },
+    {
+      "epoch": 15.0,
+      "grad_norm": 8.280580520629883,
+      "learning_rate": 1.76e-05,
+      "loss": 1.1607,
+      "step": 150
+    },
+    {
+      "epoch": 15.0,
+      "step": 150,
+      "train/classification_loss": 0.32877856492996216,
+      "train/contrastive_loss": 1.1336479187011719,
+      "train/negative_loss": 0.9981224536895752,
+      "train/num_negatives": 170,
+      "train/num_positives": 66,
+      "train/positive_loss": 0.1355254054069519,
+      "train/total_loss": 0.5555081367492676
+    },
+    {
+      "epoch": 15.0,
+      "step": 150,
+      "train/classification_loss": 0.3285317122936249,
+      "train/contrastive_loss": 0.8869010210037231,
+      "train/negative_loss": 0.591576099395752,
+      "train/num_negatives": 202,
+      "train/num_positives": 38,
+      "train/positive_loss": 0.2953248918056488,
+      "train/total_loss": 0.5059119462966919
+    },
+    {
+      "epoch": 20.0,
+      "grad_norm": 7.707197189331055,
+      "learning_rate": 1.5100000000000001e-05,
+      "loss": 0.9196,
+      "step": 200
+    },
+    {
+      "epoch": 20.0,
+      "step": 200,
+      "train/classification_loss": 0.293140172958374,
+      "train/contrastive_loss": 0.7223706245422363,
+      "train/negative_loss": 0.5778605937957764,
+      "train/num_negatives": 202,
+      "train/num_positives": 30,
+      "train/positive_loss": 0.14451001584529877,
+      "train/total_loss": 0.4376143217086792
+    },
+    {
+      "epoch": 20.0,
+      "step": 200,
+      "train/classification_loss": 0.2644300162792206,
+      "train/contrastive_loss": 0.4585617780685425,
+      "train/negative_loss": 0.39372602105140686,
+      "train/num_negatives": 184,
+      "train/num_positives": 56,
+      "train/positive_loss": 0.06483575701713562,
+      "train/total_loss": 0.3561423718929291
+    },
+    {
+      "epoch": 25.0,
+      "grad_norm": 6.953479766845703,
+      "learning_rate": 1.2600000000000001e-05,
+      "loss": 0.811,
+      "step": 250
+    },
+    {
+      "epoch": 25.0,
+      "step": 250,
+      "train/classification_loss": 0.2595597505569458,
+      "train/contrastive_loss": 2.3272764682769775,
+      "train/negative_loss": 1.8330672979354858,
+      "train/num_negatives": 196,
+      "train/num_positives": 44,
+      "train/positive_loss": 0.4942092299461365,
+      "train/total_loss": 0.7250150442123413
+    },
+    {
+      "epoch": 25.0,
+      "step": 250,
+      "train/classification_loss": 0.2660799026489258,
+      "train/contrastive_loss": 3.3698394298553467,
+      "train/negative_loss": 1.8154842853546143,
+      "train/num_negatives": 210,
+      "train/num_positives": 30,
+      "train/positive_loss": 1.5543551445007324,
+      "train/total_loss": 0.9400478005409241
+    },
+    {
+      "epoch": 25.0,
+      "step": 250,
+      "train/classification_loss": 0.2840481698513031,
+      "train/contrastive_loss": 1.1826257705688477,
+      "train/negative_loss": 1.1373339891433716,
+      "train/num_negatives": 46,
+      "train/num_positives": 8,
+      "train/positive_loss": 0.045291826128959656,
+      "train/total_loss": 0.5205733180046082
+    },
+    {
+      "epoch": 25.0,
+      "eval_exact_match_accuracy": 0.1,
+      "eval_hamming_loss": 0.075,
+      "eval_loss": 0.7701398134231567,
+      "eval_macro_f1": 0.0784313725490196,
+      "eval_macro_precision": 0.1092436974789916,
+      "eval_macro_recall": 0.06149732620320855,
+      "eval_micro_f1": 0.2153846153846154,
+      "eval_micro_precision": 0.875,
+      "eval_micro_recall": 0.12280701754385964,
+      "eval_runtime": 0.219,
+      "eval_samples_per_second": 182.685,
+      "eval_steps_per_second": 13.701,
+      "step": 250
+    },
+    {
+      "epoch": 25.0,
+      "step": 250,
+      "train/classification_loss": 0.25078481435775757,
+      "train/contrastive_loss": 0.9467111825942993,
+      "train/negative_loss": 0.8433182835578918,
+      "train/num_negatives": 198,
+      "train/num_positives": 40,
+      "train/positive_loss": 0.10339287668466568,
+      "train/total_loss": 0.44012707471847534
+    },
+    {
+      "epoch": 25.0,
+      "step": 250,
+      "train/classification_loss": 0.23322956264019012,
+      "train/contrastive_loss": 0.4987642168998718,
+      "train/negative_loss": 0.48307880759239197,
+      "train/num_negatives": 172,
+      "train/num_positives": 68,
+      "train/positive_loss": 0.015685414895415306,
+      "train/total_loss": 0.3329824209213257
+    },
+    {
+      "epoch": 30.0,
+      "grad_norm": 11.7496976852417,
+      "learning_rate": 1.0100000000000002e-05,
+      "loss": 0.7395,
+      "step": 300
+    },
+    {
+      "epoch": 30.0,
+      "step": 300,
+      "train/classification_loss": 0.22414086759090424,
+      "train/contrastive_loss": 0.9544009566307068,
+      "train/negative_loss": 0.6044885516166687,
+      "train/num_negatives": 186,
+      "train/num_positives": 54,
+      "train/positive_loss": 0.3499124050140381,
+      "train/total_loss": 0.41502106189727783
+    },
+    {
+      "epoch": 30.0,
+      "step": 300,
+      "train/classification_loss": 0.21396367251873016,
+      "train/contrastive_loss": 0.4959838390350342,
+      "train/negative_loss": 0.4717627763748169,
+      "train/num_negatives": 198,
+      "train/num_positives": 42,
+      "train/positive_loss": 0.02422107383608818,
+      "train/total_loss": 0.3131604492664337
+    },
+    {
+      "epoch": 35.0,
+      "grad_norm": 5.532268047332764,
+      "learning_rate": 7.600000000000001e-06,
+      "loss": 0.6737,
+      "step": 350
+    },
+    {
+      "epoch": 35.0,
+      "step": 350,
+      "train/classification_loss": 0.21886315941810608,
+      "train/contrastive_loss": 0.5652549266815186,
+      "train/negative_loss": 0.4682881832122803,
+      "train/num_negatives": 172,
+      "train/num_positives": 68,
+      "train/positive_loss": 0.09696672856807709,
+      "train/total_loss": 0.33191415667533875
+    },
+    {
+      "epoch": 35.0,
+      "step": 350,
+      "train/classification_loss": 0.1887310892343521,
+      "train/contrastive_loss": 0.18129800260066986,
+      "train/negative_loss": 0.17543496191501617,
+      "train/num_negatives": 152,
+      "train/num_positives": 88,
+      "train/positive_loss": 0.005863038823008537,
+      "train/total_loss": 0.22499069571495056
+    },
+    {
+      "epoch": 40.0,
+      "grad_norm": 5.668190002441406,
+      "learning_rate": 5.1e-06,
+      "loss": 0.6269,
+      "step": 400
+    },
+    {
+      "epoch": 40.0,
+      "step": 400,
+      "train/classification_loss": 0.18238115310668945,
+      "train/contrastive_loss": 0.33620232343673706,
+      "train/negative_loss": 0.2550373077392578,
+      "train/num_negatives": 156,
+      "train/num_positives": 84,
+      "train/positive_loss": 0.08116500079631805,
+      "train/total_loss": 0.24962162971496582
+    },
+    {
+      "epoch": 40.0,
+      "step": 400,
+      "train/classification_loss": 0.22312195599079132,
+      "train/contrastive_loss": 0.6893786191940308,
+      "train/negative_loss": 0.6744635105133057,
+      "train/num_negatives": 206,
+      "train/num_positives": 34,
+      "train/positive_loss": 0.014915116131305695,
+      "train/total_loss": 0.36099767684936523
+    },
+    {
+      "epoch": 45.0,
+      "grad_norm": 4.168755054473877,
+      "learning_rate": 2.6e-06,
+      "loss": 0.6025,
+      "step": 450
+    },
+    {
+      "epoch": 45.0,
+      "step": 450,
+      "train/classification_loss": 0.1997791826725006,
+      "train/contrastive_loss": 0.606022834777832,
+      "train/negative_loss": 0.5423316955566406,
+      "train/num_negatives": 216,
+      "train/num_positives": 24,
+      "train/positive_loss": 0.0636911541223526,
+      "train/total_loss": 0.32098376750946045
+    },
+    {
+      "epoch": 45.0,
+      "step": 450,
+      "train/classification_loss": 0.21638567745685577,
+      "train/contrastive_loss": 0.37323933839797974,
+      "train/negative_loss": 0.3397449851036072,
+      "train/num_negatives": 164,
+      "train/num_positives": 62,
+      "train/positive_loss": 0.03349434956908226,
+      "train/total_loss": 0.291033536195755
+    },
+    {
+      "epoch": 50.0,
+      "grad_norm": 5.069293022155762,
+      "learning_rate": 1.0000000000000001e-07,
+      "loss": 0.58,
+      "step": 500
+    },
+    {
+      "epoch": 50.0,
+      "step": 500,
+      "train/classification_loss": 0.22550146281719208,
+      "train/contrastive_loss": 2.474167823791504,
+      "train/negative_loss": 1.7999987602233887,
+      "train/num_negatives": 196,
+      "train/num_positives": 44,
+      "train/positive_loss": 0.6741690635681152,
+      "train/total_loss": 0.7203350067138672
+    },
+    {
+      "epoch": 50.0,
+      "step": 500,
+      "train/classification_loss": 0.23388545215129852,
+      "train/contrastive_loss": 3.272613763809204,
+      "train/negative_loss": 1.7668838500976562,
+      "train/num_negatives": 210,
+      "train/num_positives": 30,
+      "train/positive_loss": 1.5057299137115479,
+      "train/total_loss": 0.8884082436561584
+    },
+    {
+      "epoch": 50.0,
+      "step": 500,
+      "train/classification_loss": 0.2511661648750305,
+      "train/contrastive_loss": 0.6579197645187378,
+      "train/negative_loss": 0.537192702293396,
+      "train/num_negatives": 46,
+      "train/num_positives": 8,
+      "train/positive_loss": 0.12072707712650299,
+      "train/total_loss": 0.38275012373924255
+    },
+    {
+      "epoch": 50.0,
+      "eval_exact_match_accuracy": 0.2,
+      "eval_hamming_loss": 0.06911764705882353,
+      "eval_loss": 0.7200472950935364,
+      "eval_macro_f1": 0.11519607843137254,
+      "eval_macro_precision": 0.15826330532212887,
+      "eval_macro_recall": 0.09090909090909091,
+      "eval_micro_f1": 0.3380281690140845,
+      "eval_micro_precision": 0.8571428571428571,
+      "eval_micro_recall": 0.21052631578947367,
+      "eval_runtime": 0.219,
+      "eval_samples_per_second": 182.641,
+      "eval_steps_per_second": 13.698,
+      "step": 500
+    },
+    {
+      "epoch": 50.0,
+      "step": 500,
       "total_flos": 0.0,
+      "train_loss": 1.1355848159790038,
+      "train_runtime": 246.5817,
+      "train_samples_per_second": 64.076,
+      "train_steps_per_second": 2.028
     }
   ],
   "logging_steps": 50,
+  "max_steps": 500,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 50,
   "save_steps": 500,
   "stateful_callbacks": {
     "EarlyStoppingCallback": {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:03e2d330b9dd8fe925b85bea0db478c22579b7da080ec4cac0c4183a4c7358e0
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:66e01343304a8027b49b07fccbfd92f2c7fc70a061de471b2412977d28ec9eac
 size 5368