maximuspowers committed
Commit 5e73f53 · verified · 1 Parent(s): a31df60

Model save

README.md CHANGED
@@ -16,15 +16,15 @@ should probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [maximuspowers/bert-philosophy-adapted](https://huggingface.co/maximuspowers/bert-philosophy-adapted) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.5291
- - Exact Match Accuracy: 0.4
- - Macro Precision: 0.1658
- - Macro Recall: 0.1265
- - Macro F1: 0.1410
- - Micro Precision: 0.92
- - Micro Recall: 0.4035
- - Micro F1: 0.5610
- - Hamming Loss: 0.0529
+ - Loss: 0.7948
+ - Exact Match Accuracy: 0.225
+ - Macro Precision: 0.2908
+ - Macro Recall: 0.1502
+ - Macro F1: 0.1930
+ - Micro Precision: 0.7083
+ - Micro Recall: 0.2982
+ - Micro F1: 0.4198
+ - Hamming Loss: 0.0691
 
  ## Model description
 
@@ -52,22 +52,23 @@ The following hyperparameters were used during training:
  - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
  - lr_scheduler_type: linear
  - lr_scheduler_warmup_steps: 100
- - num_epochs: 50
+ - num_epochs: 500
  - mixed_precision_training: Native AMP
 
  ### Training results
 
  | Training Loss | Epoch | Step | Validation Loss | Exact Match Accuracy | Macro Precision | Macro Recall | Macro F1 | Micro Precision | Micro Recall | Micro F1 | Hamming Loss |
  |:-------------:|:-----:|:----:|:---------------:|:--------------------:|:---------------:|:------------:|:--------:|:---------------:|:------------:|:--------:|:------------:|
- | 1.7889 | 5.0 | 100 | 1.0021 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0853 |
- | 1.156 | 10.0 | 200 | 0.8631 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0838 |
- | 0.8775 | 15.0 | 300 | 0.9324 | 0.05 | 0.0588 | 0.0267 | 0.0368 | 1.0 | 0.0877 | 0.1613 | 0.0765 |
- | 0.7747 | 20.0 | 400 | 0.7537 | 0.1 | 0.1092 | 0.0615 | 0.0784 | 0.875 | 0.1228 | 0.2154 | 0.075 |
- | 0.7074 | 25.0 | 500 | 0.8191 | 0.175 | 0.1487 | 0.0845 | 0.1056 | 0.7857 | 0.1930 | 0.3099 | 0.0721 |
- | 0.6281 | 30.0 | 600 | 0.8507 | 0.275 | 0.1574 | 0.1134 | 0.1298 | 0.8421 | 0.2807 | 0.4211 | 0.0647 |
- | 0.5506 | 35.0 | 700 | 0.7439 | 0.25 | 0.1563 | 0.1075 | 0.1256 | 0.8333 | 0.2632 | 0.4 | 0.0662 |
- | 0.5091 | 40.0 | 800 | 0.7972 | 0.275 | 0.1574 | 0.1134 | 0.1298 | 0.8421 | 0.2807 | 0.4211 | 0.0647 |
- | 0.5038 | 45.0 | 900 | 0.8156 | 0.275 | 0.1574 | 0.1134 | 0.1298 | 0.8421 | 0.2807 | 0.4211 | 0.0647 |
+ | 1.796 | 5.0 | 100 | 0.9528 | 0.0 | 0.0588 | 0.0053 | 0.0098 | 1.0 | 0.0175 | 0.0345 | 0.0824 |
+ | 1.142 | 10.0 | 200 | 0.8632 | 0.0 | 0.0588 | 0.0053 | 0.0098 | 1.0 | 0.0175 | 0.0345 | 0.0824 |
+ | 0.8805 | 15.0 | 300 | 0.9825 | 0.05 | 0.0490 | 0.0267 | 0.0346 | 0.8333 | 0.0877 | 0.1587 | 0.0779 |
+ | 0.7442 | 20.0 | 400 | 0.7654 | 0.1 | 0.1046 | 0.0668 | 0.0804 | 0.8 | 0.1404 | 0.2388 | 0.075 |
+ | 0.6332 | 25.0 | 500 | 0.8304 | 0.175 | 0.1433 | 0.0904 | 0.1080 | 0.7059 | 0.2105 | 0.3243 | 0.0735 |
+ | 0.5572 | 30.0 | 600 | 0.7903 | 0.225 | 0.1597 | 0.0968 | 0.1200 | 0.8667 | 0.2281 | 0.3611 | 0.0676 |
+ | 0.4788 | 35.0 | 700 | 0.7919 | 0.25 | 0.2151 | 0.1173 | 0.1424 | 0.8421 | 0.2807 | 0.4211 | 0.0647 |
+ | 0.418 | 40.0 | 800 | 0.7885 | 0.2 | 0.3301 | 0.1355 | 0.1810 | 0.8421 | 0.2807 | 0.4211 | 0.0647 |
+ | 0.3975 | 45.0 | 900 | 0.8244 | 0.225 | 0.2291 | 0.1261 | 0.1554 | 0.7273 | 0.2807 | 0.4051 | 0.0691 |
+ | 0.3431 | 50.0 | 1000 | 0.7948 | 0.225 | 0.2908 | 0.1502 | 0.1930 | 0.7083 | 0.2982 | 0.4198 | 0.0691 |
 
 
  ### Framework versions
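
For reference, the hyperparameters in the updated README map onto a `transformers` `TrainingArguments` configuration roughly like the sketch below. This is a reconstruction, not the author's script: `output_dir` is a placeholder, and the learning rate and batch size come from the `training_summary.json` added later in this commit. Note the README now lists `num_epochs: 500` while `training_summary.json` records 50.

```python
# Minimal sketch of a TrainingArguments setup matching the README's
# listed hyperparameters; output_dir is a placeholder, not the author's path.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="bert-philosophy-classifier",  # hypothetical
    learning_rate=2e-5,               # "learning_rate" in training_summary.json
    per_device_train_batch_size=16,   # "batch_size" in training_summary.json
    num_train_epochs=500,             # README value; training_summary.json says 50
    optim="adamw_torch",              # OptimizerNames.ADAMW_TORCH
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    lr_scheduler_type="linear",
    warmup_steps=100,
    fp16=True,                        # "Native AMP" mixed precision
)
```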
all_results.json CHANGED
@@ -1,5 +1,5 @@
  {
- "epoch": 45.0,
+ "epoch": 50.0,
  "eval_exact_match_accuracy": 0.4,
  "eval_hamming_loss": 0.052941176470588235,
  "eval_loss": 0.5290737152099609,
@@ -13,8 +13,8 @@
  "eval_samples_per_second": 188.615,
  "eval_steps_per_second": 23.577,
  "total_flos": 0.0,
- "train_loss": 0.9705644819471572,
- "train_runtime": 232.6541,
- "train_samples_per_second": 67.912,
- "train_steps_per_second": 4.298
+ "train_loss": 0.8574352493286133,
+ "train_runtime": 257.7927,
+ "train_samples_per_second": 612.896,
+ "train_steps_per_second": 38.791
  }
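
The eval fields above use standard multi-label definitions: exact-match accuracy is subset accuracy, and Hamming loss is the per-label error rate. Below is a minimal sketch of computing them with scikit-learn over binary indicator arrays; the card does not state how the author actually computed them, so this is an assumed implementation with toy data.

```python
# Sketch: the reported multi-label metrics computed with scikit-learn.
# y_true / y_pred are (n_samples, n_labels) 0/1 indicator arrays; toy values here.
import numpy as np
from sklearn.metrics import (accuracy_score, hamming_loss,
                             precision_recall_fscore_support)

y_true = np.array([[1, 0, 1], [0, 1, 0]])
y_pred = np.array([[1, 0, 0], [0, 1, 0]])

exact_match = accuracy_score(y_true, y_pred)  # subset accuracy: every label must match
ham = hamming_loss(y_true, y_pred)            # fraction of individual label errors

macro_p, macro_r, macro_f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average="macro", zero_division=0)
micro_p, micro_r, micro_f1, _ = precision_recall_fscore_support(
    y_true, y_pred, average="micro", zero_division=0)
```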
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:890f9065c802bc97c554e035af9eaa8ef8da20f13c0f284d224585cdb51a36aa
+ oid sha256:9f93741bca9dca2d72ab973d0b940e200dc08392794fd15ca078df27cb31350e
  size 441154988
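
Because this commit replaces the LFS pointer's `oid`, these exact weights are only reachable by revision once newer commits land. A sketch of pinning a download to this commit with `huggingface_hub`; the repo id is an assumption based on `model_name` in the `training_summary.json` added below.

```python
# Sketch: pin a weights download to this exact commit.
# The repo id is assumed from training_summary.json's model_name,
# not confirmed by the card.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="maximuspowers/bert-philosophy-classifier",  # assumed repo id
    filename="model.safetensors",
    revision="5e73f53",  # this commit's short hash
)
```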
runs/Jun15_00-53-11_92b2e0e6fb20/events.out.tfevents.1749948792.92b2e0e6fb20.2194.12 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:846a6f1b494a7193b33d2ffe15091a5d41830e73450cd6194604a44ab3d77dc9
+ size 6680
runs/Jun15_00-58-48_92b2e0e6fb20/events.out.tfevents.1749949130.92b2e0e6fb20.2194.13 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fb5bb1e51ed2a05490aa715749d0ae6cbe279aeca58c68e340c95ed949b284c
+ size 62762
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
- "epoch": 45.0,
+ "epoch": 50.0,
  "total_flos": 0.0,
- "train_loss": 0.9705644819471572,
- "train_runtime": 232.6541,
- "train_samples_per_second": 67.912,
- "train_steps_per_second": 4.298
+ "train_loss": 0.8574352493286133,
+ "train_runtime": 257.7927,
+ "train_samples_per_second": 612.896,
+ "train_steps_per_second": 38.791
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:66e01343304a8027b49b07fccbfd92f2c7fc70a061de471b2412977d28ec9eac
+ oid sha256:d901cfad6e06e460cafaa9ca2058746b5b75a1a99806fb8871440df59f4b2356
  size 5368
training_summary.json ADDED
@@ -0,0 +1,45 @@
+ {
+   "model_name": "bert-philosophy-classifier",
+   "base_model": "maximuspowers/bert-philosophy-adapted",
+   "dataset": "maximuspowers/philosophai-papers-labeled",
+   "training_samples": 316,
+   "validation_samples": 40,
+   "test_samples": 40,
+   "num_epochs": 50,
+   "learning_rate": 2e-05,
+   "batch_size": 16,
+   "contrastive_weight": 0.2,
+   "test_results": {
+     "loss": 0.5290737152099609,
+     "exact_match_accuracy": 0.4,
+     "macro_precision": 0.1657754010695187,
+     "macro_recall": 0.1264705882352941,
+     "macro_f1": 0.14097904608067482,
+     "micro_precision": 0.92,
+     "micro_recall": 0.40350877192982454,
+     "micro_f1": 0.5609756097560976,
+     "hamming_loss": 0.052941176470588235,
+     "runtime": 0.2121,
+     "samples_per_second": 188.615,
+     "steps_per_second": 23.577
+   },
+   "philosophy_schools": [
+     "Effective Altruism",
+     "Existentialism",
+     "Idealism",
+     "Empiricism",
+     "Utilitarianism",
+     "Stoicism",
+     "Rationalism",
+     "Pragmatism",
+     "Cynicism",
+     "Confucianism",
+     "Hedonism",
+     "Deontology",
+     "Fanaticism",
+     "Nihilism",
+     "Absurdism",
+     "Transcendentalism",
+     "Machiavellanism"
+   ]
+ }
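
Given the 17 `philosophy_schools` labels and the micro/macro metrics reported above, inference is presumably sigmoid-thresholded multi-label sequence classification. A minimal sketch under that assumption; the repo id and the 0.5 threshold are guesses, not documented in the summary.

```python
# Sketch: multi-label inference over the philosophy_schools labels.
# The repo id and the 0.5 threshold are assumptions; the summary does
# not document the actual inference setup.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "maximuspowers/bert-philosophy-classifier"  # assumed
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id)

inputs = tokenizer("Virtue alone suffices for the good life.",
                   return_tensors="pt", truncation=True)
with torch.no_grad():
    probs = torch.sigmoid(model(**inputs).logits.squeeze(0))

predicted = [model.config.id2label[i]
             for i, p in enumerate(probs.tolist()) if p > 0.5]
print(predicted)  # e.g. ["Stoicism"] if that label clears the threshold
```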