aisuko/encoder-L6-V2
README.md
CHANGED
@@ -371,10 +371,10 @@ model-index:
       type: sts-dev
     metrics:
     - type: pearson_cosine
-      value: 0.
+      value: 0.8793284892973376
       name: Pearson Cosine
     - type: spearman_cosine
-      value: 0.
+      value: 0.876484495899188
       name: Spearman Cosine
   - task:
       type: semantic-similarity
@@ -384,10 +384,10 @@ model-index:
       type: sts-test
     metrics:
     - type: pearson_cosine
-      value: 0.
+      value: 0.879334132854901
       name: Pearson Cosine
    - type: spearman_cosine
-      value: 0.
+      value: 0.8764936381058213
       name: Spearman Cosine
 ---
 
@@ -491,8 +491,8 @@ You can finetune this model on your own dataset.
 
 | Metric              | sts-dev    | sts-test   |
 |:--------------------|:-----------|:-----------|
-| pearson_cosine      | 0.
-| **spearman_cosine** | **0.
+| pearson_cosine      | 0.8793     | 0.8793     |
+| **spearman_cosine** | **0.8765** | **0.8765** |
 
 <!--
 ## Bias, Risks and Limitations
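These two correlations are straightforward to reproduce from the card: embed both sides of each STS pair, score each pair with cosine similarity, and correlate the scores with the gold labels. A minimal sketch, assuming sentence-transformers v3+ and scipy; the pairs and labels below are made-up placeholders, not the STS data itself:

```python
# pearson_cosine / spearman_cosine are the Pearson / Spearman correlations
# between pairwise cosine similarities and gold STS labels.
from scipy.stats import pearsonr, spearmanr
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("aisuko/encoder-L6-V2")

# Placeholder pairs with hypothetical 0-5 gold labels.
pairs = [
    ("A man is playing a guitar.", "A person plays the guitar."),
    ("A dog runs in the park.", "A cat sleeps on the couch."),
    ("Two kids are cooking.", "Children are making food."),
]
gold = [4.8, 0.5, 4.2]

emb1 = model.encode([a for a, _ in pairs])
emb2 = model.encode([b for _, b in pairs])
cos = model.similarity(emb1, emb2).diagonal().tolist()  # per-pair cosine

print("pearson_cosine :", pearsonr(cos, gold)[0])
print("spearman_cosine:", spearmanr(cos, gold)[0])
```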
@@ -567,7 +567,7 @@ You can finetune this model on your own dataset.
 - `per_device_train_batch_size`: 16
 - `per_device_eval_batch_size`: 16
 - `learning_rate`: 2e-05
-- `num_train_epochs`:
+- `num_train_epochs`: 20
 - `warmup_ratio`: 0.1
 - `fp16`: True
 - `batch_sampler`: no_duplicates
@@ -592,7 +592,7 @@ You can finetune this model on your own dataset.
 - `adam_beta2`: 0.999
 - `adam_epsilon`: 1e-08
 - `max_grad_norm`: 1.0
-- `num_train_epochs`:
+- `num_train_epochs`: 20
 - `max_steps`: -1
 - `lr_scheduler_type`: linear
 - `lr_scheduler_kwargs`: {}
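The parameter appears twice because the card lists it under both the non-default and the full hyperparameter sets. Together with the surrounding context lines, these values map directly onto sentence-transformers v3 training arguments. A sketch of that mapping (the diff does not include the actual training script, so `output_dir` and anything not shown in the hunks is a placeholder):

```python
# Sketch of the hyperparameters above expressed as sentence-transformers v3
# training arguments; an illustration, not the author's script.
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers

args = SentenceTransformerTrainingArguments(
    output_dir="encoder-L6-V2",                 # hypothetical output path
    num_train_epochs=20,                        # the value added by this commit
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    learning_rate=2e-5,
    warmup_ratio=0.1,
    fp16=True,
    batch_sampler=BatchSamplers.NO_DUPLICATES,  # `no_duplicates` above
)
```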
@@ -693,15 +693,60 @@ You can finetune this model on your own dataset.
 </details>
 
 ### Training Logs
-| Epoch
-|
-| -1
-| 0.3953
-| 0.7905
-| 1.1858
-| 1.5810
-| 1.9763
-|
+| Epoch   | Step | Training Loss | Validation Loss | sts-dev_spearman_cosine | sts-test_spearman_cosine |
+|:-------:|:----:|:-------------:|:---------------:|:-----------------------:|:------------------------:|
+| -1      | -1   | -             | -               | 0.8671                  | -                        |
+| 0.3953  | 100  | 0.0422        | 0.0031          | 0.8701                  | -                        |
+| 0.7905  | 200  | 0.0105        | 0.0017          | 0.8727                  | -                        |
+| 1.1858  | 300  | 0.0041        | 0.0016          | 0.8728                  | -                        |
+| 1.5810  | 400  | 0.0016        | 0.0011          | 0.8730                  | -                        |
+| 1.9763  | 500  | 0.0039        | 0.0021          | 0.8731                  | -                        |
+| 2.3715  | 600  | 0.0014        | 0.0020          | 0.8741                  | -                        |
+| 2.7668  | 700  | 0.0014        | 0.0017          | 0.8744                  | -                        |
+| 3.1621  | 800  | 0.0019        | 0.0009          | 0.8742                  | -                        |
+| 3.5573  | 900  | 0.0012        | 0.0011          | 0.8754                  | -                        |
+| 3.9526  | 1000 | 0.0016        | 0.0015          | 0.8760                  | -                        |
+| 4.3478  | 1100 | 0.0021        | 0.0011          | 0.8763                  | -                        |
+| 4.7431  | 1200 | 0.0006        | 0.0009          | 0.8753                  | -                        |
+| 5.1383  | 1300 | 0.0004        | 0.0009          | 0.8753                  | -                        |
+| 5.5336  | 1400 | 0.0008        | 0.0008          | 0.8751                  | -                        |
+| 5.9289  | 1500 | 0.0004        | 0.0004          | 0.8743                  | -                        |
+| 6.3241  | 1600 | 0.0009        | 0.0008          | 0.8758                  | -                        |
+| 6.7194  | 1700 | 0.0005        | 0.0009          | 0.8747                  | -                        |
+| 7.1146  | 1800 | 0.0004        | 0.0006          | 0.8742                  | -                        |
+| 7.5099  | 1900 | 0.0003        | 0.0010          | 0.8748                  | -                        |
+| 7.9051  | 2000 | 0.0006        | 0.0008          | 0.8742                  | -                        |
+| 8.3004  | 2100 | 0.0005        | 0.0007          | 0.8744                  | -                        |
+| 8.6957  | 2200 | 0.0003        | 0.0006          | 0.8748                  | -                        |
+| 9.0909  | 2300 | 0.0005        | 0.0012          | 0.8749                  | -                        |
+| 9.4862  | 2400 | 0.0007        | 0.0006          | 0.8762                  | -                        |
+| 9.8814  | 2500 | 0.0003        | 0.0009          | 0.8762                  | -                        |
+| 10.2767 | 2600 | 0.0004        | 0.0007          | 0.8759                  | -                        |
+| 10.6719 | 2700 | 0.0005        | 0.0005          | 0.8760                  | -                        |
+| 11.0672 | 2800 | 0.0005        | 0.0007          | 0.8754                  | -                        |
+| 11.4625 | 2900 | 0.0002        | 0.0008          | 0.8749                  | -                        |
+| 11.8577 | 3000 | 0.0002        | 0.0007          | 0.8749                  | -                        |
+| 12.2530 | 3100 | 0.0003        | 0.0007          | 0.8752                  | -                        |
+| 12.6482 | 3200 | 0.0004        | 0.0008          | 0.8760                  | -                        |
+| 13.0435 | 3300 | 0.0002        | 0.0008          | 0.8767                  | -                        |
+| 13.4387 | 3400 | 0.0002        | 0.0007          | 0.8763                  | -                        |
+| 13.8340 | 3500 | 0.0002        | 0.0007          | 0.8763                  | -                        |
+| 14.2292 | 3600 | 0.0001        | 0.0007          | 0.8764                  | -                        |
+| 14.6245 | 3700 | 0.0003        | 0.0006          | 0.8765                  | -                        |
+| 15.0198 | 3800 | 0.0002        | 0.0005          | 0.8757                  | -                        |
+| 15.4150 | 3900 | 0.0002        | 0.0004          | 0.8760                  | -                        |
+| 15.8103 | 4000 | 0.0002        | 0.0005          | 0.8765                  | -                        |
+| 16.2055 | 4100 | 0.0002        | 0.0005          | 0.8757                  | -                        |
+| 16.6008 | 4200 | 0.0002        | 0.0006          | 0.8758                  | -                        |
+| 16.9960 | 4300 | 0.0002        | 0.0006          | 0.8758                  | -                        |
+| 17.3913 | 4400 | 0.0001        | 0.0005          | 0.8761                  | -                        |
+| 17.7866 | 4500 | 0.0002        | 0.0005          | 0.8765                  | -                        |
+| 18.1818 | 4600 | 0.0001        | 0.0005          | 0.8767                  | -                        |
+| 18.5771 | 4700 | 0.0004        | 0.0004          | 0.8765                  | -                        |
+| 18.9723 | 4800 | 0.0002        | 0.0004          | 0.8765                  | -                        |
+| 19.3676 | 4900 | 0.0001        | 0.0004          | 0.8765                  | -                        |
+| 19.7628 | 5000 | 0.0001        | 0.0004          | 0.8765                  | -                        |
+| -1      | -1   | -             | -               | -                       | 0.8765                   |
 
 
 ### Framework Versions
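The `sts-dev_spearman_cosine` column in the new log comes from an embedding-similarity evaluator run every 100 steps, and the final `-1` row is the sts-test run after training. A minimal sketch of such an evaluation, under the assumption that the `sts-dev` set named in the metadata is the STSb dev split published as `sentence-transformers/stsb` on the Hub:

```python
# Sketch of the evaluator behind the sts-dev_spearman_cosine column.
# Assumption: sts-dev is the validation split of sentence-transformers/stsb.
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator

model = SentenceTransformer("aisuko/encoder-L6-V2")
stsb_dev = load_dataset("sentence-transformers/stsb", split="validation")

dev_evaluator = EmbeddingSimilarityEvaluator(
    sentences1=stsb_dev["sentence1"],
    sentences2=stsb_dev["sentence2"],
    scores=stsb_dev["score"],
    name="sts-dev",  # prefix used in the log column names
)
metrics = dev_evaluator(model)
print(metrics["sts-dev_spearman_cosine"])
```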
eval/similarity_evaluation_sts-dev_results.csv
CHANGED
@@ -1,6 +1,51 @@
 epoch,steps,cosine_pearson,cosine_spearman
-0.3952569169960474,100,0.
-0.7905138339920948,200,0.
-1.1857707509881423,300,0.
-1.5810276679841897,400,0.
-1.9762845849802373,500,0.
+0.3952569169960474,100,0.8726574686332377,0.8701488397000402
+0.7905138339920948,200,0.8750412159651169,0.8726839887282013
+1.1857707509881423,300,0.8753877408269072,0.8728314917829469
+1.5810276679841897,400,0.8755002384937195,0.8730160264103479
+1.9762845849802373,500,0.8754568126808655,0.8730608567554764
+2.3715415019762847,600,0.8764479319487279,0.8741036593663819
+2.766798418972332,700,0.876716357365046,0.8744216435449826
+3.1620553359683794,800,0.8769502757898068,0.8742408946513359
+3.5573122529644268,900,0.8778959648238698,0.8753541719583066
+3.9525691699604746,1000,0.8782073735310392,0.875995768253204
+4.3478260869565215,1100,0.8782874580178233,0.8763176429224164
+4.743083003952569,1200,0.8773850228755735,0.8752935745709093
+5.138339920948616,1300,0.8776664219131245,0.8752633704614471
+5.533596837944664,1400,0.8777052956487652,0.8750834998009465
+5.928853754940711,1500,0.8771876684184248,0.874268707056813
+6.324110671936759,1600,0.8783866562432394,0.8758489022146425
+6.719367588932807,1700,0.8773280444211742,0.8747220970980495
+7.1146245059288535,1800,0.8772226610659287,0.8741577088787629
+7.509881422924901,1900,0.8777556784538332,0.8747573803212756
+7.905138339920948,2000,0.8772752919248035,0.8742248935107192
+8.300395256916996,2100,0.8773457895970054,0.8744451241750334
+8.695652173913043,2200,0.8777390076857146,0.8748403436720705
+9.090909090909092,2300,0.8778036731914596,0.874892339561251
+9.486166007905139,2400,0.8788958405775648,0.8761652796162344
+9.881422924901186,2500,0.879154150815699,0.8762057113496172
+10.276679841897232,2600,0.8791104564190314,0.8759225851249959
+10.671936758893281,2700,0.879033821135453,0.8759599753501491
+11.067193675889328,2800,0.8785685708682617,0.8753595715827753
+11.462450592885375,2900,0.8781219770789246,0.874924953946234
+11.857707509881424,3000,0.8781454194128506,0.8749477032532361
+12.25296442687747,3100,0.8782273749893232,0.8752395632044004
+12.648221343873518,3200,0.878914382490899,0.8760178103697416
+13.043478260869565,3300,0.8797597152364672,0.8767370479047202
+13.438735177865613,3400,0.8794057809115553,0.8762642189729413
+13.83399209486166,3500,0.8795257831607257,0.8763472499399795
+14.229249011857707,3600,0.8794872382177779,0.8763557837108928
+14.624505928853754,3700,0.8796308963904972,0.87653676759288
+15.019762845849803,3800,0.8789295051002636,0.8756953040227562
+15.41501976284585,3900,0.8791779539069075,0.8760281564336045
+15.810276679841897,4000,0.879447460410647,0.8765326242080741
+16.205533596837945,4100,0.878773475877337,0.8757367446612416
+16.600790513833992,4200,0.8787830651894294,0.875833790985186
+16.99604743083004,4300,0.8787836656682418,0.8758472519556568
+17.391304347826086,4400,0.8789941407965253,0.876094417793626
+17.786561264822133,4500,0.8794493131770009,0.8765398344559159
+18.181818181818183,4600,0.8796029184123103,0.8767072576115775
+18.57707509881423,4700,0.8793276744980529,0.876472627469176
+18.972332015810277,4800,0.8793055366972323,0.8764677430853106
+19.367588932806324,4900,0.8793250934718451,0.876466894323524
+19.76284584980237,5000,0.8793284892973376,0.876484495899188
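Since this CSV is the raw log behind the README's training table, the strongest dev checkpoint can be read straight out of it. A quick sketch, assuming pandas is installed:

```python
# Sketch: pick the step with the best dev Spearman from the eval log.
import pandas as pd

df = pd.read_csv("eval/similarity_evaluation_sts-dev_results.csv")
best = df.loc[df["cosine_spearman"].idxmax()]
print(f"best step: {best.steps:.0f}, dev Spearman: {best.cosine_spearman:.4f}")
# For the values shown here: step 3300 (epoch ~13.04), Spearman 0.8767.
```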
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3f887edcd7d7aa18567f610560bee20f661d79799d61725bb94243faeb34ca84
 size 90864192
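`model.safetensors` is stored as a Git LFS pointer, and the `oid sha256:` line added above is the content hash of the real file, so a downloaded copy can be verified against it. A small sketch:

```python
# Sketch: verify a downloaded model.safetensors against the LFS pointer oid.
import hashlib

EXPECTED = "3f887edcd7d7aa18567f610560bee20f661d79799d61725bb94243faeb34ca84"

h = hashlib.sha256()
with open("model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "checksum mismatch"
print("model.safetensors matches the LFS pointer")
```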
runs/Jul30_13-09-19_4e06a36bb887/events.out.tfevents.1753880978.4e06a36bb887.13.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1916035c4f5ef64480ce54bfd0abbc22549e3ad5d7e76e5937272e6fdb9eab14
+size 35409
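The added `events.out.tfevents...` file is a TensorBoard log for this run (also stored via LFS). Once the real file is fetched, it can be inspected without launching TensorBoard; a sketch, noting that the scalar tag names depend on the trainer:

```python
# Sketch: list the scalar tags logged during this training run.
# Assumes the actual event file (not the LFS pointer) is under runs/.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Jul30_13-09-19_4e06a36bb887")
acc.Reload()
print(acc.Tags()["scalars"])  # e.g. train/loss, eval/loss (names vary)
```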
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:082544a60a23996b4ec5d37fc9f549909591684defafc26484a1ec9f0b7b7e80
 size 5560