Santi Diana committed on
Commit 8a7bf5e · 1 Parent(s): 130e3bd

Updated to 24 models. 10 models outperform the current SOTA

Files changed (6)
  1. .DS_Store +0 -0
  2. add_new_model/mteb_metadata.yaml +23 -23
  3. app.py +1 -1
  4. data/classification.csv +20 -14
  5. data/general.csv +20 -14
  6. data/sts.csv +20 -14
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
add_new_model/mteb_metadata.yaml CHANGED
@@ -2,7 +2,7 @@
 tags:
 - mteb
 model-index:
-- name: multilingual-e5-base-stsb-tuned-b16-e10-all-languages
+- name: multilingual-e5-large-STSAUGMENTED-b16-e10
   results:
   - task:
       type: Classification
@@ -14,9 +14,9 @@ model-index:
       revision: 1399c76144fd37290681b995c656ef9b2e06e26d
     metrics:
     - type: accuracy
-      value: 43.04
+      value: 42.114
     - type: f1
-      value: 40.906126104515174
+      value: 40.962817058318876
   - task:
       type: Classification
     dataset:
@@ -27,9 +27,9 @@ model-index:
       revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
     metrics:
     - type: accuracy
-      value: 88.81254169446298
+      value: 80.92394929953302
     - type: f1
-      value: 88.19994853608793
+      value: 80.87019762034018
   - task:
       type: Classification
     dataset:
@@ -40,9 +40,9 @@ model-index:
       revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
     metrics:
     - type: accuracy
-      value: 59.252835223482336
+      value: 53.32555036691128
     - type: f1
-      value: 42.617395241215604
+      value: 37.93993994854238
   - task:
       type: Classification
     dataset:
@@ -53,9 +53,9 @@ model-index:
       revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
     metrics:
     - type: accuracy
-      value: 63.03967720242098
+      value: 60.178211163416286
     - type: f1
-      value: 62.32575868187973
+      value: 60.03972849482204
   - task:
       type: Classification
     dataset:
@@ -66,9 +66,9 @@ model-index:
       revision: 7d571f92784cd94a019292a1f45445077d0ef634
     metrics:
     - type: accuracy
-      value: 67.80094149293879
+      value: 64.1930060524546
     - type: f1
-      value: 67.470213025305
+      value: 64.02551940330545
   - task:
       type: STS
     dataset:
@@ -79,17 +79,17 @@ model-index:
       revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
     metrics:
     - type: cos_sim_pearson
-      value: 88.12981800561491
+      value: 88.27146953394649
     - type: cos_sim_spearman
-      value: 87.76573047550748
+      value: 88.57657349986717
     - type: euclidean_pearson
-      value: 88.20566152098061
+      value: 89.07302053356283
     - type: euclidean_spearman
-      value: 87.76635475212959
+      value: 88.57431075359631
     - type: manhattan_pearson
-      value: 88.25680923613729
+      value: 89.11306099359507
     - type: manhattan_spearman
-      value: 87.91724088147261
+      value: 88.61591595032135
   - task:
       type: STS
     dataset:
@@ -100,15 +100,15 @@ model-index:
       revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
     metrics:
     - type: cos_sim_pearson
-      value: 66.96543486888109
+      value: 65.57570700756301
    - type: cos_sim_spearman
-      value: 70.98778280592913
+      value: 70.33230514473709
    - type: euclidean_pearson
-      value: 68.49981525066747
+      value: 66.56411956377407
    - type: euclidean_spearman
-      value: 70.98778280592913
+      value: 70.33230514473709
    - type: manhattan_pearson
-      value: 68.4064756070659
+      value: 66.32203605146594
    - type: manhattan_spearman
-      value: 71.04079668196594
+      value: 70.34809182527435
 ---
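For reference, the YAML above follows the standard Hugging Face model-index layout that MTEB emits. A minimal sketch of how such a file could be flattened into (model, task, dataset, metric, value) rows for the leaderboard CSVs; the use of PyYAML and the output filename are assumptions, not taken from this repo's code:

```python
# Sketch: flatten an MTEB model-index YAML into (model, task, dataset, metric,
# value) rows. Assumes PyYAML; the output filename is illustrative.
import csv
import yaml

with open("add_new_model/mteb_metadata.yaml") as f:
    # The file is front-matter style (wrapped in ---), so read all YAML
    # documents and keep the non-empty one.
    meta = next(doc for doc in yaml.safe_load_all(f) if doc)

rows = []
for model in meta["model-index"]:
    for result in model["results"]:
        for metric in result["metrics"]:
            rows.append({
                "model": model["name"],
                "task": result["task"]["type"],        # Classification / STS
                "dataset": result["dataset"]["name"],  # e.g. MTEB STS22 (es)
                "metric": metric["type"],              # accuracy, f1, cos_sim_spearman, ...
                "value": round(float(metric["value"]), 2),
            })

with open("flattened_metrics.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["model", "task", "dataset", "metric", "value"])
    writer.writeheader()
    writer.writerows(rows)
```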
app.py CHANGED
@@ -37,7 +37,7 @@ def retrieval_dataframe_update():
 block = gr.Blocks()
 with block:
     gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
-    Massive Text Embedding Benchmark (MTEB) Leaderboard.**
+    Massive Spanish Text Embedding Benchmark (MSTEB) Leaderboard.**
     - **Total Datasets**: {NUM_DATASETS}
     - **Total Languages**: 1
     - **Total Scores**: {NUM_SCORES}
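The app.py hunk only renames the heading from MTEB to MSTEB. For readers unfamiliar with the surrounding code, here is a minimal, self-contained Gradio Blocks sketch in the same spirit; the CSV path, NUM_DATASETS value, NUM_SCORES computation, and single-table layout are illustrative assumptions, not the repo's actual app:

```python
# Minimal sketch of a Gradio Blocks leaderboard page in the spirit of app.py.
# Inputs below are illustrative; the real app computes them from data/ files.
import gradio as gr
import pandas as pd

general_df = pd.read_csv("data/general.csv")
NUM_DATASETS = 7  # 5 classification + 2 STS datasets in the current CSVs
NUM_SCORES = int(general_df[["Classification Average", "STS Average"]].notna().sum().sum())

block = gr.Blocks()
with block:
    gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
    Massive Spanish Text Embedding Benchmark (MSTEB) Leaderboard.**
    - **Total Datasets**: {NUM_DATASETS}
    - **Total Languages**: 1
    - **Total Scores**: {NUM_SCORES}
    """)
    gr.Dataframe(value=general_df, interactive=False)

if __name__ == "__main__":
    block.launch()
```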
data/classification.csv CHANGED
@@ -1,19 +1,25 @@
 Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
-multilingual-e5-large,66.59,42.66,89.95,66.84,64.68,68.85
-bge-small-en-v1.5,52.86,32.03,76.93,52.15,48.77,54.42
-multilingual-e5-base,63.87,42.47,89.62,60.27,60.51,66.52
-multilingual-e5-small,61.13,41.3,87.33,55.87,58.06,63.1
-paraphrase-multilingual-mpnet-base-v2,65.67,39.99,86.96,66.59,64.43,70.42
-sentence-t5-large,57.87,42.89,80.78,52.07,54.1,59.56
+multilingual-e5-LARGE,66.59,42.66,89.95,66.84,64.68,68.85
+bge-SMALL-en-v1.5,52.86,32.03,76.93,52.15,48.77,54.42
+multilingual-e5-BASE,63.87,42.47,89.62,60.27,60.51,66.52
+multilingual-e5-SMALL,61.13,41.3,87.33,55.87,58.06,63.1
+paraphrase-multilingual-mpnet-BASE-v2,65.67,39.99,86.96,66.59,64.43,70.42
+sentence-t5-LARGE,57.87,42.89,80.78,52.07,54.1,59.56
 sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
 paraphrase-spanish-distilroberta,63.98,38.24,86.81,65.94,60.52,68.39
 sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.51,64.21
-paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,64.0,37.25,86.93,66.28,62.6,66.96
-mstsb-paraphrase-multilingual-mpnet-base-v2,64.47,38.29,86.04,67.06,63.47,67.53
-multilingual-e5-base-b16-e10,65.09,43.4,89.02,61.7,63.06,68.25
-multilingual-e5-large-stsb-tuned-b32-e10,66.19,43.31,89.3,64.04,64.62,69.69
-multilingual-e5-large-stsb-tuned-b16-e10,67.1,43.72,90.29,65.51,65.13,70.84
-multilingual-e5-large-stsb-tuned,66.23,43.62,89.33,62.93,65.11,70.16
-multilingual-e5-large-stsb-tuned-b64-e10,64.58,43.71,88.84,60.2,62.74,67.4
+paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,64.0,37.25,86.93,66.28,62.6,66.96
+mstsb-paraphrase-multilingual-mpnet-BASE-v2,64.47,38.29,86.04,67.06,63.47,67.53
+multilingual-e5-BASE-b16-e10,65.09,43.4,89.02,61.7,63.06,68.25
+multilingual-e5-LARGE-stsb-tuned-b32-e10,66.19,43.31,89.3,64.04,64.62,69.69
+multilingual-e5-LARGE-stsb-tuned-b16-e10,67.1,43.72,90.29,65.51,65.13,70.84
+multilingual-e5-LARGE-stsb-tuned-b16-e5,66.23,43.62,89.33,62.93,65.11,70.16
+multilingual-e5-LARGE-stsb-tuned-b64-e10,64.58,43.71,88.84,60.2,62.74,67.4
 LaBSE,61.97,39.39,84.07,64.44,58.32,63.61
-multilingual-e5-base-stsb-tuned-b16-e10-all-languages,64.39,43.04,88.81,59.25,63.04,67.8
+multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,64.39,43.04,88.81,59.25,63.04,67.8
+bge-BASE-tuned-b16-e10,50.83,31.34,74.1,45.63,48.72,54.36
+multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,64.93,42.98,87.96,60.91,63.78,69.02
+bge-LARGE-tuned-b16-e10,51.67,31.72,76.04,48.01,48.9,53.66
+multilingual-e5-LARGE-stsb-tuned-b16-e15,67.56,43.35,90.98,67.27,65.35,70.87
+multilingual-e5-LARGE-stsb-tuned-b64-e10-all-languages,64.85,43.91,87.2,62.25,63.09,67.83
+multilingual-e5-LARGE-STSAUGMENTED-b16-e5,63.11,44.38,85.51,57.27,62.15,66.25
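New rows such as the six added above can be sanity-checked by recomputing the Average column from the five task columns; a small pandas sketch (the 0.05 tolerance is an assumption to absorb rounding in the stored values):

```python
# Sketch: recompute the classification Average from the five MTEB task columns
# and flag rows that drift from the stored value by more than the tolerance.
import pandas as pd

df = pd.read_csv("data/classification.csv")
task_cols = [c for c in df.columns if c.startswith("MTEB ")]
recomputed = df[task_cols].mean(axis=1)
drift = (recomputed - df["Average"]).abs() > 0.05  # allow for rounding
print(df.loc[drift, ["Model name", "Average"]])     # ideally prints no rows
```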
data/general.csv CHANGED
@@ -1,19 +1,25 @@
 Model name,Model Size (GB),Embedding Dimensions,Average,Classification Average,Clustering Average,STS Average,Retrieval Average
-multilingual-e5-large,,,72.22,66.6,,77.83,
-bge-small-en-v1.5,,,59.73,52.86,,66.6,
-multilingual-e5-base,,,70.7,63.88,,77.53,
-multilingual-e5-small,,,68.64,61.13,,76.15,
-paraphrase-multilingual-mpnet-base-v2,,,69.1,65.68,,72.53,
-sentence-t5-large,,,64.04,57.88,,70.21,
+multilingual-e5-LARGE,,,72.22,66.6,,77.83,
+bge-SMALL-en-v1.5,,,59.73,52.86,,66.6,
+multilingual-e5-BASE,,,70.7,63.88,,77.53,
+multilingual-e5-SMALL,,,68.64,61.13,,76.15,
+paraphrase-multilingual-mpnet-BASE-v2,,,69.1,65.68,,72.53,
+sentence-t5-LARGE,,,64.04,57.88,,70.21,
 sentence-t5-xl,,,66.22,61.64,,70.79,
 paraphrase-spanish-distilroberta,,,69.34,63.98,,74.7,
 sentence_similarity_spanish_es,,,68.5,61.77,,75.22,
-paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,,,68.62,64.0,,73.25,
-mstsb-paraphrase-multilingual-mpnet-base-v2,,,69.39,64.48,,74.29,
-multilingual-e5-base-b16-e10,,,71.97,65.09,,78.86,
-multilingual-e5-large-stsb-tuned-b32-e10,,,72.73,66.19,,79.27,
-multilingual-e5-large-stsb-tuned-b16-e10,,,73.07,67.1,,79.05,
-multilingual-e5-large-stsb-tuned,,,72.84,66.23,,79.46,
-multilingual-e5-large-stsb-tuned-b64-e10,,,71.83,64.58,,79.08,
+paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,,,68.62,64.0,,73.25,
+mstsb-paraphrase-multilingual-mpnet-BASE-v2,,,69.39,64.48,,74.29,
+multilingual-e5-BASE-b16-e10,,,71.97,65.09,,78.86,
+multilingual-e5-LARGE-stsb-tuned-b32-e10,,,72.73,66.19,,79.27,
+multilingual-e5-LARGE-stsb-tuned-b16-e10,,,73.07,67.1,,79.05,
+multilingual-e5-LARGE-stsb-tuned-b16-e5,,,72.84,66.23,,79.46,
+multilingual-e5-LARGE-stsb-tuned-b64-e10,,,71.83,64.58,,79.08,
 LaBSE,,,66.99,61.97,,72.01,
-multilingual-e5-base-stsb-tuned-b16-e10-all-languages,,,71.88,64.39,,79.38,
+multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,,,71.88,64.39,,79.38,
+bge-BASE-tuned-b16-e10,,,59.69,50.83,,68.55,
+multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,,,72.22,64.93,,79.5,
+bge-LARGE-tuned-b16-e10,,,61.5,51.67,,71.34,
+multilingual-e5-LARGE-stsb-tuned-b16-e15,,,73.38,67.56,,79.19,
+multilingual-e5-LARGE-stsb-tuned-b64-e10-all-languages,,,71.68,64.85,,78.52,
+multilingual-e5-LARGE-STSAUGMENTED-b16-e5,,,71.28,63.11,,79.44,
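In general.csv the Clustering and Retrieval columns are still empty, and the overall Average appears to be the mean of the Classification Average and STS Average columns (for example, (66.6 + 77.83) / 2 ≈ 72.22 for multilingual-e5-LARGE). A sketch under that assumption, which also lists the current top 10 referenced in the commit message:

```python
# Sketch: check that general.csv's Average matches the mean of the
# Classification Average and STS Average columns (Clustering and Retrieval
# are still empty, so they do not enter the mean yet), then list the top 10.
import pandas as pd

gen = pd.read_csv("data/general.csv")
recomputed = gen[["Classification Average", "STS Average"]].mean(axis=1)
off = (recomputed - gen["Average"]).abs() > 0.05  # tolerance for rounding
print(gen.loc[off, ["Model name", "Average"]])     # ideally prints no rows

top10 = gen.sort_values("Average", ascending=False).head(10)
print(top10[["Model name", "Average", "Classification Average", "STS Average"]])
```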
data/sts.csv CHANGED
@@ -1,19 +1,25 @@
 Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
-multilingual-e5-large,77.82,87.42,68.23
-bge-small-en-v1.5,66.6,77.73,55.47
-multilingual-e5-base,77.52,87.26,67.79
-multilingual-e5-small,76.15,85.27,67.04
-paraphrase-multilingual-mpnet-base-v2,72.52,85.14,59.91
-sentence-t5-large,70.21,82.74,57.68
+multilingual-e5-LARGE,77.82,87.42,68.23
+bge-SMALL-en-v1.5,66.6,77.73,55.47
+multilingual-e5-BASE,77.52,87.26,67.79
+multilingual-e5-SMALL,76.15,85.27,67.04
+paraphrase-multilingual-mpnet-BASE-v2,72.52,85.14,59.91
+sentence-t5-LARGE,70.21,82.74,57.68
 sentence-t5-xl,70.78,83.42,58.16
 paraphrase-spanish-distilroberta,74.7,85.79,63.61
 sentence_similarity_spanish_es,75.22,85.37,65.07
-paraphrase-multilingual-mpnet-base-v2-ft-stsb_multi_mt-embeddings,73.24,86.89,59.6
-mstsb-paraphrase-multilingual-mpnet-base-v2,74.28,88.22,60.36
-multilingual-e5-base-b16-e10,78.86,87.51,70.21
-multilingual-e5-large-stsb-tuned-b32-e10,79.27,88.1,70.44
-multilingual-e5-large-stsb-tuned-b16-e10,79.05,88.53,69.58
-multilingual-e5-large-stsb-tuned,79.46,88.44,70.48
-multilingual-e5-large-stsb-tuned-b64-e10,79.08,88.03,70.12
+paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,73.24,86.89,59.6
+mstsb-paraphrase-multilingual-mpnet-BASE-v2,74.28,88.22,60.36
+multilingual-e5-BASE-b16-e10,78.86,87.51,70.21
+multilingual-e5-LARGE-stsb-tuned-b32-e10,79.27,88.1,70.44
+multilingual-e5-LARGE-stsb-tuned-b16-e10,79.05,88.53,69.58
+multilingual-e5-LARGE-stsb-tuned-b16-e5,79.46,88.44,70.48
+multilingual-e5-LARGE-stsb-tuned-b64-e10,79.08,88.03,70.12
 LaBSE,72.01,80.83,63.18
-multilingual-e5-base-stsb-tuned-b16-e10-all-languages,79.38,87.77,70.99
+multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,79.38,87.77,70.99
+bge-BASE-tuned-b16-e10,68.55,82.01,55.08
+multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,79.5,88.66,70.35
+bge-LARGE-tuned-b16-e10,71.34,81.57,61.12
+multilingual-e5-LARGE-stsb-tuned-b16-e15,79.19,89.04,69.34
+multilingual-e5-LARGE-stsb-tuned-b64-e10-all-languages,78.52,88.37,68.67
+multilingual-e5-LARGE-STSAUGMENTED-b16-e5,79.44,88.46,70.41
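The STS columns report Spearman correlation of cosine similarities on MTEB STS17 (es-es) and STS22 (es), scaled to 0-100. A hedged sketch of how such a score is computed with sentence-transformers and SciPy; the sentence pairs, gold scores, and the "query: " prefix convention for E5 models are illustrative, and real MTEB runs evaluate the full test splits:

```python
# Sketch: an STS-style score (Spearman correlation of cosine similarities,
# scaled to 0-100) over a few made-up Spanish sentence pairs. Real MTEB runs
# evaluate the full STS17 (es-es) / STS22 (es) test splits.
from scipy.stats import spearmanr
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("intfloat/multilingual-e5-large")

# E5 models expect a "query: " prefix; both sides of a symmetric STS pair get it.
pairs = [
    ("query: Un hombre toca la guitarra.", "query: Alguien toca un instrumento."),
    ("query: El gato duerme en el sofá.", "query: Mañana lloverá en Madrid."),
    ("query: Me gusta el café por la mañana.", "query: Suelo tomar café al despertar."),
]
gold = [4.2, 0.4, 3.8]  # made-up gold similarity scores on a 0-5 scale

left = model.encode([a for a, _ in pairs], normalize_embeddings=True)
right = model.encode([b for _, b in pairs], normalize_embeddings=True)
cos_sim = (left * right).sum(axis=1)  # dot product of unit vectors = cosine similarity

rho, _ = spearmanr(cos_sim, gold)
print(f"cos_sim_spearman: {rho * 100:.2f}")
```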