Spaces:
Runtime error
Runtime error
Santi Diana
committed on
Commit
·
8a7bf5e
1
Parent(s):
130e3bd
Updated to 24 models. 10 models outperform the current SOTA
Browse files
- .DS_Store +0 -0
- add_new_model/mteb_metadata.yaml +23 -23
- app.py +1 -1
- data/classification.csv +20 -14
- data/general.csv +20 -14
- data/sts.csv +20 -14
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
add_new_model/mteb_metadata.yaml
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
tags:
|
3 |
- mteb
|
4 |
model-index:
|
5 |
-
- name: multilingual-e5-
|
6 |
results:
|
7 |
- task:
|
8 |
type: Classification
|
@@ -14,9 +14,9 @@ model-index:
|
|
14 |
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
|
15 |
metrics:
|
16 |
- type: accuracy
|
17 |
-
value:
|
18 |
- type: f1
|
19 |
-
value: 40.
|
20 |
- task:
|
21 |
type: Classification
|
22 |
dataset:
|
@@ -27,9 +27,9 @@ model-index:
|
|
27 |
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
|
28 |
metrics:
|
29 |
- type: accuracy
|
30 |
-
value:
|
31 |
- type: f1
|
32 |
-
value:
|
33 |
- task:
|
34 |
type: Classification
|
35 |
dataset:
|
@@ -40,9 +40,9 @@ model-index:
|
|
40 |
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
|
41 |
metrics:
|
42 |
- type: accuracy
|
43 |
-
value:
|
44 |
- type: f1
|
45 |
-
value:
|
46 |
- task:
|
47 |
type: Classification
|
48 |
dataset:
|
@@ -53,9 +53,9 @@ model-index:
|
|
53 |
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
|
54 |
metrics:
|
55 |
- type: accuracy
|
56 |
-
value:
|
57 |
- type: f1
|
58 |
-
value:
|
59 |
- task:
|
60 |
type: Classification
|
61 |
dataset:
|
@@ -66,9 +66,9 @@ model-index:
|
|
66 |
revision: 7d571f92784cd94a019292a1f45445077d0ef634
|
67 |
metrics:
|
68 |
- type: accuracy
|
69 |
-
value:
|
70 |
- type: f1
|
71 |
-
value:
|
72 |
- task:
|
73 |
type: STS
|
74 |
dataset:
|
@@ -79,17 +79,17 @@ model-index:
|
|
79 |
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
|
80 |
metrics:
|
81 |
- type: cos_sim_pearson
|
82 |
-
value: 88.
|
83 |
- type: cos_sim_spearman
|
84 |
-
value:
|
85 |
- type: euclidean_pearson
|
86 |
-
value:
|
87 |
- type: euclidean_spearman
|
88 |
-
value:
|
89 |
- type: manhattan_pearson
|
90 |
-
value:
|
91 |
- type: manhattan_spearman
|
92 |
-
value:
|
93 |
- task:
|
94 |
type: STS
|
95 |
dataset:
|
@@ -100,15 +100,15 @@ model-index:
|
|
100 |
revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
|
101 |
metrics:
|
102 |
- type: cos_sim_pearson
|
103 |
-
value:
|
104 |
- type: cos_sim_spearman
|
105 |
-
value: 70.
|
106 |
- type: euclidean_pearson
|
107 |
-
value:
|
108 |
- type: euclidean_spearman
|
109 |
-
value: 70.
|
110 |
- type: manhattan_pearson
|
111 |
-
value:
|
112 |
- type: manhattan_spearman
|
113 |
-
value:
|
114 |
---
|
|
|
2 |
tags:
|
3 |
- mteb
|
4 |
model-index:
|
5 |
+
- name: multilingual-e5-large-STSAUGMENTED-b16-e10
|
6 |
results:
|
7 |
- task:
|
8 |
type: Classification
|
|
|
14 |
revision: 1399c76144fd37290681b995c656ef9b2e06e26d
|
15 |
metrics:
|
16 |
- type: accuracy
|
17 |
+
value: 42.114
|
18 |
- type: f1
|
19 |
+
value: 40.962817058318876
|
20 |
- task:
|
21 |
type: Classification
|
22 |
dataset:
|
|
|
27 |
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
|
28 |
metrics:
|
29 |
- type: accuracy
|
30 |
+
value: 80.92394929953302
|
31 |
- type: f1
|
32 |
+
value: 80.87019762034018
|
33 |
- task:
|
34 |
type: Classification
|
35 |
dataset:
|
|
|
40 |
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
|
41 |
metrics:
|
42 |
- type: accuracy
|
43 |
+
value: 53.32555036691128
|
44 |
- type: f1
|
45 |
+
value: 37.93993994854238
|
46 |
- task:
|
47 |
type: Classification
|
48 |
dataset:
|
|
|
53 |
revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
|
54 |
metrics:
|
55 |
- type: accuracy
|
56 |
+
value: 60.178211163416286
|
57 |
- type: f1
|
58 |
+
value: 60.03972849482204
|
59 |
- task:
|
60 |
type: Classification
|
61 |
dataset:
|
|
|
66 |
revision: 7d571f92784cd94a019292a1f45445077d0ef634
|
67 |
metrics:
|
68 |
- type: accuracy
|
69 |
+
value: 64.1930060524546
|
70 |
- type: f1
|
71 |
+
value: 64.02551940330545
|
72 |
- task:
|
73 |
type: STS
|
74 |
dataset:
|
|
|
79 |
revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
|
80 |
metrics:
|
81 |
- type: cos_sim_pearson
|
82 |
+
value: 88.27146953394649
|
83 |
- type: cos_sim_spearman
|
84 |
+
value: 88.57657349986717
|
85 |
- type: euclidean_pearson
|
86 |
+
value: 89.07302053356283
|
87 |
- type: euclidean_spearman
|
88 |
+
value: 88.57431075359631
|
89 |
- type: manhattan_pearson
|
90 |
+
value: 89.11306099359507
|
91 |
- type: manhattan_spearman
|
92 |
+
value: 88.61591595032135
|
93 |
- task:
|
94 |
type: STS
|
95 |
dataset:
|
|
|
100 |
revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80
|
101 |
metrics:
|
102 |
- type: cos_sim_pearson
|
103 |
+
value: 65.57570700756301
|
104 |
- type: cos_sim_spearman
|
105 |
+
value: 70.33230514473709
|
106 |
- type: euclidean_pearson
|
107 |
+
value: 66.56411956377407
|
108 |
- type: euclidean_spearman
|
109 |
+
value: 70.33230514473709
|
110 |
- type: manhattan_pearson
|
111 |
+
value: 66.32203605146594
|
112 |
- type: manhattan_spearman
|
113 |
+
value: 70.34809182527435
|
114 |
---
|
app.py
CHANGED
@@ -37,7 +37,7 @@ def retrieval_dataframe_update():
|
|
37 |
block = gr.Blocks()
|
38 |
with block:
|
39 |
gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
|
40 |
-
Massive Text Embedding Benchmark (
|
41 |
- **Total Datasets**: {NUM_DATASETS}
|
42 |
- **Total Languages**: 1
|
43 |
- **Total Scores**: {NUM_SCORES}
|
|
|
37 |
block = gr.Blocks()
|
38 |
with block:
|
39 |
gr.Markdown(f"""**Leaderboard de modelos de Embeddings en español
|
40 |
+
Massive Spanish Text Embedding Benchmark (MSTEB) Leaderboard.**
|
41 |
- **Total Datasets**: {NUM_DATASETS}
|
42 |
- **Total Languages**: 1
|
43 |
- **Total Scores**: {NUM_SCORES}
|
data/classification.csv
CHANGED
@@ -1,19 +1,25 @@
|
|
1 |
Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
|
2 |
-
multilingual-e5-
|
3 |
-
bge-
|
4 |
-
multilingual-e5-
|
5 |
-
multilingual-e5-
|
6 |
-
paraphrase-multilingual-mpnet-
|
7 |
-
sentence-t5-
|
8 |
sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
|
9 |
paraphrase-spanish-distilroberta,63.98,38.24,86.81,65.94,60.52,68.39
|
10 |
sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.51,64.21
|
11 |
-
paraphrase-multilingual-mpnet-
|
12 |
-
mstsb-paraphrase-multilingual-mpnet-
|
13 |
-
multilingual-e5-
|
14 |
-
multilingual-e5-
|
15 |
-
multilingual-e5-
|
16 |
-
multilingual-e5-
|
17 |
-
multilingual-e5-
|
18 |
LaBSE,61.97,39.39,84.07,64.44,58.32,63.61
|
19 |
-
multilingual-e5-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model name,Average,MTEB AmazonReviewsClassification (es),MTEB MTOPDomainClassification (es),MTEB MTOPIntentClassification (es),MTEB MassiveIntentClassification (es),MTEB MassiveScenarioClassification (es)
|
2 |
+
multilingual-e5-LARGE,66.59,42.66,89.95,66.84,64.68,68.85
|
3 |
+
bge-SMALL-en-v1.5,52.86,32.03,76.93,52.15,48.77,54.42
|
4 |
+
multilingual-e5-BASE,63.87,42.47,89.62,60.27,60.51,66.52
|
5 |
+
multilingual-e5-SMALL,61.13,41.3,87.33,55.87,58.06,63.1
|
6 |
+
paraphrase-multilingual-mpnet-BASE-v2,65.67,39.99,86.96,66.59,64.43,70.42
|
7 |
+
sentence-t5-LARGE,57.87,42.89,80.78,52.07,54.1,59.56
|
8 |
sentence-t5-xl,61.64,45.01,85.32,57.38,57.97,62.52
|
9 |
paraphrase-spanish-distilroberta,63.98,38.24,86.81,65.94,60.52,68.39
|
10 |
sentence_similarity_spanish_es,61.77,35.08,85.86,65.21,58.51,64.21
|
11 |
+
paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,64.0,37.25,86.93,66.28,62.6,66.96
|
12 |
+
mstsb-paraphrase-multilingual-mpnet-BASE-v2,64.47,38.29,86.04,67.06,63.47,67.53
|
13 |
+
multilingual-e5-BASE-b16-e10,65.09,43.4,89.02,61.7,63.06,68.25
|
14 |
+
multilingual-e5-LARGE-stsb-tuned-b32-e10,66.19,43.31,89.3,64.04,64.62,69.69
|
15 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e10,67.1,43.72,90.29,65.51,65.13,70.84
|
16 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e5,66.23,43.62,89.33,62.93,65.11,70.16
|
17 |
+
multilingual-e5-LARGE-stsb-tuned-b64-e10,64.58,43.71,88.84,60.2,62.74,67.4
|
18 |
LaBSE,61.97,39.39,84.07,64.44,58.32,63.61
|
19 |
+
multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,64.39,43.04,88.81,59.25,63.04,67.8
|
20 |
+
bge-BASE-tuned-b16-e10,50.83,31.34,74.1,45.63,48.72,54.36
|
21 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,64.93,42.98,87.96,60.91,63.78,69.02
|
22 |
+
bge-LARGE-tuned-b16-e10,51.67,31.72,76.04,48.01,48.9,53.66
|
23 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e15,67.56,43.35,90.98,67.27,65.35,70.87
|
24 |
+
multilingual-e5-LARGE-stsb-tuned-b64-e10-all-languages,64.85,43.91,87.2,62.25,63.09,67.83
|
25 |
+
multilingual-e5-LARGE-STSAUGMENTED-b16-e5,63.11,44.38,85.51,57.27,62.15,66.25
|
data/general.csv
CHANGED
@@ -1,19 +1,25 @@
|
|
1 |
Model name,Model Size (GB),Embedding Dimensions,Average,Classification Average,Clustering Average,STS Average,Retrieval Average
|
2 |
-
multilingual-e5-
|
3 |
-
bge-
|
4 |
-
multilingual-e5-
|
5 |
-
multilingual-e5-
|
6 |
-
paraphrase-multilingual-mpnet-
|
7 |
-
sentence-t5-
|
8 |
sentence-t5-xl,,,66.22,61.64,,70.79,
|
9 |
paraphrase-spanish-distilroberta,,,69.34,63.98,,74.7,
|
10 |
sentence_similarity_spanish_es,,,68.5,61.77,,75.22,
|
11 |
-
paraphrase-multilingual-mpnet-
|
12 |
-
mstsb-paraphrase-multilingual-mpnet-
|
13 |
-
multilingual-e5-
|
14 |
-
multilingual-e5-
|
15 |
-
multilingual-e5-
|
16 |
-
multilingual-e5-
|
17 |
-
multilingual-e5-
|
18 |
LaBSE,,,66.99,61.97,,72.01,
|
19 |
-
multilingual-e5-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model name,Model Size (GB),Embedding Dimensions,Average,Classification Average,Clustering Average,STS Average,Retrieval Average
|
2 |
+
multilingual-e5-LARGE,,,72.22,66.6,,77.83,
|
3 |
+
bge-SMALL-en-v1.5,,,59.73,52.86,,66.6,
|
4 |
+
multilingual-e5-BASE,,,70.7,63.88,,77.53,
|
5 |
+
multilingual-e5-SMALL,,,68.64,61.13,,76.15,
|
6 |
+
paraphrase-multilingual-mpnet-BASE-v2,,,69.1,65.68,,72.53,
|
7 |
+
sentence-t5-LARGE,,,64.04,57.88,,70.21,
|
8 |
sentence-t5-xl,,,66.22,61.64,,70.79,
|
9 |
paraphrase-spanish-distilroberta,,,69.34,63.98,,74.7,
|
10 |
sentence_similarity_spanish_es,,,68.5,61.77,,75.22,
|
11 |
+
paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,,,68.62,64.0,,73.25,
|
12 |
+
mstsb-paraphrase-multilingual-mpnet-BASE-v2,,,69.39,64.48,,74.29,
|
13 |
+
multilingual-e5-BASE-b16-e10,,,71.97,65.09,,78.86,
|
14 |
+
multilingual-e5-LARGE-stsb-tuned-b32-e10,,,72.73,66.19,,79.27,
|
15 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e10,,,73.07,67.1,,79.05,
|
16 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e5,,,72.84,66.23,,79.46,
|
17 |
+
multilingual-e5-LARGE-stsb-tuned-b64-e10,,,71.83,64.58,,79.08,
|
18 |
LaBSE,,,66.99,61.97,,72.01,
|
19 |
+
multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,,,71.88,64.39,,79.38,
|
20 |
+
bge-BASE-tuned-b16-e10,,,59.69,50.83,,68.55,
|
21 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,,,72.22,64.93,,79.5,
|
22 |
+
bge-LARGE-tuned-b16-e10,,,61.5,51.67,,71.34,
|
23 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e15,,,73.38,67.56,,79.19,
|
24 |
+
multilingual-e5-LARGE-stsb-tuned-b64-e10-all-languages,,,71.68,64.85,,78.52,
|
25 |
+
multilingual-e5-LARGE-STSAUGMENTED-b16-e5,,,71.28,63.11,,79.44,
|
data/sts.csv
CHANGED
@@ -1,19 +1,25 @@
|
|
1 |
Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
|
2 |
-
multilingual-e5-
|
3 |
-
bge-
|
4 |
-
multilingual-e5-
|
5 |
-
multilingual-e5-
|
6 |
-
paraphrase-multilingual-mpnet-
|
7 |
-
sentence-t5-
|
8 |
sentence-t5-xl,70.78,83.42,58.16
|
9 |
paraphrase-spanish-distilroberta,74.7,85.79,63.61
|
10 |
sentence_similarity_spanish_es,75.22,85.37,65.07
|
11 |
-
paraphrase-multilingual-mpnet-
|
12 |
-
mstsb-paraphrase-multilingual-mpnet-
|
13 |
-
multilingual-e5-
|
14 |
-
multilingual-e5-
|
15 |
-
multilingual-e5-
|
16 |
-
multilingual-e5-
|
17 |
-
multilingual-e5-
|
18 |
LaBSE,72.01,80.83,63.18
|
19 |
-
multilingual-e5-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
Model name,Average,MTEB STS17 (es-es),MTEB STS22 (es)
|
2 |
+
multilingual-e5-LARGE,77.82,87.42,68.23
|
3 |
+
bge-SMALL-en-v1.5,66.6,77.73,55.47
|
4 |
+
multilingual-e5-BASE,77.52,87.26,67.79
|
5 |
+
multilingual-e5-SMALL,76.15,85.27,67.04
|
6 |
+
paraphrase-multilingual-mpnet-BASE-v2,72.52,85.14,59.91
|
7 |
+
sentence-t5-LARGE,70.21,82.74,57.68
|
8 |
sentence-t5-xl,70.78,83.42,58.16
|
9 |
paraphrase-spanish-distilroberta,74.7,85.79,63.61
|
10 |
sentence_similarity_spanish_es,75.22,85.37,65.07
|
11 |
+
paraphrase-multilingual-mpnet-BASE-v2-ft-stsb_multi_mt-embeddings,73.24,86.89,59.6
|
12 |
+
mstsb-paraphrase-multilingual-mpnet-BASE-v2,74.28,88.22,60.36
|
13 |
+
multilingual-e5-BASE-b16-e10,78.86,87.51,70.21
|
14 |
+
multilingual-e5-LARGE-stsb-tuned-b32-e10,79.27,88.1,70.44
|
15 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e10,79.05,88.53,69.58
|
16 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e5,79.46,88.44,70.48
|
17 |
+
multilingual-e5-LARGE-stsb-tuned-b64-e10,79.08,88.03,70.12
|
18 |
LaBSE,72.01,80.83,63.18
|
19 |
+
multilingual-e5-BASE-stsb-tuned-b16-e10-all-languages,79.38,87.77,70.99
|
20 |
+
bge-BASE-tuned-b16-e10,68.55,82.01,55.08
|
21 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e10-all-languages,79.5,88.66,70.35
|
22 |
+
bge-LARGE-tuned-b16-e10,71.34,81.57,61.12
|
23 |
+
multilingual-e5-LARGE-stsb-tuned-b16-e15,79.19,89.04,69.34
|
24 |
+
multilingual-e5-LARGE-stsb-tuned-b64-e10-all-languages,78.52,88.37,68.67
|
25 |
+
multilingual-e5-LARGE-STSAUGMENTED-b16-e5,79.44,88.46,70.41
|