diff --git a/eval/beir.json b/evaluation/beir/beir.json similarity index 100% rename from eval/beir.json rename to evaluation/beir/beir.json diff --git a/evaluation/mteb/AmazonCounterfactualClassification.json b/evaluation/mteb/AmazonCounterfactualClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..0309e4a5c74e6e68ab5442af10f0bd2e9176899e --- /dev/null +++ b/evaluation/mteb/AmazonCounterfactualClassification.json @@ -0,0 +1,16 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.652089552238806, + "accuracy_stderr": 0.04707742824740793, + "ap": 0.2959212705444778, + "ap_stderr": 0.022393345886320606, + "f1": 0.5997099864321921, + "f1_stderr": 0.036697739411917986, + "main_score": 0.652089552238806 + }, + "evaluation_time": 23.71 + } +} \ No newline at end of file diff --git a/evaluation/mteb/AmazonPolarityClassification.json b/evaluation/mteb/AmazonPolarityClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..c4ded100e1981921380e45af349cae5715f6afe2 --- /dev/null +++ b/evaluation/mteb/AmazonPolarityClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.7320565, + "accuracy_stderr": 0.04094219950430119, + "ap": 0.6736680643550963, + "ap_stderr": 0.03652809962256317, + "evaluation_time": 7801.59, + "f1": 0.7290420520325125, + "f1_stderr": 0.04396494850187113, + "main_score": 0.7320565 + } +} \ No newline at end of file diff --git a/evaluation/mteb/AmazonReviewsClassification.json b/evaluation/mteb/AmazonReviewsClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..16c0c24e52c6b86286bb9bb78341f9b8444f9077 --- /dev/null +++ b/evaluation/mteb/AmazonReviewsClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.34956, + "accuracy_stderr": 0.018705464442242543, + "f1": 0.3471932443769695, + "f1_stderr": 0.017698150667922033, + "main_score": 0.34956 + }, + "evaluation_time": 73.75 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ArguAna.json b/evaluation/mteb/ArguAna.json new file mode 100644 index 0000000000000000000000000000000000000000..ed61cf17c5078716faab767954c2f12070709fd2 --- /dev/null +++ b/evaluation/mteb/ArguAna.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 159.23, + "map_at_1": 0.26102, + "map_at_10": 0.40958, + "map_at_100": 0.42033, + "map_at_1000": 0.42042, + "map_at_3": 0.36332, + "map_at_5": 0.38608, + "mrr_at_1": 0.26387, + "mrr_at_10": 0.41051, + "mrr_at_100": 0.42118, + "mrr_at_1000": 0.42127, + "mrr_at_3": 0.36415, + "mrr_at_5": 0.3872, + "ndcg_at_1": 0.26102, + "ndcg_at_10": 0.4968, + "ndcg_at_100": 0.54258, + "ndcg_at_1000": 0.54486, + "ndcg_at_3": 0.39864, + "ndcg_at_5": 0.4398, + "precision_at_1": 0.26102, + "precision_at_10": 0.07781, + "precision_at_100": 0.00979, + "precision_at_1000": 0.001, + "precision_at_3": 0.16714, + "precision_at_5": 0.12034, + "recall_at_1": 0.26102, + "recall_at_10": 0.77809, + "recall_at_100": 0.97866, + "recall_at_1000": 0.99644, + "recall_at_3": 0.50142, + "recall_at_5": 0.60171 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ArxivClusteringP2P.json b/evaluation/mteb/ArxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..864471df21bcbd705b4b72af613bee42c0f1046e --- /dev/null +++ b/evaluation/mteb/ArxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 28476.56, + "v_measure": 0.4338419491695377, + "v_measure_std": 0.13772932292188322 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ArxivClusteringS2S.json b/evaluation/mteb/ArxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..1a3484edb70e71a262255dd67112486d94c30e70 --- /dev/null +++ b/evaluation/mteb/ArxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3597.82, + "v_measure": 0.3370962633433912, + "v_measure_std": 0.14057785790111033 + } +} \ No newline at end of file diff --git a/evaluation/mteb/AskUbuntuDupQuestions.json b/evaluation/mteb/AskUbuntuDupQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..9595e975e495b061e4802cfaf0afac3a4f3951c0 --- /dev/null +++ b/evaluation/mteb/AskUbuntuDupQuestions.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 29.27, + "map": 0.5813305899687008, + "mrr": 0.7210922041946972 + } +} \ No newline at end of file diff --git a/evaluation/mteb/BIOSSES.json b/evaluation/mteb/BIOSSES.json new file mode 100644 index 0000000000000000000000000000000000000000..c250af7223ed5b944ad9acedbaaa0af80c988206 --- /dev/null +++ b/evaluation/mteb/BIOSSES.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.8662153841660047, + "spearman": 0.8301514456843275 + }, + "euclidean": { + "pearson": 0.8600431518427242, + "spearman": 0.8385552516285784 + }, + "evaluation_time": 4.47, + "manhattan": { + "pearson": 0.8583025803351181, + "spearman": 0.8386636878343106 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/Banking77Classification.json b/evaluation/mteb/Banking77Classification.json new file mode 100644 index 0000000000000000000000000000000000000000..c72bcec1de0804a09e7dc59abe4d58e90e75d794 --- /dev/null +++ b/evaluation/mteb/Banking77Classification.json @@ -0,0 +1,12 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.8205844155844156, + "accuracy_stderr": 0.00898031054024437, + "evaluation_time": 70.71, + "f1": 0.8201858378847641, + "f1_stderr": 0.009423967637409204, + "main_score": 0.8205844155844156 + } +} \ No newline at end of file diff --git a/evaluation/mteb/BiorxivClusteringP2P.json b/evaluation/mteb/BiorxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..9114e4b0c56ca2a7d9f31cf023c07f77aa9b14f4 --- /dev/null +++ b/evaluation/mteb/BiorxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3352.83, + "v_measure": 0.3505918333141837, + "v_measure_std": 0.00847894603883707 + } +} \ No newline at end of file diff --git a/evaluation/mteb/BiorxivClusteringS2S.json b/evaluation/mteb/BiorxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..92eecfb53158c9443fdfe9ecb682c922ede3155d --- /dev/null +++ b/evaluation/mteb/BiorxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 435.49, + "v_measure": 0.3071055028830579, + "v_measure_std": 0.008981834294010968 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackAndroidRetrieval.json b/evaluation/mteb/CQADupstackAndroidRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..34cc232a6e6eac20f093794b33b78c0ad9d9d814 --- /dev/null +++ b/evaluation/mteb/CQADupstackAndroidRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 575.19, + "map_at_1": 0.26519, + "map_at_10": 0.35634, + "map_at_100": 0.36961, + "map_at_1000": 0.37088, + "map_at_3": 0.32254, + "map_at_5": 0.3422, + "mrr_at_1": 0.32332, + "mrr_at_10": 0.41168, + "mrr_at_100": 0.41977, + "mrr_at_1000": 0.42029, + "mrr_at_3": 0.38197, + "mrr_at_5": 0.40036, + "ndcg_at_1": 0.32332, + "ndcg_at_10": 0.41471, + "ndcg_at_100": 0.46956, + "ndcg_at_1000": 0.49262, + "ndcg_at_3": 0.35938, + "ndcg_at_5": 0.38703, + "precision_at_1": 0.32332, + "precision_at_10": 0.07783, + "precision_at_100": 0.0129, + "precision_at_1000": 0.00178, + "precision_at_3": 0.16834, + "precision_at_5": 0.12418, + "recall_at_1": 0.26519, + "recall_at_10": 0.5319, + "recall_at_100": 0.76565, + "recall_at_1000": 0.91478, + "recall_at_3": 0.38034, + "recall_at_5": 0.45246 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackEnglishRetrieval.json b/evaluation/mteb/CQADupstackEnglishRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..38ce4fa8449e0dd0a7bf40ef342643fe2d4e57f9 --- /dev/null +++ b/evaluation/mteb/CQADupstackEnglishRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 874.22, + "map_at_1": 0.25356, + "map_at_10": 0.34596, + "map_at_100": 0.35714, + "map_at_1000": 0.3584, + "map_at_3": 0.32073, + "map_at_5": 0.33475, + "mrr_at_1": 0.31274, + "mrr_at_10": 0.39592, + "mrr_at_100": 0.40284, + "mrr_at_1000": 0.4034, + "mrr_at_3": 0.37378, + "mrr_at_5": 0.38658, + "ndcg_at_1": 0.31274, + "ndcg_at_10": 0.39766, + "ndcg_at_100": 0.44028, + "ndcg_at_1000": 0.46445, + "ndcg_at_3": 0.35934, + "ndcg_at_5": 0.37751, + "precision_at_1": 0.31274, + "precision_at_10": 0.07452, + "precision_at_100": 0.01217, + "precision_at_1000": 0.0017, + "precision_at_3": 0.17431, + "precision_at_5": 0.12306, + "recall_at_1": 0.25356, + "recall_at_10": 0.49344, + "recall_at_100": 0.67497, + "recall_at_1000": 0.83372, + "recall_at_3": 0.38227, + "recall_at_5": 0.43188 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackGamingRetrieval.json b/evaluation/mteb/CQADupstackGamingRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..eff7b3c48286ed84c73f6bdd8a78a394f6e4d91a --- /dev/null +++ b/evaluation/mteb/CQADupstackGamingRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 931.63, + "map_at_1": 0.32759, + "map_at_10": 0.43937, + "map_at_100": 0.45004, + "map_at_1000": 0.4507, + "map_at_3": 0.40805, + "map_at_5": 0.42497, + "mrr_at_1": 0.37367, + "mrr_at_10": 0.47237, + "mrr_at_100": 0.47973, + "mrr_at_1000": 0.48011, + "mrr_at_3": 0.4465, + "mrr_at_5": 0.46051, + "ndcg_at_1": 0.37367, + "ndcg_at_10": 0.49659, + "ndcg_at_100": 0.54069, + "ndcg_at_1000": 0.55552, + "ndcg_at_3": 0.44169, + "ndcg_at_5": 0.46726, + "precision_at_1": 0.37367, + "precision_at_10": 0.08163, + "precision_at_100": 0.01133, + "precision_at_1000": 0.00131, + "precision_at_3": 0.19707, + "precision_at_5": 0.13718, + "recall_at_1": 0.32759, + "recall_at_10": 0.63341, + "recall_at_100": 0.82502, + "recall_at_1000": 0.93259, + "recall_at_3": 0.48796, + "recall_at_5": 0.54921 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackGisRetrieval.json b/evaluation/mteb/CQADupstackGisRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..5c2e008a76e18d4450e40ada6262da67fb08c26f --- /dev/null +++ b/evaluation/mteb/CQADupstackGisRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1290.88, + "map_at_1": 0.18962, + "map_at_10": 0.25863, + "map_at_100": 0.26818, + "map_at_1000": 0.26918, + "map_at_3": 0.23043, + "map_at_5": 0.24599, + "mrr_at_1": 0.20452, + "mrr_at_10": 0.27301, + "mrr_at_100": 0.28233, + "mrr_at_1000": 0.2831, + "mrr_at_3": 0.24539, + "mrr_at_5": 0.26109, + "ndcg_at_1": 0.20452, + "ndcg_at_10": 0.30355, + "ndcg_at_100": 0.35336, + "ndcg_at_1000": 0.37927, + "ndcg_at_3": 0.24705, + "ndcg_at_5": 0.2742, + "precision_at_1": 0.20452, + "precision_at_10": 0.04949, + "precision_at_100": 0.0078, + "precision_at_1000": 0.00104, + "precision_at_3": 0.10358, + "precision_at_5": 0.07774, + "recall_at_1": 0.18962, + "recall_at_10": 0.43056, + "recall_at_100": 0.66273, + "recall_at_1000": 0.8596, + "recall_at_3": 0.27776, + "recall_at_5": 0.34287 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackMathematicaRetrieval.json b/evaluation/mteb/CQADupstackMathematicaRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..e4154312566af98a6087d225082a2de594b64f22 --- /dev/null +++ b/evaluation/mteb/CQADupstackMathematicaRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 686.89, + "map_at_1": 0.1124, + "map_at_10": 0.18503, + "map_at_100": 0.19553, + "map_at_1000": 0.1969, + "map_at_3": 0.1615, + "map_at_5": 0.17254, + "mrr_at_1": 0.13806, + "mrr_at_10": 0.21939, + "mrr_at_100": 0.22827, + "mrr_at_1000": 0.22911, + "mrr_at_3": 0.1932, + "mrr_at_5": 0.20558, + "ndcg_at_1": 0.13806, + "ndcg_at_10": 0.23383, + "ndcg_at_100": 0.28834, + "ndcg_at_1000": 0.32175, + "ndcg_at_3": 0.18652, + "ndcg_at_5": 0.20505, + "precision_at_1": 0.13806, + "precision_at_10": 0.04714, + "precision_at_100": 0.00864, + "precision_at_1000": 0.0013, + "precision_at_3": 0.09328, + "precision_at_5": 0.06841, + "recall_at_1": 0.1124, + "recall_at_10": 0.34854, + "recall_at_100": 0.59503, + "recall_at_1000": 0.8325, + "recall_at_3": 0.2202, + "recall_at_5": 0.26715 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackPhysicsRetrieval.json b/evaluation/mteb/CQADupstackPhysicsRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..7d9e07fd57483c161192e1c0cce2e7b6f60ff922 --- /dev/null +++ b/evaluation/mteb/CQADupstackPhysicsRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1269.26, + "map_at_1": 0.23012, + "map_at_10": 0.33048, + "map_at_100": 0.34371, + "map_at_1000": 0.34489, + "map_at_3": 0.29943, + "map_at_5": 0.31602, + "mrr_at_1": 0.28104, + "mrr_at_10": 0.3799, + "mrr_at_100": 0.38836, + "mrr_at_1000": 0.38891, + "mrr_at_3": 0.35226, + "mrr_at_5": 0.36694, + "ndcg_at_1": 0.28104, + "ndcg_at_10": 0.39037, + "ndcg_at_100": 0.44643, + "ndcg_at_1000": 0.46939, + "ndcg_at_3": 0.33784, + "ndcg_at_5": 0.36126, + "precision_at_1": 0.28104, + "precision_at_10": 0.07267, + "precision_at_100": 0.01193, + "precision_at_1000": 0.00159, + "precision_at_3": 0.16298, + "precision_at_5": 0.11684, + "recall_at_1": 0.23012, + "recall_at_10": 0.52054, + "recall_at_100": 0.75622, + "recall_at_1000": 0.90675, + "recall_at_3": 0.37282, + "recall_at_5": 0.43307 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackProgrammersRetrieval.json b/evaluation/mteb/CQADupstackProgrammersRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..4bcad9028af57b0d36e2f17c47bd3edb0dfe9103 --- /dev/null +++ b/evaluation/mteb/CQADupstackProgrammersRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1185.95, + "map_at_1": 0.21624, + "map_at_10": 0.3021, + "map_at_100": 0.3152, + "map_at_1000": 0.31626, + "map_at_3": 0.26951, + "map_at_5": 0.28939, + "mrr_at_1": 0.26941, + "mrr_at_10": 0.3513, + "mrr_at_100": 0.3615, + "mrr_at_1000": 0.36204, + "mrr_at_3": 0.3242, + "mrr_at_5": 0.34155, + "ndcg_at_1": 0.26941, + "ndcg_at_10": 0.35726, + "ndcg_at_100": 0.41725, + "ndcg_at_1000": 0.44105, + "ndcg_at_3": 0.30184, + "ndcg_at_5": 0.33176, + "precision_at_1": 0.26941, + "precision_at_10": 0.06655, + "precision_at_100": 0.01152, + "precision_at_1000": 0.00152, + "precision_at_3": 0.14346, + "precision_at_5": 0.10868, + "recall_at_1": 0.21624, + "recall_at_10": 0.47359, + "recall_at_100": 0.73436, + "recall_at_1000": 0.89988, + "recall_at_3": 0.3234, + "recall_at_5": 0.39856 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackRetrieval.json b/evaluation/mteb/CQADupstackRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..4378c58773e63a069eaa91b741226bbcb223006d --- /dev/null +++ b/evaluation/mteb/CQADupstackRetrieval.json @@ -0,0 +1 @@ +{"dataset_version": null, "mteb_version": "0.0.2", "test": {"evaluation_time": 1798.61, "map_at_1": 0.2067566666666667, "map_at_10": 0.28479333333333334, "map_at_100": 0.29612249999999996, "map_at_1000": 0.29731166666666664, "map_at_3": 0.25884, "map_at_5": 0.27298916666666667, "mrr_at_1": 0.24402583333333333, "mrr_at_10": 0.3207041666666667, "mrr_at_100": 0.32958416666666673, "mrr_at_1000": 0.3302541666666666, "mrr_at_3": 0.29677749999999997, "mrr_at_5": 0.3102391666666667, "ndcg_at_1": 0.24402583333333333, "ndcg_at_10": 0.3332616666666667, "ndcg_at_100": 0.38515666666666665, "ndcg_at_1000": 0.4113791666666667, "ndcg_at_3": 0.28687749999999995, "ndcg_at_5": 0.3084766666666667, "precision_at_1": 0.24402583333333333, "precision_at_10": 0.05943749999999999, "precision_at_100": 0.010098333333333334, "precision_at_1000": 0.0014183333333333333, "precision_at_3": 0.132115, "precision_at_5": 0.09548416666666668, "recall_at_1": 0.2067566666666667, "recall_at_10": 0.4424558333333334, "recall_at_100": 0.6731116666666667, "recall_at_1000": 0.8587841666666666, "recall_at_3": 0.3149258333333333, "recall_at_5": 0.3693241666666667}} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackStatsRetrieval.json b/evaluation/mteb/CQADupstackStatsRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..85ba014a6cb618fe72c746c82961c6febdf14952 --- /dev/null +++ b/evaluation/mteb/CQADupstackStatsRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1614.26, + "map_at_1": 0.1834, + "map_at_10": 0.23988, + "map_at_100": 0.24895, + "map_at_1000": 0.24992, + "map_at_3": 0.21831, + "map_at_5": 0.23, + "mrr_at_1": 0.20399, + "mrr_at_10": 0.26186, + "mrr_at_100": 0.27018, + "mrr_at_1000": 0.27091, + "mrr_at_3": 0.2408, + "mrr_at_5": 0.2523, + "ndcg_at_1": 0.20399, + "ndcg_at_10": 0.27799, + "ndcg_at_100": 0.32579, + "ndcg_at_1000": 0.35209, + "ndcg_at_3": 0.23684, + "ndcg_at_5": 0.25521, + "precision_at_1": 0.20399, + "precision_at_10": 0.04586, + "precision_at_100": 0.00755, + "precision_at_1000": 0.00105, + "precision_at_3": 0.10276, + "precision_at_5": 0.07362, + "recall_at_1": 0.1834, + "recall_at_10": 0.37456, + "recall_at_100": 0.5986, + "recall_at_1000": 0.79703, + "recall_at_3": 0.26164, + "recall_at_5": 0.30652 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackTexRetrieval.json b/evaluation/mteb/CQADupstackTexRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..03126ad4d8a8f1b735e0ea5e8ae442f5f3db7cc2 --- /dev/null +++ b/evaluation/mteb/CQADupstackTexRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 2711.63, + "map_at_1": 0.12327, + "map_at_10": 0.17572, + "map_at_100": 0.18534, + "map_at_1000": 0.18653, + "map_at_3": 0.15703, + "map_at_5": 0.16752, + "mrr_at_1": 0.15038, + "mrr_at_10": 0.20726, + "mrr_at_100": 0.2161, + "mrr_at_1000": 0.21695, + "mrr_at_3": 0.18829, + "mrr_at_5": 0.19885, + "ndcg_at_1": 0.15038, + "ndcg_at_10": 0.21241, + "ndcg_at_100": 0.26179, + "ndcg_at_1000": 0.29316, + "ndcg_at_3": 0.17762, + "ndcg_at_5": 0.19413, + "precision_at_1": 0.15038, + "precision_at_10": 0.03892, + "precision_at_100": 0.0075, + "precision_at_1000": 0.00118, + "precision_at_3": 0.08351, + "precision_at_5": 0.06187, + "recall_at_1": 0.12327, + "recall_at_10": 0.29342, + "recall_at_100": 0.51854, + "recall_at_1000": 0.74648, + "recall_at_3": 0.19596, + "recall_at_5": 0.23899 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackUnixRetrieval.json b/evaluation/mteb/CQADupstackUnixRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..1dce84d87b6c13008420fa4ea610b551db7c8a0e --- /dev/null +++ b/evaluation/mteb/CQADupstackUnixRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1688.62, + "map_at_1": 0.20594, + "map_at_10": 0.27879, + "map_at_100": 0.28926, + "map_at_1000": 0.29041, + "map_at_3": 0.25669, + "map_at_5": 0.26774, + "mrr_at_1": 0.23694, + "mrr_at_10": 0.31335, + "mrr_at_100": 0.32218, + "mrr_at_1000": 0.32298, + "mrr_at_3": 0.2926, + "mrr_at_5": 0.30328, + "ndcg_at_1": 0.23694, + "ndcg_at_10": 0.32456, + "ndcg_at_100": 0.37667, + "ndcg_at_1000": 0.40571, + "ndcg_at_3": 0.28283, + "ndcg_at_5": 0.29986, + "precision_at_1": 0.23694, + "precision_at_10": 0.05448, + "precision_at_100": 0.00912, + "precision_at_1000": 0.00127, + "precision_at_3": 0.12718, + "precision_at_5": 0.08843, + "recall_at_1": 0.20594, + "recall_at_10": 0.43005, + "recall_at_100": 0.66228, + "recall_at_1000": 0.87171, + "recall_at_3": 0.31554, + "recall_at_5": 0.35838 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackWebmastersRetrieval.json b/evaluation/mteb/CQADupstackWebmastersRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..9e049b202aa126b317c439907a7a9b806882c9b4 --- /dev/null +++ b/evaluation/mteb/CQADupstackWebmastersRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 540.56, + "map_at_1": 0.20856, + "map_at_10": 0.28372, + "map_at_100": 0.2987, + "map_at_1000": 0.30075, + "map_at_3": 0.26054, + "map_at_5": 0.27129, + "mrr_at_1": 0.25494, + "mrr_at_10": 0.32735, + "mrr_at_100": 0.33794, + "mrr_at_1000": 0.3385, + "mrr_at_3": 0.30731, + "mrr_at_5": 0.31897, + "ndcg_at_1": 0.25494, + "ndcg_at_10": 0.33385, + "ndcg_at_100": 0.39436, + "ndcg_at_1000": 0.42313, + "ndcg_at_3": 0.29612, + "ndcg_at_5": 0.31187, + "precision_at_1": 0.25494, + "precision_at_10": 0.06423, + "precision_at_100": 0.01383, + "precision_at_1000": 0.00224, + "precision_at_3": 0.13834, + "precision_at_5": 0.1, + "recall_at_1": 0.20856, + "recall_at_10": 0.42678, + "recall_at_100": 0.70224, + "recall_at_1000": 0.89369, + "recall_at_3": 0.31957, + "recall_at_5": 0.36026 + } +} \ No newline at end of file diff --git a/evaluation/mteb/CQADupstackWordpressRetrieval.json b/evaluation/mteb/CQADupstackWordpressRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..5015b7573df432ca3d326c36cb98e3edbae06ea8 --- /dev/null +++ b/evaluation/mteb/CQADupstackWordpressRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1798.61, + "map_at_1": 0.16519, + "map_at_10": 0.2215, + "map_at_100": 0.23181, + "map_at_1000": 0.23292, + "map_at_3": 0.20132, + "map_at_5": 0.21346, + "mrr_at_1": 0.1793, + "mrr_at_10": 0.23506, + "mrr_at_100": 0.24581, + "mrr_at_1000": 0.24675, + "mrr_at_3": 0.21503, + "mrr_at_5": 0.22686, + "ndcg_at_1": 0.1793, + "ndcg_at_10": 0.25636, + "ndcg_at_100": 0.30736, + "ndcg_at_1000": 0.33841, + "ndcg_at_3": 0.21546, + "ndcg_at_5": 0.23658, + "precision_at_1": 0.1793, + "precision_at_10": 0.03993, + "precision_at_100": 0.00689, + "precision_at_1000": 0.00104, + "precision_at_3": 0.09057, + "precision_at_5": 0.0658, + "recall_at_1": 0.16519, + "recall_at_10": 0.35268, + "recall_at_100": 0.5817, + "recall_at_1000": 0.81668, + "recall_at_3": 0.24165, + "recall_at_5": 0.29254 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ClimateFEVER.json b/evaluation/mteb/ClimateFEVER.json new file mode 100644 index 0000000000000000000000000000000000000000..c779ef47733f78638d08e376a695bf7b9f8a6477 --- /dev/null +++ b/evaluation/mteb/ClimateFEVER.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 16391.42, + "map_at_1": 0.10363, + "map_at_10": 0.18301, + "map_at_100": 0.20019, + "map_at_1000": 0.20207, + "map_at_3": 0.14877, + "map_at_5": 0.16544, + "mrr_at_1": 0.22866, + "mrr_at_10": 0.34935, + "mrr_at_100": 0.35802, + "mrr_at_1000": 0.3584, + "mrr_at_3": 0.30966, + "mrr_at_5": 0.33204, + "ndcg_at_1": 0.22866, + "ndcg_at_10": 0.26595, + "ndcg_at_100": 0.33514, + "ndcg_at_1000": 0.36872, + "ndcg_at_3": 0.20667, + "ndcg_at_5": 0.22728, + "precision_at_1": 0.22866, + "precision_at_10": 0.08632, + "precision_at_100": 0.01612, + "precision_at_1000": 0.00224, + "precision_at_3": 0.15505, + "precision_at_5": 0.12404, + "recall_at_1": 0.10363, + "recall_at_10": 0.33494, + "recall_at_100": 0.57593, + "recall_at_1000": 0.76342, + "recall_at_3": 0.19157, + "recall_at_5": 0.24638 + } +} \ No newline at end of file diff --git a/evaluation/mteb/DBPedia.json b/evaluation/mteb/DBPedia.json new file mode 100644 index 0000000000000000000000000000000000000000..a7c9a4d796b644e835046379fadf69bfc9512174 --- /dev/null +++ b/evaluation/mteb/DBPedia.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 11184.99, + "map_at_1": 0.07436, + "map_at_10": 0.1476, + "map_at_100": 0.19206, + "map_at_1000": 0.20267, + "map_at_3": 0.10894, + "map_at_5": 0.12829, + "mrr_at_1": 0.5425, + "mrr_at_10": 0.63769, + "mrr_at_100": 0.64193, + "mrr_at_1000": 0.64211, + "mrr_at_3": 0.61458, + "mrr_at_5": 0.63096, + "ndcg_at_1": 0.42875, + "ndcg_at_10": 0.31507, + "ndcg_at_100": 0.34559, + "ndcg_at_1000": 0.41246, + "ndcg_at_3": 0.35058, + "ndcg_at_5": 0.33396, + "precision_at_1": 0.5425, + "precision_at_10": 0.2445, + "precision_at_100": 0.07383, + "precision_at_1000": 0.01582, + "precision_at_3": 0.38083, + "precision_at_5": 0.326, + "recall_at_1": 0.07436, + "recall_at_10": 0.19862, + "recall_at_100": 0.38981, + "recall_at_1000": 0.61038, + "recall_at_3": 0.11949, + "recall_at_5": 0.15562 + } +} \ No newline at end of file diff --git a/evaluation/mteb/EmotionClassification.json b/evaluation/mteb/EmotionClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..0d5f76a7eb07018941a02b10174c46a755c27a11 --- /dev/null +++ b/evaluation/mteb/EmotionClassification.json @@ -0,0 +1,12 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.46390000000000003, + "accuracy_stderr": 0.018785366645343925, + "evaluation_time": 31.09, + "f1": 0.42264248858567033, + "f1_stderr": 0.011565342464001555, + "main_score": 0.46390000000000003 + } +} \ No newline at end of file diff --git a/evaluation/mteb/FEVER.json b/evaluation/mteb/FEVER.json new file mode 100644 index 0000000000000000000000000000000000000000..af1515badf6192d1b58f248cdb27fb599b93ef21 --- /dev/null +++ b/evaluation/mteb/FEVER.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 17506.78, + "map_at_1": 0.50916, + "map_at_10": 0.62258, + "map_at_100": 0.62741, + "map_at_1000": 0.62763, + "map_at_3": 0.60018, + "map_at_5": 0.6142, + "mrr_at_1": 0.54965, + "mrr_at_10": 0.66554, + "mrr_at_100": 0.66966, + "mrr_at_1000": 0.66978, + "mrr_at_3": 0.64414, + "mrr_at_5": 0.6577, + "ndcg_at_1": 0.54965, + "ndcg_at_10": 0.6812, + "ndcg_at_100": 0.70282, + "ndcg_at_1000": 0.70788, + "ndcg_at_3": 0.63862, + "ndcg_at_5": 0.66216, + "precision_at_1": 0.54965, + "precision_at_10": 0.08998, + "precision_at_100": 0.01016, + "precision_at_1000": 0.00107, + "precision_at_3": 0.25618, + "precision_at_5": 0.16676, + "recall_at_1": 0.50916, + "recall_at_10": 0.8204, + "recall_at_100": 0.91689, + "recall_at_1000": 0.95349, + "recall_at_3": 0.70512, + "recall_at_5": 0.76299 + } +} \ No newline at end of file diff --git a/evaluation/mteb/FiQA2018.json b/evaluation/mteb/FiQA2018.json new file mode 100644 index 0000000000000000000000000000000000000000..1e9747ab3eb7c80e9d4129844c8150cf6997b303 --- /dev/null +++ b/evaluation/mteb/FiQA2018.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 310.86, + "map_at_1": 0.13568, + "map_at_10": 0.23264, + "map_at_100": 0.24824, + "map_at_1000": 0.25014, + "map_at_3": 0.19724, + "map_at_5": 0.21772, + "mrr_at_1": 0.27315, + "mrr_at_10": 0.35935, + "mrr_at_100": 0.36929, + "mrr_at_1000": 0.36985, + "mrr_at_3": 0.33591, + "mrr_at_5": 0.34848, + "ndcg_at_1": 0.27315, + "ndcg_at_10": 0.29988, + "ndcg_at_100": 0.3641, + "ndcg_at_1000": 0.40185, + "ndcg_at_3": 0.26342, + "ndcg_at_5": 0.2768, + "precision_at_1": 0.27315, + "precision_at_10": 0.08565, + "precision_at_100": 0.01508, + "precision_at_1000": 0.00219, + "precision_at_3": 0.1785, + "precision_at_5": 0.13673, + "recall_at_1": 0.13568, + "recall_at_10": 0.37133, + "recall_at_100": 0.61475, + "recall_at_1000": 0.84372, + "recall_at_3": 0.24112, + "recall_at_5": 0.29507 + } +} \ No newline at end of file diff --git a/evaluation/mteb/HotpotQA.json b/evaluation/mteb/HotpotQA.json new file mode 100644 index 0000000000000000000000000000000000000000..645751d6161fdaa29da081c66469e67c4c54752f --- /dev/null +++ b/evaluation/mteb/HotpotQA.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 13678.13, + "map_at_1": 0.30878, + "map_at_10": 0.40868, + "map_at_100": 0.41694, + "map_at_1000": 0.41775, + "map_at_3": 0.3856, + "map_at_5": 0.39947, + "mrr_at_1": 0.61756, + "mrr_at_10": 0.68265, + "mrr_at_100": 0.68671, + "mrr_at_1000": 0.68694, + "mrr_at_3": 0.66784, + "mrr_at_5": 0.67704, + "ndcg_at_1": 0.61756, + "ndcg_at_10": 0.49931, + "ndcg_at_100": 0.53179, + "ndcg_at_1000": 0.54948, + "ndcg_at_3": 0.46103, + "ndcg_at_5": 0.48147, + "precision_at_1": 0.61756, + "precision_at_10": 0.10163, + "precision_at_100": 0.01271, + "precision_at_1000": 0.00151, + "precision_at_3": 0.28179, + "precision_at_5": 0.18528, + "recall_at_1": 0.30878, + "recall_at_10": 0.50817, + "recall_at_100": 0.63545, + "recall_at_1000": 0.75361, + "recall_at_3": 0.42269, + "recall_at_5": 0.4632 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ImdbClassification.json b/evaluation/mteb/ImdbClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..27dda9476f53b0d49c261514f69c925aa4e3d3e4 --- /dev/null +++ b/evaluation/mteb/ImdbClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.6404799999999999, + "accuracy_stderr": 0.03197141223030349, + "ap": 0.5918525145533928, + "ap_stderr": 0.024531019346391677, + "evaluation_time": 915.58, + "f1": 0.6394712318134925, + "f1_stderr": 0.0317267000377415, + "main_score": 0.6404799999999999 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MSMARCO.json b/evaluation/mteb/MSMARCO.json new file mode 100644 index 0000000000000000000000000000000000000000..3f0d3ba0f7e9e453c493db51869983d8d9cb2451 --- /dev/null +++ b/evaluation/mteb/MSMARCO.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "validation": { + "evaluation_time": 22861.03, + "map_at_1": 0.189, + "map_at_10": 0.29748, + "map_at_100": 0.30976, + "map_at_1000": 0.31041, + "map_at_3": 0.26113, + "map_at_5": 0.28197, + "mrr_at_1": 0.19413, + "mrr_at_10": 0.30322, + "mrr_at_100": 0.31497, + "mrr_at_1000": 0.31555, + "mrr_at_3": 0.26729, + "mrr_at_5": 0.28789, + "ndcg_at_1": 0.19413, + "ndcg_at_10": 0.36048, + "ndcg_at_100": 0.42152, + "ndcg_at_1000": 0.43772, + "ndcg_at_3": 0.28642, + "ndcg_at_5": 0.32358, + "precision_at_1": 0.19413, + "precision_at_10": 0.05785, + "precision_at_100": 0.00887, + "precision_at_1000": 0.00103, + "precision_at_3": 0.12192, + "precision_at_5": 0.09189, + "recall_at_1": 0.189, + "recall_at_10": 0.55457, + "recall_at_100": 0.84091, + "recall_at_1000": 0.96482, + "recall_at_3": 0.35359, + "recall_at_5": 0.44275 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MTOPDomainClassification.json b/evaluation/mteb/MTOPDomainClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..4946293e97ab6047ef11faae3a8aebfa2130278a --- /dev/null +++ b/evaluation/mteb/MTOPDomainClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.9207706338349293, + "accuracy_stderr": 0.005541106237593856, + "f1": 0.9156680443236652, + "f1_stderr": 0.005216941657420623, + "main_score": 0.9207706338349293 + }, + "evaluation_time": 31.37 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MTOPIntentClassification.json b/evaluation/mteb/MTOPIntentClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..a9b6db0656c892cafcb6c5717606393c6af799fe --- /dev/null +++ b/evaluation/mteb/MTOPIntentClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.7118559051527589, + "accuracy_stderr": 0.012419016906278644, + "f1": 0.5242887061726789, + "f1_stderr": 0.011199354119349208, + "main_score": 0.7118559051527589 + }, + "evaluation_time": 82.02 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MassiveIntentClassification.json b/evaluation/mteb/MassiveIntentClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..7eddc2c50fe3aa32ccdf94ea595615b3ac295ddc --- /dev/null +++ b/evaluation/mteb/MassiveIntentClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.6864828513786148, + "accuracy_stderr": 0.012244395475104431, + "f1": 0.6654281381596097, + "f1_stderr": 0.009337695311493243, + "main_score": 0.6864828513786148 + }, + "evaluation_time": 49.93 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MassiveScenarioClassification.json b/evaluation/mteb/MassiveScenarioClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..5f6297aaf33067444cb6cea621c54e340c8f1af3 --- /dev/null +++ b/evaluation/mteb/MassiveScenarioClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en": { + "accuracy": 0.7604236718224613, + "accuracy_stderr": 0.009702648067132833, + "f1": 0.7589170458655639, + "f1_stderr": 0.009666037600064457, + "main_score": 0.7604236718224613 + }, + "evaluation_time": 29.65 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MedrxivClusteringP2P.json b/evaluation/mteb/MedrxivClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..f947688112ab7e0d71922d81f04cd5d19891bb4c --- /dev/null +++ b/evaluation/mteb/MedrxivClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1694.42, + "v_measure": 0.320840369055247, + "v_measure_std": 0.012975340075124157 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MedrxivClusteringS2S.json b/evaluation/mteb/MedrxivClusteringS2S.json new file mode 100644 index 0000000000000000000000000000000000000000..2b39a05e6e123c5b1bc1deefc56a3c9a9f06692b --- /dev/null +++ b/evaluation/mteb/MedrxivClusteringS2S.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 258.59, + "v_measure": 0.29448729560244535, + "v_measure_std": 0.014610308837145732 + } +} \ No newline at end of file diff --git a/evaluation/mteb/MindSmallReranking.json b/evaluation/mteb/MindSmallReranking.json new file mode 100644 index 0000000000000000000000000000000000000000..7a63ca8db8ae95ad81ea08fbb2695db469ddce81 --- /dev/null +++ b/evaluation/mteb/MindSmallReranking.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 18115.54, + "map": 0.31340856463122374, + "mrr": 0.32398547669840916 + } +} \ No newline at end of file diff --git a/evaluation/mteb/NFCorpus.json b/evaluation/mteb/NFCorpus.json new file mode 100644 index 0000000000000000000000000000000000000000..7d33ef1a4b042a89b2901f4208fc882aa290e111 --- /dev/null +++ b/evaluation/mteb/NFCorpus.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 101.25, + "map_at_1": 0.05526, + "map_at_10": 0.11745, + "map_at_100": 0.14831, + "map_at_1000": 0.16235, + "map_at_3": 0.08716, + "map_at_5": 0.10101, + "mrr_at_1": 0.43653, + "mrr_at_10": 0.51067, + "mrr_at_100": 0.51881, + "mrr_at_1000": 0.51912, + "mrr_at_3": 0.4902, + "mrr_at_5": 0.50289, + "ndcg_at_1": 0.4195, + "ndcg_at_10": 0.32083, + "ndcg_at_100": 0.3005, + "ndcg_at_1000": 0.38661, + "ndcg_at_3": 0.3794, + "ndcg_at_5": 0.35456, + "precision_at_1": 0.43344, + "precision_at_10": 0.23437, + "precision_at_100": 0.0783, + "precision_at_1000": 0.02053, + "precision_at_3": 0.35501, + "precision_at_5": 0.30464, + "recall_at_1": 0.05526, + "recall_at_10": 0.15446, + "recall_at_100": 0.31179, + "recall_at_1000": 0.61578, + "recall_at_3": 0.0971, + "recall_at_5": 0.12026 + } +} \ No newline at end of file diff --git a/evaluation/mteb/NQ.json b/evaluation/mteb/NQ.json new file mode 100644 index 0000000000000000000000000000000000000000..ab126329d918d1e8972475179e72ad4e75744a5f --- /dev/null +++ b/evaluation/mteb/NQ.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 8091.11, + "map_at_1": 0.23467, + "map_at_10": 0.36041, + "map_at_100": 0.37268, + "map_at_1000": 0.37322, + "map_at_3": 0.3209, + "map_at_5": 0.34414, + "mrr_at_1": 0.26738, + "mrr_at_10": 0.38665, + "mrr_at_100": 0.3964, + "mrr_at_1000": 0.39681, + "mrr_at_3": 0.35207, + "mrr_at_5": 0.3731, + "ndcg_at_1": 0.26709, + "ndcg_at_10": 0.42942, + "ndcg_at_100": 0.48296, + "ndcg_at_1000": 0.49651, + "ndcg_at_3": 0.35413, + "ndcg_at_5": 0.39368, + "precision_at_1": 0.26709, + "precision_at_10": 0.07306, + "precision_at_100": 0.01029, + "precision_at_1000": 0.00116, + "precision_at_3": 0.16348, + "precision_at_5": 0.12068, + "recall_at_1": 0.23467, + "recall_at_10": 0.61493, + "recall_at_100": 0.85011, + "recall_at_1000": 0.95261, + "recall_at_3": 0.41952, + "recall_at_5": 0.51106 + } +} \ No newline at end of file diff --git a/evaluation/mteb/QuoraRetrieval.json b/evaluation/mteb/QuoraRetrieval.json new file mode 100644 index 0000000000000000000000000000000000000000..21d2067f2a225ff7c2b3d675096c2c1d8e3d1163 --- /dev/null +++ b/evaluation/mteb/QuoraRetrieval.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 742.32, + "map_at_1": 0.67517, + "map_at_10": 0.81054, + "map_at_100": 0.81727, + "map_at_1000": 0.81752, + "map_at_3": 0.78018, + "map_at_5": 0.79879, + "mrr_at_1": 0.7752, + "mrr_at_10": 0.84429, + "mrr_at_100": 0.84582, + "mrr_at_1000": 0.84584, + "mrr_at_3": 0.83268, + "mrr_at_5": 0.84013, + "ndcg_at_1": 0.7753, + "ndcg_at_10": 0.85277, + "ndcg_at_100": 0.86805, + "ndcg_at_1000": 0.8701, + "ndcg_at_3": 0.81975, + "ndcg_at_5": 0.83723, + "precision_at_1": 0.7753, + "precision_at_10": 0.12961, + "precision_at_100": 0.01502, + "precision_at_1000": 0.00156, + "precision_at_3": 0.35713, + "precision_at_5": 0.23574, + "recall_at_1": 0.67517, + "recall_at_10": 0.93486, + "recall_at_100": 0.989, + "recall_at_1000": 0.9993, + "recall_at_3": 0.8418, + "recall_at_5": 0.88975 + } +} \ No newline at end of file diff --git a/evaluation/mteb/RedditClustering.json b/evaluation/mteb/RedditClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..8291e6169e8f50cabd97d951b8dfb00cf3cbe7d8 --- /dev/null +++ b/evaluation/mteb/RedditClustering.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 3324.44, + "v_measure": 0.48225994608749917, + "v_measure_std": 0.055941572162319216 + } +} \ No newline at end of file diff --git a/evaluation/mteb/RedditClusteringP2P.json b/evaluation/mteb/RedditClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..18994626e38fb5b303366a9a993eb6629b76bb73 --- /dev/null +++ b/evaluation/mteb/RedditClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 13390.64, + "v_measure": 0.5317635557157765, + "v_measure_std": 0.12570620726605897 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SCIDOCS.json b/evaluation/mteb/SCIDOCS.json new file mode 100644 index 0000000000000000000000000000000000000000..a033fd75a00e57dbb9dac5289ad7a59db6919f2b --- /dev/null +++ b/evaluation/mteb/SCIDOCS.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 214.39, + "map_at_1": 0.03988, + "map_at_10": 0.094, + "map_at_100": 0.10968, + "map_at_1000": 0.11257, + "map_at_3": 0.07123, + "map_at_5": 0.08221, + "mrr_at_1": 0.197, + "mrr_at_10": 0.29098, + "mrr_at_100": 0.30247, + "mrr_at_1000": 0.30318, + "mrr_at_3": 0.2655, + "mrr_at_5": 0.27915, + "ndcg_at_1": 0.197, + "ndcg_at_10": 0.16176, + "ndcg_at_100": 0.22931, + "ndcg_at_1000": 0.28301, + "ndcg_at_3": 0.16142, + "ndcg_at_5": 0.13634, + "precision_at_1": 0.197, + "precision_at_10": 0.0818, + "precision_at_100": 0.01801, + "precision_at_1000": 0.00309, + "precision_at_3": 0.151, + "precision_at_5": 0.1174, + "recall_at_1": 0.03988, + "recall_at_10": 0.16625, + "recall_at_100": 0.3661, + "recall_at_1000": 0.62805, + "recall_at_3": 0.09168, + "recall_at_5": 0.11902 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SGPT-1.3B-weightedmean-msmarco-specb-bitfit_results.csv b/evaluation/mteb/SGPT-1.3B-weightedmean-msmarco-specb-bitfit_results.csv new file mode 100644 index 0000000000000000000000000000000000000000..2e8d3d2b8a036cbf25dea500595a1024450bcd04 --- /dev/null +++ b/evaluation/mteb/SGPT-1.3B-weightedmean-msmarco-specb-bitfit_results.csv @@ -0,0 +1,355 @@ +model,task,dataset,language,metric,value +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,BitextMining,BUCC,,f1, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,BitextMining,Tatoeba,,f1, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,en,accuracy,0.652089552238806 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,en-ext,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,en-ext,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,ja,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonCounterfactualClassification,ja,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonPolarityClassification,en,accuracy,0.7320565 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,en,accuracy,0.34956 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,ja,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,ja,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,zh,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,AmazonReviewsClassification,zh,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,Banking77Classification,en,accuracy,0.8205844155844156 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,EmotionClassification,en,accuracy,0.46390000000000003 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,ImdbClassification,en,accuracy,0.6404799999999999 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,af,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,af,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,am,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,am,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ar,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ar,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,az,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,az,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,bn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,bn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,cy,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,cy,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,da,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,da,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,el,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,el,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,en,accuracy,0.6864828513786148 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fa,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fa,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,he,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,he,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hu,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hu,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hy,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,hy,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,id,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,id,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,is,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,is,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,it,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,it,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ja,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ja,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,jv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,jv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ka,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ka,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,km,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,km,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,kn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,kn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ko,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ko,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,lv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,lv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ml,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ml,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,mn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,mn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ms,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ms,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,my,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,my,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nb,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nb,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,nl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pt,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,pt,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ro,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ro,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ru,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ru,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sq,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sq,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sw,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,sw,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ta,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ta,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,te,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,te,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,th,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,th,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,tr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ur,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,ur,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,vi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,vi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-CN,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-CN,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-TW,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveIntentClassification,zh-TW,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,af,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,af,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,am,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,am,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ar,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ar,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,az,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,az,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,bn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,bn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,cy,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,cy,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,da,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,da,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,el,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,el,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,en,accuracy,0.7604236718224613 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fa,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fa,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,he,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,he,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hu,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hu,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hy,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,hy,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,id,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,id,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,is,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,is,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,it,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,it,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ja,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ja,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,jv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,jv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ka,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ka,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,km,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,km,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,kn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,kn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ko,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ko,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,lv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,lv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ml,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ml,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,mn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,mn,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ms,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ms,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,my,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,my,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nb,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nb,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,nl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pt,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,pt,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ro,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ro,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ru,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ru,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sq,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sq,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sv,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sw,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,sw,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ta,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ta,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,te,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,te,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,th,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,th,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tl,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,tr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ur,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,ur,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,vi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,vi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-CN,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-CN,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-TW,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MassiveScenarioClassification,zh-TW,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,en,accuracy,0.9207706338349293 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,hi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,hi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,th,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPDomainClassification,th,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,en,accuracy,0.7118559051527589 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,de,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,es,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,fr,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,hi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,hi,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,th,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,MTOPIntentClassification,th,accuracy, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,ToxicConversationsClassification,en,accuracy,0.687272 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,TweetSentimentExtractionClassification,en,accuracy,0.556677985285795 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,ArxivClusteringP2P,en,v_measure,0.4338419491695377 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,ArxivClusteringS2S,en,v_measure,0.3370962633433912 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,BiorxivClusteringP2P,en,v_measure,0.3505918333141837 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,BiorxivClusteringS2S,en,v_measure,0.3071055028830579 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,MedrxivClusteringP2P,en,v_measure,0.320840369055247 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,MedrxivClusteringS2S,en,v_measure,0.29448729560244535 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,RedditClustering,en,v_measure,0.48225994608749917 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,RedditClusteringP2P,en,v_measure,0.5317635557157765 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,StackExchangeClustering,en,v_measure,0.6085593925770172 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,StackExchangeClusteringP2P,en,v_measure,0.323567729982375 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,TwentyNewsgroupsClustering,en,v_measure,0.4005809562275603 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,PairClassification,SprintDuplicateQuestions,en,ap,0.9257836032673468 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,PairClassification,TwitterSemEval2015,en,ap,0.6237052608803734 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,PairClassification,TwitterURLCorpus,en,ap,0.8378748636891035 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Reranking,AskUbuntuDupQuestions,en,map,0.5813305899687008 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Reranking,MindSmallReranking,en,map,0.31340856463122374 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Reranking,SciDocsRR,en,map,0.7721233007730808 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Reranking,StackOverflowDupQuestions,en,map,0.49320607035290737 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,ArguAna,en,ndcg_at_10,0.4968 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,ClimateFEVER,en,ndcg_at_10,0.26595 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,CQADupstackRetrieval,en,ndcg_at_10,0.3332616666666667 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,DBPedia,en,ndcg_at_10,0.31507 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,FEVER,en,ndcg_at_10,0.6812 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,FiQA2018,en,ndcg_at_10,0.29988 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,HotpotQA,en,ndcg_at_10,0.49931 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,MSMARCO,en,ndcg_at_10,0.36048 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,NFCorpus,en,ndcg_at_10,0.32083 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,NQ,en,ndcg_at_10,0.42942 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,QuoraRetrieval,en,ndcg_at_10,0.85277 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,SCIDOCS,en,ndcg_at_10,0.16176 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,SciFact,en,ndcg_at_10,0.68287 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,Touche2020,en,ndcg_at_10,0.2445 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,TRECCOVID,en,ndcg_at_10,0.7298 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,BIOSSES,en,cosine_spearman,0.8301514456843275 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,SICK-R,en,cosine_spearman,0.6722953551111448 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS12,en,cosine_spearman,0.6659241013465915 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS13,en,cosine_spearman,0.7733151317062642 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS14,en,cosine_spearman,0.7182815265534886 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS15,en,cosine_spearman,0.8065931167868883 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS16,en,cosine_spearman,0.7891151365939403 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,ko-ko,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,ko-ko,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,ar-ar,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,ar-ar,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-ar,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-ar,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-de,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-de,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-en,cosine_spearman,0.8698674844768606 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-tr,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,en-tr,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,es-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,es-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,es-es,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,es-es,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,fr-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,fr-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,it-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,it-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,nl-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS17,nl-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,en,cosine_spearman,0.6730387805090023 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,de,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,de,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,es,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,es,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,pl,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,pl,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,tr,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,tr,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,ar,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,ar,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,ru,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,ru,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,zh,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,zh,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,fr,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,fr,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,es-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,es-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,it,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,it,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,pl-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,pl-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,zh-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,zh-en,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,es-it,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,es-it,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-fr,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-fr,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-pl,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,de-pl,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,fr-pl,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STS22,fr-pl,cosine_spearman, +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,STSBenchmark,en,cosine_spearman,0.775887175717799 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Summarization,SummEval,en,cosine_spearman,0.25440272876996695 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Classification,average,en,accuracy,0.6651794596081484 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Clustering,average,en,v_measure,0.3991540721780082 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,PairClassification,average,en,ap,0.7957879092789413 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Reranking,average,en,map,0.5400171314314782 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,Retrieval,average,en,ndcg_at_10,0.4449267777777778 +SGPT-1.3B-weightedmean-msmarco-specb-bitfit,STS,average,en,cosine_spearman,0.7574469254486307 diff --git a/evaluation/mteb/SICK-R.json b/evaluation/mteb/SICK-R.json new file mode 100644 index 0000000000000000000000000000000000000000..073143c668809ea6fd2be5e962adf91480e1d93e --- /dev/null +++ b/evaluation/mteb/SICK-R.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7729330379162072, + "spearman": 0.6722953551111448 + }, + "euclidean": { + "pearson": 0.7144682700059415, + "spearman": 0.6633178012153248 + }, + "evaluation_time": 52.38, + "manhattan": { + "pearson": 0.7146941734657887, + "spearman": 0.6643234359835815 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS12.json b/evaluation/mteb/STS12.json new file mode 100644 index 0000000000000000000000000000000000000000..e1056ff28d205d1e180ac1b2fad35987e2278718 --- /dev/null +++ b/evaluation/mteb/STS12.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7540943196466576, + "spearman": 0.6659241013465915 + }, + "euclidean": { + "pearson": 0.7132500540796616, + "spearman": 0.6786667467202591 + }, + "evaluation_time": 26.18, + "manhattan": { + "pearson": 0.7148209832089134, + "spearman": 0.6794511626964879 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS13.json b/evaluation/mteb/STS13.json new file mode 100644 index 0000000000000000000000000000000000000000..46fcd35588525121a4e4c7c4907b64b1be27e7ca --- /dev/null +++ b/evaluation/mteb/STS13.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7708302398877518, + "spearman": 0.7733151317062642 + }, + "euclidean": { + "pearson": 0.7677020279715008, + "spearman": 0.7713893776083225 + }, + "evaluation_time": 12.09, + "manhattan": { + "pearson": 0.7676732290707478, + "spearman": 0.7714500877396631 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS14.json b/evaluation/mteb/STS14.json new file mode 100644 index 0000000000000000000000000000000000000000..9947966cf08a18adfd4e3ad322792423d5d08797 --- /dev/null +++ b/evaluation/mteb/STS14.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7746886184932169, + "spearman": 0.7182815265534886 + }, + "euclidean": { + "pearson": 0.7519783284299076, + "spearman": 0.7136479611710412 + }, + "evaluation_time": 26.49, + "manhattan": { + "pearson": 0.7530375233959338, + "spearman": 0.7146280266488021 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS15.json b/evaluation/mteb/STS15.json new file mode 100644 index 0000000000000000000000000000000000000000..c622d3ea3e5bab75fc5b5c954f5aae1cae1b0ab2 --- /dev/null +++ b/evaluation/mteb/STS15.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.80093017609484, + "spearman": 0.8065931167868883 + }, + "euclidean": { + "pearson": 0.8036786337117048, + "spearman": 0.8130521389642827 + }, + "evaluation_time": 22.02, + "manhattan": { + "pearson": 0.8037922433220973, + "spearman": 0.8130496664496284 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS16.json b/evaluation/mteb/STS16.json new file mode 100644 index 0000000000000000000000000000000000000000..0df71365533f61ca62070122390a4508739f77dd --- /dev/null +++ b/evaluation/mteb/STS16.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7798998347238743, + "spearman": 0.7891151365939403 + }, + "euclidean": { + "pearson": 0.764051089921784, + "spearman": 0.768551459824213 + }, + "evaluation_time": 11.02, + "manhattan": { + "pearson": 0.763986079603294, + "spearman": 0.768848053254288 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS17.json b/evaluation/mteb/STS17.json new file mode 100644 index 0000000000000000000000000000000000000000..b87e5613f49db2bd21cf0189faf2a30559c9fb7b --- /dev/null +++ b/evaluation/mteb/STS17.json @@ -0,0 +1,21 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "en-en": { + "cos_sim": { + "pearson": 0.8563510653472044, + "spearman": 0.8698674844768606 + }, + "euclidean": { + "pearson": 0.85205080538809, + "spearman": 0.8553630494151886 + }, + "manhattan": { + "pearson": 0.8548612469885626, + "spearman": 0.8581741413931921 + } + }, + "evaluation_time": 2.89 + } +} \ No newline at end of file diff --git a/evaluation/mteb/STS22.json b/evaluation/mteb/STS22.json new file mode 100644 index 0000000000000000000000000000000000000000..b6f003969e72c3e7dcdedebb43604fdd9a7a9736 --- /dev/null +++ b/evaluation/mteb/STS22.json @@ -0,0 +1,21 @@ +{ + "dataset_version": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_version": "0.0.2", + "test": { + "en": { + "cos_sim": { + "pearson": 0.667257987615171, + "spearman": 0.6730387805090023 + }, + "euclidean": { + "pearson": 0.6946877227885866, + "spearman": 0.6933161798704344 + }, + "manhattan": { + "pearson": 0.6982773311626425, + "spearman": 0.6957199940498796 + } + }, + "evaluation_time": 19.09 + } +} \ No newline at end of file diff --git a/evaluation/mteb/STSBenchmark.json b/evaluation/mteb/STSBenchmark.json new file mode 100644 index 0000000000000000000000000000000000000000..eda1556c7787e5a254cf12fe6d0c4810eee1f67e --- /dev/null +++ b/evaluation/mteb/STSBenchmark.json @@ -0,0 +1,19 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.7937322139418472, + "spearman": 0.775887175717799 + }, + "euclidean": { + "pearson": 0.7823006410562163, + "spearman": 0.7718470385673044 + }, + "evaluation_time": 10.45, + "manhattan": { + "pearson": 0.7840868369362456, + "spearman": 0.7736675823897655 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/SciDocsRR.json b/evaluation/mteb/SciDocsRR.json new file mode 100644 index 0000000000000000000000000000000000000000..c006ee4575e2168d0cf416c509a81bbf8e86b587 --- /dev/null +++ b/evaluation/mteb/SciDocsRR.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 524.28, + "map": 0.7721233007730808, + "mrr": 0.930502386139641 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SciFact.json b/evaluation/mteb/SciFact.json new file mode 100644 index 0000000000000000000000000000000000000000..117515553812190d0655442189a39abb06294408 --- /dev/null +++ b/evaluation/mteb/SciFact.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 102.49, + "map_at_1": 0.54567, + "map_at_10": 0.63653, + "map_at_100": 0.64282, + "map_at_1000": 0.64311, + "map_at_3": 0.60478, + "map_at_5": 0.62322, + "mrr_at_1": 0.57, + "mrr_at_10": 0.64759, + "mrr_at_100": 0.65274, + "mrr_at_1000": 0.65301, + "mrr_at_3": 0.62333, + "mrr_at_5": 0.63817, + "ndcg_at_1": 0.57, + "ndcg_at_10": 0.68287, + "ndcg_at_100": 0.70984, + "ndcg_at_1000": 0.71695, + "ndcg_at_3": 0.62656, + "ndcg_at_5": 0.65523, + "precision_at_1": 0.57, + "precision_at_10": 0.09233, + "precision_at_100": 0.01063, + "precision_at_1000": 0.00112, + "precision_at_3": 0.24222, + "precision_at_5": 0.16333, + "recall_at_1": 0.54567, + "recall_at_10": 0.81456, + "recall_at_100": 0.935, + "recall_at_1000": 0.99, + "recall_at_3": 0.66228, + "recall_at_5": 0.73489 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SprintDuplicateQuestions.json b/evaluation/mteb/SprintDuplicateQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..af1710cccb6a1c0b8624803ceb8763db92be2578 --- /dev/null +++ b/evaluation/mteb/SprintDuplicateQuestions.json @@ -0,0 +1,48 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "accuracy": 0.9974455445544554, + "accuracy_threshold": 0.7414648532867432, + "ap": 0.9257836032673468, + "f1": 0.8704714640198511, + "f1_threshold": 0.7260521650314331, + "precision": 0.864039408866995, + "recall": 0.877 + }, + "dot": { + "accuracy": 0.9956039603960396, + "accuracy_threshold": 2157.26416015625, + "ap": 0.8247233353407186, + "f1": 0.7678207739307537, + "f1_threshold": 2119.027099609375, + "precision": 0.7821576763485477, + "recall": 0.754 + }, + "euclidean": { + "accuracy": 0.9973069306930693, + "accuracy_threshold": 37.43426513671875, + "ap": 0.9170507666665775, + "f1": 0.8626262626262625, + "f1_threshold": 38.686073303222656, + "precision": 0.8714285714285714, + "recall": 0.854 + }, + "evaluation_time": 37.38, + "manhattan": { + "accuracy": 0.9973861386138614, + "accuracy_threshold": 1329.514404296875, + "ap": 0.9196809459281754, + "f1": 0.866, + "f1_threshold": 1371.76220703125, + "precision": 0.866, + "recall": 0.866 + }, + "max": { + "accuracy": 0.9974455445544554, + "ap": 0.9257836032673468, + "f1": 0.8704714640198511 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/StackExchangeClustering.json b/evaluation/mteb/StackExchangeClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..f96fa6ae12da537418dc6a8aab9bdce961c23b63 --- /dev/null +++ b/evaluation/mteb/StackExchangeClustering.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 2284.12, + "v_measure": 0.6085593925770172, + "v_measure_std": 0.038482156020085645 + } +} \ No newline at end of file diff --git a/evaluation/mteb/StackExchangeClusteringP2P.json b/evaluation/mteb/StackExchangeClusteringP2P.json new file mode 100644 index 0000000000000000000000000000000000000000..88a72c74071106452694209b316b4e617ee9c0f8 --- /dev/null +++ b/evaluation/mteb/StackExchangeClusteringP2P.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 2853.86, + "v_measure": 0.323567729982375, + "v_measure_std": 0.013801405842732442 + } +} \ No newline at end of file diff --git a/evaluation/mteb/StackOverflowDupQuestions.json b/evaluation/mteb/StackOverflowDupQuestions.json new file mode 100644 index 0000000000000000000000000000000000000000..ab01ddd325ffe2306bb47b83cfad796afc064340 --- /dev/null +++ b/evaluation/mteb/StackOverflowDupQuestions.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 280.13, + "map": 0.49320607035290737, + "mrr": 0.5009196481622952 + } +} \ No newline at end of file diff --git a/evaluation/mteb/SummEval.json b/evaluation/mteb/SummEval.json new file mode 100644 index 0000000000000000000000000000000000000000..b783cc2ec388c4b5158bcc4a2670719bca436d03 --- /dev/null +++ b/evaluation/mteb/SummEval.json @@ -0,0 +1,15 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "pearson": 0.2557602918901377, + "spearman": 0.25440272876996695 + }, + "dot": { + "pearson": 0.24909680980895063, + "spearman": 0.24032627570006823 + }, + "evaluation_time": 507.55 + } +} \ No newline at end of file diff --git a/evaluation/mteb/TRECCOVID.json b/evaluation/mteb/TRECCOVID.json new file mode 100644 index 0000000000000000000000000000000000000000..02d35909717b8c979ede2b2724ad2c6d117d0939 --- /dev/null +++ b/evaluation/mteb/TRECCOVID.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 810.57, + "map_at_1": 0.00221, + "map_at_10": 0.01723, + "map_at_100": 0.09195, + "map_at_1000": 0.21999, + "map_at_3": 0.00648, + "map_at_5": 0.00964, + "mrr_at_1": 0.86, + "mrr_at_10": 0.90667, + "mrr_at_100": 0.90858, + "mrr_at_1000": 0.90858, + "mrr_at_3": 0.90667, + "mrr_at_5": 0.90667, + "ndcg_at_1": 0.82, + "ndcg_at_10": 0.7298, + "ndcg_at_100": 0.52868, + "ndcg_at_1000": 0.46541, + "ndcg_at_3": 0.80397, + "ndcg_at_5": 0.76303, + "precision_at_1": 0.86, + "precision_at_10": 0.758, + "precision_at_100": 0.535, + "precision_at_1000": 0.20946, + "precision_at_3": 0.85333, + "precision_at_5": 0.792, + "recall_at_1": 0.00221, + "recall_at_10": 0.01911, + "recall_at_100": 0.12437, + "recall_at_1000": 0.43606, + "recall_at_3": 0.00681, + "recall_at_5": 0.01023 + } +} \ No newline at end of file diff --git a/evaluation/mteb/Touche2020.json b/evaluation/mteb/Touche2020.json new file mode 100644 index 0000000000000000000000000000000000000000..4db0a75af26787c93a55e40c8d99b3f50189296b --- /dev/null +++ b/evaluation/mteb/Touche2020.json @@ -0,0 +1,37 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 1503.97, + "map_at_1": 0.025, + "map_at_10": 0.09569, + "map_at_100": 0.15653, + "map_at_1000": 0.17188, + "map_at_3": 0.05336, + "map_at_5": 0.06522, + "mrr_at_1": 0.34694, + "mrr_at_10": 0.49184, + "mrr_at_100": 0.50512, + "mrr_at_1000": 0.50512, + "mrr_at_3": 0.46259, + "mrr_at_5": 0.48299, + "ndcg_at_1": 0.30612, + "ndcg_at_10": 0.2445, + "ndcg_at_100": 0.35871, + "ndcg_at_1000": 0.47273, + "ndcg_at_3": 0.28528, + "ndcg_at_5": 0.25768, + "precision_at_1": 0.34694, + "precision_at_10": 0.21429, + "precision_at_100": 0.07265, + "precision_at_1000": 0.01504, + "precision_at_3": 0.29252, + "precision_at_5": 0.24898, + "recall_at_1": 0.025, + "recall_at_10": 0.15844, + "recall_at_100": 0.45469, + "recall_at_1000": 0.81148, + "recall_at_3": 0.06496, + "recall_at_5": 0.0879 + } +} \ No newline at end of file diff --git a/evaluation/mteb/ToxicConversationsClassification.json b/evaluation/mteb/ToxicConversationsClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..f61263a004b1b46f3fbc073d6c2bec3678aa4a8e --- /dev/null +++ b/evaluation/mteb/ToxicConversationsClassification.json @@ -0,0 +1,14 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.687272, + "accuracy_stderr": 0.03348102292344127, + "ap": 0.13156450706152686, + "ap_stderr": 0.005960626601835911, + "evaluation_time": 722.59, + "f1": 0.5281470343706439, + "f1_stderr": 0.01880416225488776, + "main_score": 0.687272 + } +} \ No newline at end of file diff --git a/evaluation/mteb/TweetSentimentExtractionClassification.json b/evaluation/mteb/TweetSentimentExtractionClassification.json new file mode 100644 index 0000000000000000000000000000000000000000..f11e565579f55af212074b8fa14b10bb0c38605c --- /dev/null +++ b/evaluation/mteb/TweetSentimentExtractionClassification.json @@ -0,0 +1,12 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "accuracy": 0.556677985285795, + "accuracy_stderr": 0.01228542780288399, + "evaluation_time": 38.52, + "f1": 0.559373937514999, + "f1_stderr": 0.012418163020884165, + "main_score": 0.556677985285795 + } +} \ No newline at end of file diff --git a/evaluation/mteb/TwentyNewsgroupsClustering.json b/evaluation/mteb/TwentyNewsgroupsClustering.json new file mode 100644 index 0000000000000000000000000000000000000000..7c5ba6e078f94370338c35860e14a74ac49b956e --- /dev/null +++ b/evaluation/mteb/TwentyNewsgroupsClustering.json @@ -0,0 +1,9 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "evaluation_time": 174.06, + "v_measure": 0.4005809562275603, + "v_measure_std": 0.018355068492832586 + } +} \ No newline at end of file diff --git a/evaluation/mteb/TwitterSemEval2015.json b/evaluation/mteb/TwitterSemEval2015.json new file mode 100644 index 0000000000000000000000000000000000000000..61132ec121fda3e5ca28aa58b796913b55adbb40 --- /dev/null +++ b/evaluation/mteb/TwitterSemEval2015.json @@ -0,0 +1,48 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "accuracy": 0.8276807534124099, + "accuracy_threshold": 0.7320027351379395, + "ap": 0.6237052608803734, + "f1": 0.5907741493491665, + "f1_threshold": 0.6374467015266418, + "precision": 0.5207326892109501, + "recall": 0.6825857519788918 + }, + "dot": { + "accuracy": 0.8056267509089825, + "accuracy_threshold": 2350.834716796875, + "ap": 0.5475349561321037, + "f1": 0.5475483794372552, + "f1_threshold": 1947.31787109375, + "precision": 0.4977336499028707, + "recall": 0.60844327176781 + }, + "euclidean": { + "accuracy": 0.82476008821601, + "accuracy_threshold": 40.10894775390625, + "ap": 0.6117417554210511, + "f1": 0.5780318696022382, + "f1_threshold": 45.30638122558594, + "precision": 0.5362220717670955, + "recall": 0.6269129287598945 + }, + "evaluation_time": 56.65, + "manhattan": { + "accuracy": 0.8248792990403528, + "accuracy_threshold": 1387.340576171875, + "ap": 0.6104481629296654, + "f1": 0.5803033951360462, + "f1_threshold": 1590.137451171875, + "precision": 0.5336581045172719, + "recall": 0.6358839050131926 + }, + "max": { + "accuracy": 0.8276807534124099, + "ap": 0.6237052608803734, + "f1": 0.5907741493491665 + } + } +} \ No newline at end of file diff --git a/evaluation/mteb/TwitterURLCorpus.json b/evaluation/mteb/TwitterURLCorpus.json new file mode 100644 index 0000000000000000000000000000000000000000..d4cb0df751ce05d77a9c07e46726afbcc303a39f --- /dev/null +++ b/evaluation/mteb/TwitterURLCorpus.json @@ -0,0 +1,48 @@ +{ + "dataset_version": null, + "mteb_version": "0.0.2", + "test": { + "cos_sim": { + "accuracy": 0.8797881010594947, + "accuracy_threshold": 0.6835847496986389, + "ap": 0.8378748636891035, + "f1": 0.7594113995691386, + "f1_threshold": 0.6381056308746338, + "precision": 0.7222029307590805, + "recall": 0.8006621496766245 + }, + "dot": { + "accuracy": 0.8569294058291613, + "accuracy_threshold": 2096.25537109375, + "ap": 0.7815363722278026, + "f1": 0.7208894926888564, + "f1_threshold": 1926.618408203125, + "precision": 0.6728959487419075, + "recall": 0.7762550046196489 + }, + "euclidean": { + "accuracy": 0.877362517949315, + "accuracy_threshold": 44.67449188232422, + "ap": 0.8319012184470559, + "f1": 0.7551480646234611, + "f1_threshold": 46.798240661621094, + "precision": 0.7263352535381552, + "recall": 0.786341238065907 + }, + "evaluation_time": 221.47, + "manhattan": { + "accuracy": 0.8774013272790779, + "accuracy_threshold": 1568.80908203125, + "ap": 0.8323305405113404, + "f1": 0.7563960775639608, + "f1_threshold": 1638.9019775390625, + "precision": 0.72563304569246, + "recall": 0.789882968894364 + }, + "max": { + "accuracy": 0.8797881010594947, + "ap": 0.8378748636891035, + "f1": 0.7594113995691386 + } + } +} \ No newline at end of file