Commit 692ba48 (verified) by xin0920 · 1 Parent(s): 671889e

Add new SparseEncoder model
1_SpladePooling/config.json ADDED
@@ -0,0 +1,5 @@
{
  "pooling_strategy": "max",
  "activation_function": "relu",
  "word_embedding_dimension": 30522
}
README.md ADDED
@@ -0,0 +1,1428 @@
---
language:
- en
license: apache-2.0
tags:
- sentence-transformers
- sparse-encoder
- sparse
- splade
- generated_from_trainer
- dataset_size:99000
- loss:SpladeLoss
- loss:SparseMultipleNegativesRankingLoss
- loss:FlopsLoss
base_model: distilbert/distilbert-base-uncased
widget:
- source_sentence: Time Travel Is It Possible?
  sentences:
  - Why can you not accelerate to faster than light?
  - Is time travel possible? If yes how
  - What do you hAve to say about time travel (I am not science student but I read
    it on net and its so exciting topic but still no clear idea that is it possible
    or it's just a rumour)?
- source_sentence: How can one be a good product manager?
  sentences:
  - How Do I become a product manager?
  - Can you make online friends with other people on Quora?
  - How do I become a product designer?
- source_sentence: How do I start a business? Where can I get a funding in India if
    I have a really good idea?
  sentences:
  - I have an awesome app/website idea which may get more than a billion users. But
    I don't have required money and coding skills. I tried crowd-funding but didn't
    help. What should I do?
  - How do I get funding for my web based startup idea?
  - What is the most powerful dog?
- source_sentence: What are your favorite questions asked on Quora?
  sentences:
  - What are your favorite Quora questions and answers?
  - How do you become a Successfull Game Developer?
  - Who is your favorite Quora follower?
- source_sentence: Which laptop is best under 25000 INR?
  sentences:
  - Why was the 1000 rupee note replaced with a 2000 rupee note?
  - What is the best laptop under 45k?
  - What are the best laptops under 25k?
datasets:
- sentence-transformers/quora-duplicates
pipeline_tag: feature-extraction
library_name: sentence-transformers
metrics:
- dot_accuracy@1
- dot_accuracy@3
- dot_accuracy@5
- dot_accuracy@10
- dot_precision@1
- dot_precision@3
- dot_precision@5
- dot_precision@10
- dot_recall@1
- dot_recall@3
- dot_recall@5
- dot_recall@10
- dot_ndcg@10
- dot_mrr@10
- dot_map@100
- row_non_zero_mean_query
- row_sparsity_mean_query
- row_non_zero_mean_corpus
- row_sparsity_mean_corpus
model-index:
- name: splade-distilbert-base-uncased trained on Quora Duplicates Questions
  results:
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoClimateFEVER
      type: NanoClimateFEVER
    metrics:
    - type: dot_accuracy@1
      value: 0.2
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.34
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.38
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.46
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.2
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.12
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.084
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.05800000000000001
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.08833333333333332
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.15333333333333332
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.17166666666666663
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.2223333333333333
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.19096782240643292
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.27904761904761904
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.1448665229843916
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 83.12000274658203
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.997276782989502
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 196.82540893554688
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9935513138771057
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoDBPedia
      type: NanoDBPedia
    metrics:
    - type: dot_accuracy@1
      value: 0.46
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.66
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.76
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.82
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.46
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.4599999999999999
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.41200000000000003
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.34800000000000003
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.024992243870767848
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.08610042820194802
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.1356349864336842
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.2108700010340366
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.4008410950979539
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.5753888888888887
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.23475075762293293
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 110.18000030517578
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9963901042938232
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 146.9065399169922
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9951868057250977
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoFEVER
      type: NanoFEVER
    metrics:
    - type: dot_accuracy@1
      value: 0.56
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.64
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.72
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.82
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.56
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.2333333333333333
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.15600000000000003
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.088
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.5266666666666666
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.6333333333333333
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.7133333333333333
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.8133333333333332
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.6697436984572378
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.6316349206349205
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.6281723194238796
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 96.77999877929688
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9968292117118835
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 219.1212921142578
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9928209185600281
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoFiQA2018
      type: NanoFiQA2018
    metrics:
    - type: dot_accuracy@1
      value: 0.14
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.32
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.36
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.44
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.14
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.12
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.10400000000000001
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.068
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.06783333333333333
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.14569047619047618
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.20004761904761903
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.2636825396825397
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.19745078204560165
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.23552380952380955
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.14731140504396462
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 80.33999633789062
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9973678588867188
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 125.915771484375
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9958745241165161
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoHotpotQA
      type: NanoHotpotQA
    metrics:
    - type: dot_accuracy@1
      value: 0.46
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.66
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.72
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.84
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.46
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.25333333333333335
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.176
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.11
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.23
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.38
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.44
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.55
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.4642094806420616
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.5762777777777778
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.3781729878529178
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 87.26000213623047
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9971410632133484
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 166.47190856933594
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9945458173751831
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoMSMARCO
      type: NanoMSMARCO
    metrics:
    - type: dot_accuracy@1
      value: 0.16
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.26
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.36
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.46
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.16
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.08666666666666666
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.07200000000000001
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.046000000000000006
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.16
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.26
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.36
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.46
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.2889744107825637
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.23699999999999996
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.2547054047317205
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 96.05999755859375
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.996852695941925
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 105.46202850341797
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9965446591377258
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoNFCorpus
      type: NanoNFCorpus
    metrics:
    - type: dot_accuracy@1
      value: 0.28
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.36
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.4
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.44
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.28
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.18666666666666665
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.18
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.14800000000000002
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.01004738213752895
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.017620026805744985
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.031161291315801767
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.04364801295748046
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.16900908943281664
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.3281666666666666
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.04873203232918475
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 122.94000244140625
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9959720373153687
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 199.5936279296875
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9934607744216919
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoNQ
      type: NanoNQ
    metrics:
    - type: dot_accuracy@1
      value: 0.18
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.34
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.4
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.48
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.18
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.11333333333333333
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.08
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.04800000000000001
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.17
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.32
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.38
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.46
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.30557584177037744
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.26749206349206345
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.26111102151483273
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 79.22000122070312
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9974044561386108
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 145.250244140625
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.995241105556488
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoQuoraRetrieval
      type: NanoQuoraRetrieval
    metrics:
    - type: dot_accuracy@1
      value: 0.92
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.96
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 1.0
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 1.0
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.92
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.3733333333333333
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.256
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.132
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.8206666666666667
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.8986666666666667
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.9726666666666667
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.9826666666666667
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.9456812009077233
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.95
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.9232605046294702
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 73.83999633789062
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9975807070732117
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 74.96769714355469
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9975438117980957
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoSCIDOCS
      type: NanoSCIDOCS
    metrics:
    - type: dot_accuracy@1
      value: 0.36
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.5
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.62
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.7
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.36
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.26
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.19199999999999995
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.12399999999999999
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.07666666666666666
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.16166666666666665
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.19766666666666666
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.25466666666666665
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.2640445339047696
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.45502380952380955
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.18681370322897212
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 95.91999816894531
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9968574047088623
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 184.44908142089844
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9939568638801575
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoArguAna
      type: NanoArguAna
    metrics:
    - type: dot_accuracy@1
      value: 0.1
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.28
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.32
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.38
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.1
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.09333333333333332
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.064
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.038000000000000006
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.1
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.28
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.32
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.38
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.24652298080535653
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.2033571428571429
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.2089304613637203
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 181.27999877929688
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9940606951713562
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 160.55982971191406
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9947395324707031
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoSciFact
      type: NanoSciFact
    metrics:
    - type: dot_accuracy@1
      value: 0.38
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.56
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.64
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.66
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.38
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.19333333333333333
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.14
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.07200000000000001
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.365
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.54
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.61
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.63
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.5012811403788975
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.4666666666666666
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.4647112383054177
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 90.80000305175781
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9970251321792603
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 197.8948211669922
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9935163259506226
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-information-retrieval
      name: Sparse Information Retrieval
    dataset:
      name: NanoTouche2020
      type: NanoTouche2020
    metrics:
    - type: dot_accuracy@1
      value: 0.4897959183673469
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.7551020408163265
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.8367346938775511
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.9387755102040817
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.4897959183673469
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.43537414965986393
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.42857142857142855
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.336734693877551
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.03231843040459851
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.08325211008018112
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.13623768956747034
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.20745266217275266
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.3790647958645717
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.6323372206025266
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.2305586843086588
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 78.7755126953125
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9974190592765808
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 140.8109588623047
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.9953866004943848
      name: Row Sparsity Mean Corpus
  - task:
      type: sparse-nano-beir
      name: Sparse Nano BEIR
    dataset:
      name: NanoBEIR mean
      type: NanoBEIR_mean
    metrics:
    - type: dot_accuracy@1
      value: 0.3607535321821036
      name: Dot Accuracy@1
    - type: dot_accuracy@3
      value: 0.510392464678179
      name: Dot Accuracy@3
    - type: dot_accuracy@5
      value: 0.578210361067504
      name: Dot Accuracy@5
    - type: dot_accuracy@10
      value: 0.6491365777080063
      name: Dot Accuracy@10
    - type: dot_precision@1
      value: 0.3607535321821036
      name: Dot Precision@1
    - type: dot_precision@3
      value: 0.2252851909994767
      name: Dot Precision@3
    - type: dot_precision@5
      value: 0.18035164835164832
      name: Dot Precision@5
    - type: dot_precision@10
      value: 0.1243642072213501
      name: Dot Precision@10
    - type: dot_recall@1
      value: 0.20557882485227402
      name: Dot Recall@1
    - type: dot_recall@3
      value: 0.3045894647137193
      name: Dot Recall@3
    - type: dot_recall@5
      value: 0.3591088399767622
      name: Dot Recall@5
    - type: dot_recall@10
      value: 0.42143486275744696
      name: Dot Recall@10
    - type: dot_ndcg@10
      value: 0.3864128363458742
      name: Dot Ndcg@10
    - type: dot_mrr@10
      value: 0.44907050659091463
      name: Dot Mrr@10
    - type: dot_map@100
      value: 0.31631515718000486
      name: Dot Map@100
    - type: row_non_zero_mean_query
      value: 98.19350081223708
      name: Row Non Zero Mean Query
    - type: row_sparsity_mean_query
      value: 0.9967828622231116
      name: Row Sparsity Mean Query
    - type: row_non_zero_mean_corpus
      value: 158.7868622999925
      name: Row Non Zero Mean Corpus
    - type: row_sparsity_mean_corpus
      value: 0.994797619489523
      name: Row Sparsity Mean Corpus
---
# splade-distilbert-base-uncased trained on Quora Duplicates Questions

This is a [SPLADE Sparse Encoder](https://www.sbert.net/docs/sparse_encoder/usage/usage.html) model finetuned from [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) on the [quora-duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) dataset using the [sentence-transformers](https://www.SBERT.net) library. It maps sentences & paragraphs to a 30522-dimensional sparse vector space and can be used for semantic search and sparse retrieval.

## Model Details

### Model Description
- **Model Type:** SPLADE Sparse Encoder
- **Base model:** [distilbert/distilbert-base-uncased](https://huggingface.co/distilbert/distilbert-base-uncased) <!-- at revision 12040accade4e8a0f71eabdb258fecc2e7e948be -->
- **Maximum Sequence Length:** 256 tokens
- **Output Dimensionality:** 30522 dimensions
- **Similarity Function:** Dot Product
- **Training Dataset:**
    - [quora-duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates)
- **Language:** en
- **License:** apache-2.0

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Documentation:** [Sparse Encoder Documentation](https://www.sbert.net/docs/sparse_encoder/usage/usage.html)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sparse Encoders on Hugging Face](https://huggingface.co/models?library=sentence-transformers&other=sparse-encoder)

### Full Model Architecture

```
SparseEncoder(
  (0): MLMTransformer({'max_seq_length': 256, 'do_lower_case': False}) with MLMTransformer model: DistilBertForMaskedLM
  (1): SpladePooling({'pooling_strategy': 'max', 'activation_function': 'relu', 'word_embedding_dimension': 30522})
)
```
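
For intuition, here is a minimal sketch of what the `SpladePooling` step above computes, assuming the standard SPLADE formulation (a log-saturated ReLU over the MLM logits, max-pooled over the token positions); it is an illustration, not the library's actual implementation:

```python
# Hypothetical illustration of SpladePooling with pooling_strategy='max' and
# activation_function='relu'.
import torch

def splade_pooling(mlm_logits: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """mlm_logits: (batch, seq_len, 30522) MLM scores; attention_mask: (batch, seq_len)."""
    # log(1 + ReLU(x)): non-negative, and saturates large logits, keeping vectors sparse
    scores = torch.log1p(torch.relu(mlm_logits))
    # zero out padding positions so they cannot win the max
    scores = scores * attention_mask.unsqueeze(-1)
    # 'max' pooling: one weight per vocabulary term, taken over all token positions
    return scores.max(dim=1).values  # (batch, 30522)
```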
## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SparseEncoder

# Download from the 🤗 Hub
model = SparseEncoder("xin0920/splade-distilbert-base-uncased-msmarco-mrl")
# Run inference
sentences = [
    'Which laptop is best under 25000 INR?',
    'What are the best laptops under 25k?',
    'What is the best laptop under 45k?',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# (3, 30522)

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
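
Because each output dimension corresponds to a vocabulary id, the embeddings are interpretable. The following is a minimal sketch (assuming `encode` returned torch tensors, as above) that lists the highest-weighted terms of the first sentence:

```python
import torch

query_vec = embeddings[0]
if query_vec.is_sparse:  # guard: sparse encoders may return torch sparse tensors
    query_vec = query_vec.to_dense()
weights, indices = torch.topk(query_vec, k=10)
# map vocabulary ids back to WordPiece tokens via the underlying tokenizer
tokens = model.tokenizer.convert_ids_to_tokens(indices.tolist())
for token, weight in zip(tokens, weights.tolist()):
    print(f"{token:>12}  {weight:.3f}")
```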
<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Sparse Information Retrieval

* Datasets: `NanoClimateFEVER`, `NanoDBPedia`, `NanoFEVER`, `NanoFiQA2018`, `NanoHotpotQA`, `NanoMSMARCO`, `NanoNFCorpus`, `NanoNQ`, `NanoQuoraRetrieval`, `NanoSCIDOCS`, `NanoArguAna`, `NanoSciFact` and `NanoTouche2020`
* Evaluated with [<code>SparseInformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sparse_encoder/evaluation.html#sentence_transformers.sparse_encoder.evaluation.SparseInformationRetrievalEvaluator)

| Metric | NanoClimateFEVER | NanoDBPedia | NanoFEVER | NanoFiQA2018 | NanoHotpotQA | NanoMSMARCO | NanoNFCorpus | NanoNQ | NanoQuoraRetrieval | NanoSCIDOCS | NanoArguAna | NanoSciFact | NanoTouche2020 |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| dot_accuracy@1 | 0.2 | 0.46 | 0.56 | 0.14 | 0.46 | 0.16 | 0.28 | 0.18 | 0.92 | 0.36 | 0.1 | 0.38 | 0.4898 |
| dot_accuracy@3 | 0.34 | 0.66 | 0.64 | 0.32 | 0.66 | 0.26 | 0.36 | 0.34 | 0.96 | 0.5 | 0.28 | 0.56 | 0.7551 |
| dot_accuracy@5 | 0.38 | 0.76 | 0.72 | 0.36 | 0.72 | 0.36 | 0.4 | 0.4 | 1.0 | 0.62 | 0.32 | 0.64 | 0.8367 |
| dot_accuracy@10 | 0.46 | 0.82 | 0.82 | 0.44 | 0.84 | 0.46 | 0.44 | 0.48 | 1.0 | 0.7 | 0.38 | 0.66 | 0.9388 |
| dot_precision@1 | 0.2 | 0.46 | 0.56 | 0.14 | 0.46 | 0.16 | 0.28 | 0.18 | 0.92 | 0.36 | 0.1 | 0.38 | 0.4898 |
| dot_precision@3 | 0.12 | 0.46 | 0.2333 | 0.12 | 0.2533 | 0.0867 | 0.1867 | 0.1133 | 0.3733 | 0.26 | 0.0933 | 0.1933 | 0.4354 |
| dot_precision@5 | 0.084 | 0.412 | 0.156 | 0.104 | 0.176 | 0.072 | 0.18 | 0.08 | 0.256 | 0.192 | 0.064 | 0.14 | 0.4286 |
| dot_precision@10 | 0.058 | 0.348 | 0.088 | 0.068 | 0.11 | 0.046 | 0.148 | 0.048 | 0.132 | 0.124 | 0.038 | 0.072 | 0.3367 |
| dot_recall@1 | 0.0883 | 0.025 | 0.5267 | 0.0678 | 0.23 | 0.16 | 0.01 | 0.17 | 0.8207 | 0.0767 | 0.1 | 0.365 | 0.0323 |
| dot_recall@3 | 0.1533 | 0.0861 | 0.6333 | 0.1457 | 0.38 | 0.26 | 0.0176 | 0.32 | 0.8987 | 0.1617 | 0.28 | 0.54 | 0.0833 |
| dot_recall@5 | 0.1717 | 0.1356 | 0.7133 | 0.2 | 0.44 | 0.36 | 0.0312 | 0.38 | 0.9727 | 0.1977 | 0.32 | 0.61 | 0.1362 |
| dot_recall@10 | 0.2223 | 0.2109 | 0.8133 | 0.2637 | 0.55 | 0.46 | 0.0436 | 0.46 | 0.9827 | 0.2547 | 0.38 | 0.63 | 0.2075 |
| **dot_ndcg@10** | **0.191** | **0.4008** | **0.6697** | **0.1975** | **0.4642** | **0.289** | **0.169** | **0.3056** | **0.9457** | **0.264** | **0.2465** | **0.5013** | **0.3791** |
| dot_mrr@10 | 0.279 | 0.5754 | 0.6316 | 0.2355 | 0.5763 | 0.237 | 0.3282 | 0.2675 | 0.95 | 0.455 | 0.2034 | 0.4667 | 0.6323 |
| dot_map@100 | 0.1449 | 0.2348 | 0.6282 | 0.1473 | 0.3782 | 0.2547 | 0.0487 | 0.2611 | 0.9233 | 0.1868 | 0.2089 | 0.4647 | 0.2306 |
| row_non_zero_mean_query | 83.12 | 110.18 | 96.78 | 80.34 | 87.26 | 96.06 | 122.94 | 79.22 | 73.84 | 95.92 | 181.28 | 90.8 | 78.7755 |
| row_sparsity_mean_query | 0.9973 | 0.9964 | 0.9968 | 0.9974 | 0.9971 | 0.9969 | 0.996 | 0.9974 | 0.9976 | 0.9969 | 0.9941 | 0.997 | 0.9974 |
| row_non_zero_mean_corpus | 196.8254 | 146.9065 | 219.1213 | 125.9158 | 166.4719 | 105.462 | 199.5936 | 145.2502 | 74.9677 | 184.4491 | 160.5598 | 197.8948 | 140.811 |
| row_sparsity_mean_corpus | 0.9936 | 0.9952 | 0.9928 | 0.9959 | 0.9945 | 0.9965 | 0.9935 | 0.9952 | 0.9975 | 0.994 | 0.9947 | 0.9935 | 0.9954 |

#### Sparse Nano BEIR

* Dataset: `NanoBEIR_mean`
* Evaluated with [<code>SparseNanoBEIREvaluator</code>](https://sbert.net/docs/package_reference/sparse_encoder/evaluation.html#sentence_transformers.sparse_encoder.evaluation.SparseNanoBEIREvaluator) with these parameters:
  ```json
  {
      "dataset_names": [
          "climatefever",
          "dbpedia",
          "fever",
          "fiqa2018",
          "hotpotqa",
          "msmarco",
          "nfcorpus",
          "nq",
          "quoraretrieval",
          "scidocs",
          "arguana",
          "scifact",
          "touche2020"
      ]
  }
  ```
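
To reproduce this evaluation, something like the following should work. This is a hedged sketch: the constructor arguments mirror the parameters above, and the evaluator documentation linked above has the full signature.

```python
from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder.evaluation import SparseNanoBEIREvaluator

model = SparseEncoder("xin0920/splade-distilbert-base-uncased-msmarco-mrl")
# a subset of the dataset names listed above keeps the run quick
evaluator = SparseNanoBEIREvaluator(dataset_names=["msmarco", "nfcorpus", "nq"])
results = evaluator(model)
print(results[evaluator.primary_metric])  # NanoBEIR mean dot_ndcg@10
```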
| Metric | Value |
|:---|:---|
| dot_accuracy@1 | 0.3608 |
| dot_accuracy@3 | 0.5104 |
| dot_accuracy@5 | 0.5782 |
| dot_accuracy@10 | 0.6491 |
| dot_precision@1 | 0.3608 |
| dot_precision@3 | 0.2253 |
| dot_precision@5 | 0.1804 |
| dot_precision@10 | 0.1244 |
| dot_recall@1 | 0.2056 |
| dot_recall@3 | 0.3046 |
| dot_recall@5 | 0.3591 |
| dot_recall@10 | 0.4214 |
| **dot_ndcg@10** | **0.3864** |
| dot_mrr@10 | 0.4491 |
| dot_map@100 | 0.3163 |
| row_non_zero_mean_query | 98.1935 |
| row_sparsity_mean_query | 0.9968 |
| row_non_zero_mean_corpus | 158.7869 |
| row_sparsity_mean_corpus | 0.9948 |
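
The sparsity figures follow directly from the non-zero counts over the 30522-dimensional output, i.e. `sparsity = 1 - non_zero / 30522`:

```python
# quick sanity check of the two mean-sparsity rows above
dim = 30522
for split, non_zero in [("query", 98.1935), ("corpus", 158.7869)]:
    print(f"{split}: 1 - {non_zero}/{dim} = {1 - non_zero / dim:.4f}")
# query: 1 - 98.1935/30522 = 0.9968
# corpus: 1 - 158.7869/30522 = 0.9948
```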
<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### quora-duplicates

* Dataset: [quora-duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) at [451a485](https://huggingface.co/datasets/sentence-transformers/quora-duplicates/tree/451a4850bd141edb44ade1b5828c259abd762cdb)
* Size: 99,000 training samples
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:
  | | anchor | positive | negative |
  |:---|:---|:---|:---|
  | type | string | string | string |
  | details | <ul><li>min: 6 tokens</li><li>mean: 14.1 tokens</li><li>max: 39 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 13.83 tokens</li><li>max: 41 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 15.21 tokens</li><li>max: 75 tokens</li></ul> |
* Samples:
  | anchor | positive | negative |
  |:---|:---|:---|
  | <code>What are the best GMAT coaching institutes in Delhi NCR?</code> | <code>Which are the best GMAT coaching institutes in Delhi/NCR?</code> | <code>What are the best GMAT coaching institutes in Delhi-Noida Area?</code> |
  | <code>Is a third world war coming?</code> | <code>Is World War 3 more imminent than expected?</code> | <code>Since the UN is unable to control terrorism and groups like ISIS, al-Qaeda and countries that promote terrorism (even though it consumed those countries), can we assume that the world is heading towards World War III?</code> |
  | <code>Should I build iOS or Android apps first?</code> | <code>Should people choose Android or iOS first to build their App?</code> | <code>How much more effort is it to build your app on both iOS and Android?</code> |
* Loss: [<code>SpladeLoss</code>](https://sbert.net/docs/package_reference/sparse_encoder/losses.html#spladeloss) with these parameters:
  ```json
  {'loss': SparseMultipleNegativesRankingLoss(
    (model): SparseEncoder(
      (0): MLMTransformer({'max_seq_length': 256, 'do_lower_case': False}) with MLMTransformer model: DistilBertForMaskedLM
      (1): SpladePooling({'pooling_strategy': 'max', 'activation_function': 'relu', 'word_embedding_dimension': None})
    )
    (cross_entropy_loss): CrossEntropyLoss()
  ), 'lambda_corpus': 3e-05, 'lambda_query': 5e-05}
  ```
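
`SpladeLoss` combines the ranking loss shown above with a FLOPS regulariser on the query and document activations, weighted by `lambda_query` and `lambda_corpus` respectively. A sketch of the regulariser as defined in the FlopsLoss citation at the end of this card (not the library's code):

```python
import torch

def flops_regularizer(reps: torch.Tensor) -> torch.Tensor:
    """reps: (batch, vocab) non-negative SPLADE activations."""
    # square of each term's mean activation across the batch, summed over the
    # vocabulary: drives expected activations toward zero -> shorter posting lists
    return torch.sum(torch.mean(reps, dim=0) ** 2)

# total = ranking_loss + 5e-05 * flops_regularizer(query_reps) \
#                      + 3e-05 * flops_regularizer(doc_reps)
```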
### Evaluation Dataset

#### quora-duplicates

* Dataset: [quora-duplicates](https://huggingface.co/datasets/sentence-transformers/quora-duplicates) at [451a485](https://huggingface.co/datasets/sentence-transformers/quora-duplicates/tree/451a4850bd141edb44ade1b5828c259abd762cdb)
* Size: 1,000 evaluation samples
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:
  | | anchor | positive | negative |
  |:---|:---|:---|:---|
  | type | string | string | string |
  | details | <ul><li>min: 6 tokens</li><li>mean: 14.05 tokens</li><li>max: 40 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.14 tokens</li><li>max: 44 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 14.56 tokens</li><li>max: 60 tokens</li></ul> |
* Samples:
  | anchor | positive | negative |
  |:---|:---|:---|
  | <code>What happens if we use petrol in diesel vehicles?</code> | <code>Why can't we use petrol in diesel?</code> | <code>Why are diesel engines noisier than petrol engines?</code> |
  | <code>Why is Saltwater taffy candy imported in Switzerland?</code> | <code>Why is Saltwater taffy candy imported in Laos?</code> | <code>Is salt a consumer product?</code> |
  | <code>Which is your favourite film in 2016?</code> | <code>What movie is the best movie of 2016?</code> | <code>What will the best movie of 2017 be?</code> |
* Loss: [<code>SpladeLoss</code>](https://sbert.net/docs/package_reference/sparse_encoder/losses.html#spladeloss) with these parameters:
  ```json
  {'loss': SparseMultipleNegativesRankingLoss(
    (model): SparseEncoder(
      (0): MLMTransformer({'max_seq_length': 256, 'do_lower_case': False}) with MLMTransformer model: DistilBertForMaskedLM
      (1): SpladePooling({'pooling_strategy': 'max', 'activation_function': 'relu', 'word_embedding_dimension': None})
    )
    (cross_entropy_loss): CrossEntropyLoss()
  ), 'lambda_corpus': 3e-05, 'lambda_query': 5e-05}
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 12
- `per_device_eval_batch_size`: 12
- `learning_rate`: 2e-05
- `num_train_epochs`: 1
- `bf16`: True
- `load_best_model_at_end`: True

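A training run with these values could be set up roughly as follows. This is a hedged sketch: the argument names (`lambda_query`, `lambda_corpus`) follow the loss dump above and may differ between sentence-transformers versions, and the exact script used for this model is not included in the card.

```python
from datasets import load_dataset
from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder import (
    SparseEncoderTrainer,
    SparseEncoderTrainingArguments,
)
from sentence_transformers.sparse_encoder.losses import (
    SparseMultipleNegativesRankingLoss,
    SpladeLoss,
)

model = SparseEncoder("distilbert/distilbert-base-uncased")  # builds MLMTransformer + SpladePooling
train_dataset = load_dataset("sentence-transformers/quora-duplicates", "triplet", split="train")

loss = SpladeLoss(
    model=model,
    loss=SparseMultipleNegativesRankingLoss(model=model),
    lambda_query=5e-05,   # query-side FLOPS weight, as reported above
    lambda_corpus=3e-05,  # document-side FLOPS weight, as reported above
)
args = SparseEncoderTrainingArguments(
    output_dir="splade-distilbert-quora",
    num_train_epochs=1,
    per_device_train_batch_size=12,
    learning_rate=2e-05,
    bf16=True,
)
SparseEncoderTrainer(model=model, args=args, train_dataset=train_dataset, loss=loss).train()
```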
#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 12
- `per_device_eval_batch_size`: 12
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 2e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: True
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>

### Training Logs
| Epoch | Step | Training Loss | NanoClimateFEVER_dot_ndcg@10 | NanoDBPedia_dot_ndcg@10 | NanoFEVER_dot_ndcg@10 | NanoFiQA2018_dot_ndcg@10 | NanoHotpotQA_dot_ndcg@10 | NanoMSMARCO_dot_ndcg@10 | NanoNFCorpus_dot_ndcg@10 | NanoNQ_dot_ndcg@10 | NanoQuoraRetrieval_dot_ndcg@10 | NanoSCIDOCS_dot_ndcg@10 | NanoArguAna_dot_ndcg@10 | NanoSciFact_dot_ndcg@10 | NanoTouche2020_dot_ndcg@10 | NanoBEIR_mean_dot_ndcg@10 |
|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|:---|
| 0.1938 | 200 | 12.7715 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.3876 | 400 | 0.2719 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.5814 | 600 | 0.234 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.7752 | 800 | 0.2068 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.9690 | 1000 | 0.2041 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| -1 | -1 | - | 0.1910 | 0.4008 | 0.6697 | 0.1975 | 0.4642 | 0.2890 | 0.1690 | 0.3056 | 0.9457 | 0.2640 | 0.2465 | 0.5013 | 0.3791 | 0.3864 |

### Framework Versions
- Python: 3.9.22
- Sentence Transformers: 4.2.0.dev0
- Transformers: 4.52.1
- PyTorch: 2.6.0+cu124
- Accelerate: 1.7.0
- Datasets: 3.6.0
- Tokenizers: 0.21.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### SpladeLoss
```bibtex
@misc{formal2022distillationhardnegativesampling,
    title={From Distillation to Hard Negative Sampling: Making Sparse Neural IR Models More Effective},
    author={Thibault Formal and Carlos Lassance and Benjamin Piwowarski and Stéphane Clinchant},
    year={2022},
    eprint={2205.04733},
    archivePrefix={arXiv},
    primaryClass={cs.IR},
    url={https://arxiv.org/abs/2205.04733},
}
```

#### SparseMultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

#### FlopsLoss
```bibtex
@article{paria2020minimizing,
    title={Minimizing flops to learn efficient sparse representations},
    author={Paria, Biswajit and Yeh, Chih-Kuan and Yen, Ian EH and Xu, Ning and Ravikumar, Pradeep and P{\'o}czos, Barnab{\'a}s},
    journal={arXiv preprint arXiv:2004.05665},
    year={2020}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json ADDED
@@ -0,0 +1,23 @@
{
  "activation": "gelu",
  "architectures": [
    "DistilBertForMaskedLM"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "initializer_range": 0.02,
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "torch_dtype": "float32",
  "transformers_version": "4.52.1",
  "vocab_size": 30522
}
config_sentence_transformers.json ADDED
@@ -0,0 +1,11 @@
{
  "model_type": "SparseEncoder",
  "__version__": {
    "sentence_transformers": "4.2.0.dev0",
    "transformers": "4.52.1",
    "pytorch": "2.6.0+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "dot"
}
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b8c9578ec3b7dc6eba96a15103f75a0e2a1d53d7a47b564231f029e5233e6e0
size 267954768
modules.json ADDED
@@ -0,0 +1,14 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.sparse_encoder.models.MLMTransformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_SpladePooling",
    "type": "sentence_transformers.sparse_encoder.models.SpladePooling"
  }
]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 256,
  "do_lower_case": false
}
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
{
  "cls_token": "[CLS]",
  "mask_token": "[MASK]",
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "unk_token": "[UNK]"
}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": false,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "DistilBertTokenizer",
  "unk_token": "[UNK]"
}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff