TatvaRA committed
Commit 58da7ec · verified · 1 Parent(s): 002cf6e

Add new SentenceTransformer model

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 768,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
README.md ADDED
@@ -0,0 +1,843 @@
---
language:
- en
license: apache-2.0
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:73
- loss:MatryoshkaLoss
- loss:MultipleNegativesRankingLoss
base_model: thenlper/gte-base
widget:
- source_sentence: What is the maximum value of equipment that can be purchased with a CUE Student Research Project Grant?
  sentences:
  - Equipment costs (valued up to $1000).
  - Variable awards to recognize and reward academic achievement at the senior high school level and to encourage students to pursue post -secondary studies.
  - The Amazon Future Engineer Scholarship provides students with an opportunity to upgrade their careers with a $7,500 CAD/year scholarship available for up to four years.
- source_sentence: What is the minimum distance a recipient's hometown must be from Concordia University of Edmonton to be eligible for the Alberta Blue Cross Away from Home Scholarship?
  sentences:
  - Three awards are available
  - The recipient’s hometown must be at least 100 kilometres from Concordia University of Edmonton.
  - 'Application Deadline: September 1'
- source_sentence: According to the selection criteria, what level of subjects are used to determine the academic standing of a potential Alberta Blue Cross Away from Home Scholarship recipient?
  sentences:
  - Selection is ba sed on the academic standing of 30 -level subjects used for admission.
  - 'These eligible and ineligible lists are not exhaustive. Doubts about the eligibility of expenses should be directed to the ORI’s Research Administration Service s (RAS): [email protected] .'
  - '*Value: $11000 Master’s; $14,000 Doctoral'
- source_sentence: According to the text, how many days does a grant recipient have to submit a final report after the grant ends?
  sentences:
  - All Fall grant recipients are expected to submit an abstract to present an oral and/or poster presentation of their work, either in its progression or final stage.
  - a business program offered by an Alberta college, polytechnic, or university that offers the prerequisite courses required for entrance into the CPA Professional Education Program (CPA PEP).
  - The applicant is required to complete and submit a final report within 5 days of the end of the grant.
- source_sentence: In what format should applicants acknowledge the funding provided by Concordia University of Edmonton for their Student Project Grant?
  sentences:
  - All oral or poster presentations, publications, including public messages, arising from research supported by CUE grants must acknowledge the support of the institution. Acknowledgement can be in the written format, such as " This research is funded by the generous support of Concordia University of Edmonton through their CUE Student Research Project Grants program ", or similar phrasing.
  - This $1,000 scholarship is awarded to post -secondary students who have completed at least one year towards their Bachelor of Science with a focus on Computer Science, achieved an average GPA of 3.5 or higher, and are still enrolled in post -secondary studie s.
  - The recipient will be selected based on the highest grade in MARK320. In the event of a tie, preference will be given to the student with the highest cumulative GPA.
pipeline_tag: sentence-similarity
library_name: sentence-transformers
metrics:
- cosine_accuracy@1
- cosine_accuracy@3
- cosine_accuracy@5
- cosine_accuracy@10
- cosine_precision@1
- cosine_precision@3
- cosine_precision@5
- cosine_precision@10
- cosine_recall@1
- cosine_recall@3
- cosine_recall@5
- cosine_recall@10
- cosine_ndcg@10
- cosine_mrr@10
- cosine_map@100
model-index:
- name: GTE base Scholarships Matryoshka
  results:
  - task:
      type: information-retrieval
      name: Information Retrieval
    dataset:
      name: dim 768
      type: dim_768
    metrics:
    - type: cosine_accuracy@1
      value: 0.5555555555555556
      name: Cosine Accuracy@1
    - type: cosine_accuracy@3
      value: 1.0
      name: Cosine Accuracy@3
    - type: cosine_accuracy@5
      value: 1.0
      name: Cosine Accuracy@5
    - type: cosine_accuracy@10
      value: 1.0
      name: Cosine Accuracy@10
    - type: cosine_precision@1
      value: 0.5555555555555556
      name: Cosine Precision@1
    - type: cosine_precision@3
      value: 0.3333333333333333
      name: Cosine Precision@3
    - type: cosine_precision@5
      value: 0.2
      name: Cosine Precision@5
    - type: cosine_precision@10
      value: 0.1
      name: Cosine Precision@10
    - type: cosine_recall@1
      value: 0.5555555555555556
      name: Cosine Recall@1
    - type: cosine_recall@3
      value: 1.0
      name: Cosine Recall@3
    - type: cosine_recall@5
      value: 1.0
      name: Cosine Recall@5
    - type: cosine_recall@10
      value: 1.0
      name: Cosine Recall@10
    - type: cosine_ndcg@10
      value: 0.8214210289682637
      name: Cosine Ndcg@10
    - type: cosine_mrr@10
      value: 0.7592592592592592
      name: Cosine Mrr@10
    - type: cosine_map@100
      value: 0.7592592592592592
      name: Cosine Map@100
  - task:
      type: information-retrieval
      name: Information Retrieval
    dataset:
      name: dim 512
      type: dim_512
    metrics:
    - type: cosine_accuracy@1
      value: 0.4444444444444444
      name: Cosine Accuracy@1
    - type: cosine_accuracy@3
      value: 0.8888888888888888
      name: Cosine Accuracy@3
    - type: cosine_accuracy@5
      value: 1.0
      name: Cosine Accuracy@5
    - type: cosine_accuracy@10
      value: 1.0
      name: Cosine Accuracy@10
    - type: cosine_precision@1
      value: 0.4444444444444444
      name: Cosine Precision@1
    - type: cosine_precision@3
      value: 0.2962962962962963
      name: Cosine Precision@3
    - type: cosine_precision@5
      value: 0.2
      name: Cosine Precision@5
    - type: cosine_precision@10
      value: 0.1
      name: Cosine Precision@10
    - type: cosine_recall@1
      value: 0.4444444444444444
      name: Cosine Recall@1
    - type: cosine_recall@3
      value: 0.8888888888888888
      name: Cosine Recall@3
    - type: cosine_recall@5
      value: 1.0
      name: Cosine Recall@5
    - type: cosine_recall@10
      value: 1.0
      name: Cosine Recall@10
    - type: cosine_ndcg@10
      value: 0.7678413135022636
      name: Cosine Ndcg@10
    - type: cosine_mrr@10
      value: 0.6888888888888889
      name: Cosine Mrr@10
    - type: cosine_map@100
      value: 0.6888888888888889
      name: Cosine Map@100
  - task:
      type: information-retrieval
      name: Information Retrieval
    dataset:
      name: dim 256
      type: dim_256
    metrics:
    - type: cosine_accuracy@1
      value: 0.4444444444444444
      name: Cosine Accuracy@1
    - type: cosine_accuracy@3
      value: 1.0
      name: Cosine Accuracy@3
    - type: cosine_accuracy@5
      value: 1.0
      name: Cosine Accuracy@5
    - type: cosine_accuracy@10
      value: 1.0
      name: Cosine Accuracy@10
    - type: cosine_precision@1
      value: 0.4444444444444444
      name: Cosine Precision@1
    - type: cosine_precision@3
      value: 0.3333333333333333
      name: Cosine Precision@3
    - type: cosine_precision@5
      value: 0.2
      name: Cosine Precision@5
    - type: cosine_precision@10
      value: 0.1
      name: Cosine Precision@10
    - type: cosine_recall@1
      value: 0.4444444444444444
      name: Cosine Recall@1
    - type: cosine_recall@3
      value: 1.0
      name: Cosine Recall@3
    - type: cosine_recall@5
      value: 1.0
      name: Cosine Recall@5
    - type: cosine_recall@10
      value: 1.0
      name: Cosine Recall@10
    - type: cosine_ndcg@10
      value: 0.7658654734127082
      name: Cosine Ndcg@10
    - type: cosine_mrr@10
      value: 0.6851851851851851
      name: Cosine Mrr@10
    - type: cosine_map@100
      value: 0.6851851851851851
      name: Cosine Map@100
  - task:
      type: information-retrieval
      name: Information Retrieval
    dataset:
      name: dim 128
      type: dim_128
    metrics:
    - type: cosine_accuracy@1
      value: 0.4444444444444444
      name: Cosine Accuracy@1
    - type: cosine_accuracy@3
      value: 0.8888888888888888
      name: Cosine Accuracy@3
    - type: cosine_accuracy@5
      value: 0.8888888888888888
      name: Cosine Accuracy@5
    - type: cosine_accuracy@10
      value: 0.8888888888888888
      name: Cosine Accuracy@10
    - type: cosine_precision@1
      value: 0.4444444444444444
      name: Cosine Precision@1
    - type: cosine_precision@3
      value: 0.2962962962962963
      name: Cosine Precision@3
    - type: cosine_precision@5
      value: 0.17777777777777778
      name: Cosine Precision@5
    - type: cosine_precision@10
      value: 0.08888888888888889
      name: Cosine Precision@10
    - type: cosine_recall@1
      value: 0.4444444444444444
      name: Cosine Recall@1
    - type: cosine_recall@3
      value: 0.8888888888888888
      name: Cosine Recall@3
    - type: cosine_recall@5
      value: 0.8888888888888888
      name: Cosine Recall@5
    - type: cosine_recall@10
      value: 0.8888888888888888
      name: Cosine Recall@10
    - type: cosine_ndcg@10
      value: 0.7103099178571526
      name: Cosine Ndcg@10
    - type: cosine_mrr@10
      value: 0.6481481481481483
      name: Cosine Mrr@10
    - type: cosine_map@100
      value: 0.6521164021164021
      name: Cosine Map@100
  - task:
      type: information-retrieval
      name: Information Retrieval
    dataset:
      name: dim 64
      type: dim_64
    metrics:
    - type: cosine_accuracy@1
      value: 0.6666666666666666
      name: Cosine Accuracy@1
    - type: cosine_accuracy@3
      value: 0.6666666666666666
      name: Cosine Accuracy@3
    - type: cosine_accuracy@5
      value: 0.7777777777777778
      name: Cosine Accuracy@5
    - type: cosine_accuracy@10
      value: 0.8888888888888888
      name: Cosine Accuracy@10
    - type: cosine_precision@1
      value: 0.6666666666666666
      name: Cosine Precision@1
    - type: cosine_precision@3
      value: 0.2222222222222222
      name: Cosine Precision@3
    - type: cosine_precision@5
      value: 0.15555555555555556
      name: Cosine Precision@5
    - type: cosine_precision@10
      value: 0.08888888888888889
      name: Cosine Precision@10
    - type: cosine_recall@1
      value: 0.6666666666666666
      name: Cosine Recall@1
    - type: cosine_recall@3
      value: 0.6666666666666666
      name: Cosine Recall@3
    - type: cosine_recall@5
      value: 0.7777777777777778
      name: Cosine Recall@5
    - type: cosine_recall@10
      value: 0.8888888888888888
      name: Cosine Recall@10
    - type: cosine_ndcg@10
      value: 0.7515566546007473
      name: Cosine Ndcg@10
    - type: cosine_mrr@10
      value: 0.7103174603174602
      name: Cosine Mrr@10
    - type: cosine_map@100
      value: 0.71494708994709
      name: Cosine Map@100
---

# GTE base Scholarships Matryoshka

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [thenlper/gte-base](https://huggingface.co/thenlper/gte-base) on the json dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [thenlper/gte-base](https://huggingface.co/thenlper/gte-base) <!-- at revision c078288308d8dee004ab72c6191778064285ec0c -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
- **Training Dataset:**
    - json
- **Language:** en
- **License:** apache-2.0

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
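
For reference, the same stack can be assembled explicitly from its three modules. This is an illustrative sketch (not part of the original training script) using the standard `sentence_transformers.models` API; in practice the published checkpoint should simply be loaded by name as shown in the Usage section below.

```python
from sentence_transformers import SentenceTransformer, models

# (0) Transformer backbone: thenlper/gte-base, truncating inputs at 512 tokens
word_embedding_model = models.Transformer("thenlper/gte-base", max_seq_length=512)

# (1) Mean pooling over token embeddings (matches 1_Pooling/config.json)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 768
    pooling_mode="mean",
)

# (2) L2-normalize the sentence embedding so dot product equals cosine similarity
normalize = models.Normalize()

model = SentenceTransformer(modules=[word_embedding_model, pooling_model, normalize])
```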

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("TatvaRA/gte-base-finetuned-schorlaships-matryonshka")
# Run inference
sentences = [
    'In what format should applicants acknowledge the funding provided by Concordia University of Edmonton for their Student Project Grant?',
    'All oral or poster presentations, publications, including public messages, arising from research supported by CUE grants must acknowledge the support of the institution. Acknowledgement can be in the written format, such as " This research is funded by the generous support of Concordia University of Edmonton through their CUE Student Research Project Grants program ", or similar phrasing.',
    'The recipient will be selected based on the highest grade in MARK320. In the event of a tie, preference will be given to the student with the highest cumulative GPA.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```
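
Because the model was trained with a Matryoshka objective at 768, 512, 256, 128 and 64 dimensions, the embeddings can also be truncated to a smaller size at load time. The snippet below is an illustrative sketch rather than part of the original card; it assumes the `truncate_dim` argument available in recent sentence-transformers releases.

```python
from sentence_transformers import SentenceTransformer

# Load the same checkpoint but keep only the first 256 embedding dimensions
model_256 = SentenceTransformer(
    "TatvaRA/gte-base-finetuned-schorlaships-matryonshka",
    truncate_dim=256,
)

sentences = [
    "What is the maximum value of equipment that can be purchased with a CUE Student Research Project Grant?",
    "Equipment costs (valued up to $1000).",
]
embeddings = model_256.encode(sentences)
print(embeddings.shape)
# (2, 256)
```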

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### Information Retrieval

* Dataset: `dim_768`
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
  ```json
  {
      "truncate_dim": 768
  }
  ```

| Metric | Value |
|:--------------------|:-----------|
| cosine_accuracy@1 | 0.5556 |
| cosine_accuracy@3 | 1.0 |
| cosine_accuracy@5 | 1.0 |
| cosine_accuracy@10 | 1.0 |
| cosine_precision@1 | 0.5556 |
| cosine_precision@3 | 0.3333 |
| cosine_precision@5 | 0.2 |
| cosine_precision@10 | 0.1 |
| cosine_recall@1 | 0.5556 |
| cosine_recall@3 | 1.0 |
| cosine_recall@5 | 1.0 |
| cosine_recall@10 | 1.0 |
| **cosine_ndcg@10** | **0.8214** |
| cosine_mrr@10 | 0.7593 |
| cosine_map@100 | 0.7593 |

#### Information Retrieval

* Dataset: `dim_512`
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
  ```json
  {
      "truncate_dim": 512
  }
  ```

| Metric | Value |
|:--------------------|:-----------|
| cosine_accuracy@1 | 0.4444 |
| cosine_accuracy@3 | 0.8889 |
| cosine_accuracy@5 | 1.0 |
| cosine_accuracy@10 | 1.0 |
| cosine_precision@1 | 0.4444 |
| cosine_precision@3 | 0.2963 |
| cosine_precision@5 | 0.2 |
| cosine_precision@10 | 0.1 |
| cosine_recall@1 | 0.4444 |
| cosine_recall@3 | 0.8889 |
| cosine_recall@5 | 1.0 |
| cosine_recall@10 | 1.0 |
| **cosine_ndcg@10** | **0.7678** |
| cosine_mrr@10 | 0.6889 |
| cosine_map@100 | 0.6889 |

#### Information Retrieval

* Dataset: `dim_256`
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
  ```json
  {
      "truncate_dim": 256
  }
  ```

| Metric | Value |
|:--------------------|:-----------|
| cosine_accuracy@1 | 0.4444 |
| cosine_accuracy@3 | 1.0 |
| cosine_accuracy@5 | 1.0 |
| cosine_accuracy@10 | 1.0 |
| cosine_precision@1 | 0.4444 |
| cosine_precision@3 | 0.3333 |
| cosine_precision@5 | 0.2 |
| cosine_precision@10 | 0.1 |
| cosine_recall@1 | 0.4444 |
| cosine_recall@3 | 1.0 |
| cosine_recall@5 | 1.0 |
| cosine_recall@10 | 1.0 |
| **cosine_ndcg@10** | **0.7659** |
| cosine_mrr@10 | 0.6852 |
| cosine_map@100 | 0.6852 |

#### Information Retrieval

* Dataset: `dim_128`
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
  ```json
  {
      "truncate_dim": 128
  }
  ```

| Metric | Value |
|:--------------------|:-----------|
| cosine_accuracy@1 | 0.4444 |
| cosine_accuracy@3 | 0.8889 |
| cosine_accuracy@5 | 0.8889 |
| cosine_accuracy@10 | 0.8889 |
| cosine_precision@1 | 0.4444 |
| cosine_precision@3 | 0.2963 |
| cosine_precision@5 | 0.1778 |
| cosine_precision@10 | 0.0889 |
| cosine_recall@1 | 0.4444 |
| cosine_recall@3 | 0.8889 |
| cosine_recall@5 | 0.8889 |
| cosine_recall@10 | 0.8889 |
| **cosine_ndcg@10** | **0.7103** |
| cosine_mrr@10 | 0.6481 |
| cosine_map@100 | 0.6521 |

#### Information Retrieval

* Dataset: `dim_64`
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator) with these parameters:
  ```json
  {
      "truncate_dim": 64
  }
  ```

| Metric | Value |
|:--------------------|:-----------|
| cosine_accuracy@1 | 0.6667 |
| cosine_accuracy@3 | 0.6667 |
| cosine_accuracy@5 | 0.7778 |
| cosine_accuracy@10 | 0.8889 |
| cosine_precision@1 | 0.6667 |
| cosine_precision@3 | 0.2222 |
| cosine_precision@5 | 0.1556 |
| cosine_precision@10 | 0.0889 |
| cosine_recall@1 | 0.6667 |
| cosine_recall@3 | 0.6667 |
| cosine_recall@5 | 0.7778 |
| cosine_recall@10 | 0.8889 |
| **cosine_ndcg@10** | **0.7516** |
| cosine_mrr@10 | 0.7103 |
| cosine_map@100 | 0.7149 |

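The tables above come from `InformationRetrievalEvaluator` runs at each truncation dimension. The sketch below shows how such an evaluation can be set up; the query/corpus dictionaries here are small placeholders drawn from the widget examples, not the actual held-out evaluation split.

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import InformationRetrievalEvaluator

model = SentenceTransformer("TatvaRA/gte-base-finetuned-schorlaships-matryonshka")

# Placeholder retrieval data: query id -> text, doc id -> text, query id -> relevant doc ids
queries = {"q1": "What is the maximum value of equipment that can be purchased with a CUE Student Research Project Grant?"}
corpus = {"d1": "Equipment costs (valued up to $1000)."}
relevant_docs = {"q1": {"d1"}}

# Evaluate once per Matryoshka dimension, mirroring the dim_768 ... dim_64 tables
for dim in (768, 512, 256, 128, 64):
    evaluator = InformationRetrievalEvaluator(
        queries=queries,
        corpus=corpus,
        relevant_docs=relevant_docs,
        name=f"dim_{dim}",
        truncate_dim=dim,
    )
    results = evaluator(model)
    print(dim, results)
```
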
<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### json

* Dataset: json
* Size: 73 training samples
* Columns: <code>anchor</code> and <code>positive</code>
* Approximate statistics based on the first 73 samples:

  | | anchor | positive |
  |:--------|:------------------|:------------------|
  | type | string | string |
  | details | <ul><li>min: 14 tokens</li><li>mean: 23.0 tokens</li><li>max: 41 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 32.74 tokens</li><li>max: 346 tokens</li></ul> |

* Samples:

  | anchor | positive |
  |:-------|:---------|
  | <code>What specific type of students are the Alberta Innovates Graduate Student Scholarships designed to support?</code> | <code>The Alberta Innovates Graduate Student Scholarships support academically superior graduate students <br>who are receiving training and conducting research in areas that are strategically important to Alberta’s <br>economy.</code> |
  | <code>What is the specific date by which students must submit their reports for the Spring 2025 grant period?</code> | <code>Report due date April 20th (5 days post grant closure)</code> |
  | <code>In what format should applicants acknowledge the funding provided by Concordia University of Edmonton for their Student Project Grant?</code> | <code>All oral or poster presentations, publications, including public messages, arising from research supported by CUE grants must acknowledge the support of the institution. Acknowledgement can be in the written format, such as " This research is funded by the generous support of Concordia University of Edmonton through their CUE Student Research Project Grants program ", or similar phrasing.</code> |

* Loss: [<code>MatryoshkaLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#matryoshkaloss) with these parameters:
  ```json
  {
      "loss": "MultipleNegativesRankingLoss",
      "matryoshka_dims": [
          768,
          512,
          256,
          128,
          64
      ],
      "matryoshka_weights": [
          1,
          1,
          1,
          1,
          1
      ],
      "n_dims_per_step": -1
  }
  ```
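
In code, this loss configuration corresponds to wrapping a `MultipleNegativesRankingLoss` in a `MatryoshkaLoss` over the five dimensions listed above. The following is a sketch of that setup, not the exact training script used for this model.

```python
from sentence_transformers import SentenceTransformer
from sentence_transformers.losses import MatryoshkaLoss, MultipleNegativesRankingLoss

# Start from the base checkpoint that was fine-tuned
model = SentenceTransformer("thenlper/gte-base")

# In-batch negatives ranking loss on (anchor, positive) pairs
base_loss = MultipleNegativesRankingLoss(model)

# Apply the same loss at every truncated embedding size, with equal weights
loss = MatryoshkaLoss(
    model,
    base_loss,
    matryoshka_dims=[768, 512, 256, 128, 64],
    matryoshka_weights=[1, 1, 1, 1, 1],
)
```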

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: epoch
- `per_device_train_batch_size`: 32
- `per_device_eval_batch_size`: 16
- `gradient_accumulation_steps`: 16
- `learning_rate`: 2e-05
- `num_train_epochs`: 4
- `lr_scheduler_type`: cosine
- `warmup_ratio`: 0.1
- `fp16`: True
- `load_best_model_at_end`: True
- `optim`: adamw_torch_fused
- `batch_sampler`: no_duplicates

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: epoch
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 32
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 16
- `eval_accumulation_steps`: None
- `learning_rate`: 2e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 4
- `max_steps`: -1
- `lr_scheduler_type`: cosine
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch_fused
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`:
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `prompts`: None
- `batch_sampler`: no_duplicates
- `multi_dataset_batch_sampler`: proportional

</details>
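
The non-default hyperparameters above map onto `SentenceTransformerTrainingArguments` roughly as sketched below. The `output_dir` and `save_strategy` values are assumptions for illustration (they are not listed on this card); everything else mirrors the list above.

```python
from sentence_transformers import SentenceTransformerTrainingArguments
from sentence_transformers.training_args import BatchSamplers

args = SentenceTransformerTrainingArguments(
    output_dir="gte-base-finetuned-schorlaships-matryonshka",  # assumed, not from the card
    num_train_epochs=4,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=16,
    learning_rate=2e-5,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,
    fp16=True,
    eval_strategy="epoch",
    save_strategy="epoch",  # assumed; required when load_best_model_at_end=True
    load_best_model_at_end=True,
    optim="adamw_torch_fused",
    batch_sampler=BatchSamplers.NO_DUPLICATES,
)
```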

### Training Logs
| Epoch | Step | dim_768_cosine_ndcg@10 | dim_512_cosine_ndcg@10 | dim_256_cosine_ndcg@10 | dim_128_cosine_ndcg@10 | dim_64_cosine_ndcg@10 |
|:-------:|:-----:|:----------------------:|:----------------------:|:----------------------:|:----------------------:|:---------------------:|
| 1.0 | 1 | 0.7249 | 0.7249 | 0.7473 | 0.7026 | 0.6686 |
| 2.0 | 2 | 0.7619 | 0.7249 | 0.7533 | 0.7026 | 0.7480 |
| **3.0** | **3** | **0.7804** | **0.7619** | **0.7659** | **0.7103** | **0.7496** |
| 4.0 | 4 | 0.8214 | 0.7678 | 0.7659 | 0.7103 | 0.7516 |

* The bold row denotes the saved checkpoint.

### Framework Versions
- Python: 3.11.12
- Sentence Transformers: 4.1.0
- Transformers: 4.41.2
- PyTorch: 2.1.2+cu121
- Accelerate: 1.5.2
- Datasets: 2.19.1
- Tokenizers: 0.19.1

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MatryoshkaLoss
```bibtex
@misc{kusupati2024matryoshka,
    title={Matryoshka Representation Learning},
    author={Aditya Kusupati and Gantavya Bhatt and Aniket Rege and Matthew Wallingford and Aditya Sinha and Vivek Ramanujan and William Howard-Snyder and Kaifeng Chen and Sham Kakade and Prateek Jain and Ali Farhadi},
    year={2024},
    eprint={2205.13147},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json ADDED
@@ -0,0 +1,26 @@
{
  "_name_or_path": "thenlper/gte-base",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.41.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "4.1.0",
    "transformers": "4.41.2",
    "pytorch": "2.1.2+cu121"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8a7ad835eda1ebecdf38d2d5a1e53b801cc477f069351ab937ca61ad67a5993c
size 437951328
modules.json ADDED
@@ -0,0 +1,20 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 512,
  "do_lower_case": false
}
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "max_length": 128,
  "model_max_length": 512,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff