Upload folder using huggingface_hub
Files changed:
- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +467 -0
- config.json +27 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +14 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scaler.pt +3 -0
- scheduler.pt +3 -0
- sentence_bert_config.json +4 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +63 -0
- trainer_state.json +55 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
  "word_embedding_dimension": 1024,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
README.md
ADDED
@@ -0,0 +1,467 @@
---
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- dense
- generated_from_trainer
- dataset_size:16129
- loss:MultipleNegativesRankingLoss
widget:
- source_sentence: Rofr/Rofo/Rofn
  sentences:
  - between the parties is not executed within thirty (30) days following delivery,
    of such notice to Snap, Snap shall be free thereafter to enter into an such an
    agreement with any third party.
  - 'This Agreement contains the entire agreement of the parties and SYNTEL shall
    not be bound by any other different, additional, or further agreements or understandings
    except as consented to in writing by the Chief Administrative Officer or Director,
    Human Resources of SYNTEL. This Agreement shall be binding upon and inure to the
    benefit of the parties hereto and their respective successors and assigns. No
    amendment hereof shall be effective unless contained in a written instrument signed
    by the parties hereto. No delay or omission by either party to exercise any right
    or power under this Agreement shall impair such right or power or be construed
    to be a waiver thereof. A waiver by either party of any of the covenants to be
    performed by the other party or of any breach shall not be construed to be a waiver
    of any succeeding breach or of any other covenant. If any portion of any provision
    of the Agreement is declared invalid, the offending portion of such provision
    shall be deemed severable from such provision and the remaining provisions of
    the Agreement, which shall remain in full force and effect. EMPLOYEE shall not
    assign or transfer this Agreement without the prior written consent of SYNTEL.
    EMPLOYEE’s employment with SYNTEL is at will and may be terminated by SYNTEL at
    any time with or without cause, and with or without notice. All rights and remedies
    provided for in this Agreement shall be cumulative and in addition to and not
    in lieu of any other rights or remedies available to either party at law, in equity,
    or otherwise. Paragraphs 2, 3, 6, 7, 8, 9, 10, 11, 12, and 13 of this Agreement
    shall survive termination of this Agreement and EMPLOYEE’s employment with SYNTEL.
    The parties submit to the jurisdiction and venue of the circuit court for the
    County of Oakland, State of Michigan or, if original jurisdiction can be established,
    the United States District Court for the Eastern District of Michigan with respect
    to: a) disputes, controversies, or claims arising out of EMPLOYEE’S failure to
    abide by Paragraphs 6, 7, and/or Exhibit A – “Confidential Information” of this
    Agreement, b) claims initiated by SYNTEL pursuant to Paragraph 10 of this Agreement,
    and c) the enforcement of any awards or relief granted pursuant to the dispute
    resolution procedures set forth in Paragraph 11 of this Agreement. The parties
    stipulate that the venues referenced in this Agreement are convenient. This Agreement
    shall be construed under and in accordance with the laws of the State of Michigan.'
  - 'The existence and terms of this Term Sheet are “Confidential Information” under
    and subject to the terms of the Confidentiality Agreement, dated February 23,
    2016 (as amended on August 16, 2016, the “ Confidentiality Agreement ”), between
    CHC Leasing (Ireland) Limited and The Milestone Aviation Group Limited. The parties
    confirm that the Confidentiality Agreement remains in full force and effect; provided
    , however, the parties (i) agree that each party may disclose Confidential Information
    to the professional advisers retained by the Committee and (ii) agree to work
    in good faith to amend the Confidentiality Agreement to permit certain participants
    in the Chapter 11 Case (as agreed to by the parties) to view a partially redacted
    version of this Term Sheet. In addition, as each of the parties hereto acknowledges
    that this Term Sheet is itself, and this Term Sheet contains, commercially sensitive
    and proprietary information, with respect to the Chapter 11 Case, each of the
    parties agrees to maintain this Term Sheet and this information strictly confidential,
    and agrees to disclose it to no person other than: (i) the parties to the Plan
    Support Agreement (ii) any person that has executed an accession and joinder to
    the Confidentiality Agreement in the form appended thereto, (iii) the Bankruptcy
    Court during the course of the Chapter 11 Case, provided , however, that no document
    relating to the proposed transactions (including this Term Sheet) shall be filed
    with the Bankruptcy Court (other than a motion, in form and substance acceptable
    to the CHC Parties and the Milestone Parties, seeking protective order authority
    to file this Term Sheet under seal, which motion shall not describe the specific
    economic elements of the transaction) unless either (x) there has been obtained
    prior to the filing thereof an order of the Bankruptcy Court acceptable to the
    Milestone Parties enabling the CHC Parties to file such document under seal or
    (y) portions of such filed documents mutually agreed upon by the CHC Parties and
    the Milestone Parties are redacted, and (iv) the professional advisors of the
    Committee on a confidential basis pursuant to a letter agreement entered into
    with the Committee acceptable to the CHC Parties and Milestone setting forth a
    protocol for disclosure including the information that can be disclosed generally
    to the Committee and the information that is subject to limited disclosure to
    only certain professional advisors to the Committee.'
- source_sentence: Anti-Assignment
  sentences:
  - Backhaul
  - This agreement may not be assigned or delegated by Affiliate without prior written
    consent from Network 1.
  - HealthGate will liaise with the Publishers, making available for such
    purposes such HealthGate liaison staff as the Publishers may reasonably
    require, and acting in all good faith, to ensure a mutually satisfactory
    license to the Publishers or, at the Publishers' option, to a replacement
    contractor.
- source_sentence: Notice Period To Terminate Renewal
  sentences:
  - After the initial period of two years, the maintenance and support contract
    shall be automatically renewed for a period of one year on each renewal
    date, unless one of the parties terminates the maintenance and support contract
    through written notification to the other party in the form of a registered
    letter with proof of receipt, at least six (6) weeks prior to the renewal
    date.
  - Any Transfer without such approval shall constitute a breach of this Agreement and
    shall be void and of no effect.
  - The Company shall do and perform, or cause to be done and performed, all such
    further acts and things, and shall execute and deliver all such other agreements,
    certificates, instruments and documents, as the MHR Funds may reasonably request
    in order to carry out the intent and accomplish the purposes of this Agreement
    and the consummation of the transactions contemplated hereby.
- source_sentence: Governing Law
  sentences:
  - In addition, the limitations in Section 23.1(b) will not apply (1) to Company's
    indemnification obligations under Section 22.1(a) or (2) Allscripts indemnification
    obligations under Section 22.3(a), unless the Company's or Allscripts' indemnification
    obligation under Section 22.1(a) or 22.3(a), as the case may be, relates to the
    losses and obligations described in subclauses (a) through (f) of the preceding
    sentence. [***].
  - 'THIS AGREEMENT SHALL BE GOVERNED BY AND CONSTRUED IN ACCORDANCE WITH THE INTERNAL
    LAWS OF THE STATE OF NEW YORK APPLICABLE TO AGREEMENTS MADE AND TO BE PERFORMED
    ENTIRELY WITHIN SUCH STATE, WITHOUT REGARD TO THE CONFLICTS OF LAW PRINCIPLES
    OF SUCH STATE OTHER THAN SECTIONS 5-1401 OF THE NEW YORK GENERAL OBLIGATIONS LAW.'
  - All such records required to be created and maintained pursuant to Section 2.12(a)
    shall be kept available at the Operator's office and made available for the Owner's
    inspection upon request at all reasonable times.
- source_sentence: License Grant
  sentences:
  - SIERRA hereby grants ENVISION an exclusive, royalty-free sub-license
    of the Product's future patents, and patent applications to distribute, sell and
    market the Finished Product.
  - Aucta should continue to receive 15% of Net Sales Royalty for as long as ETON
    is selling the Product(s) in the Territory, unless otherwise agreed to under this
    Agreement.
  - In the event FCE notifies ExxonMobil that it has formally decided not to pursue
    Generation 2 Technology for Power Applications, then upon ExxonMobil's written
    request, FCE agrees to negotiate a grant to ExxonMobil and its Affiliates, under
    commercially reasonable terms to be determined in good faith, a worldwide, royalty-bearing
    (with the royalty to be negotiated), non-exclusive, sub-licensable right and license
    to practice FCE Background Information and FCE Background Patents for Generation
    2 Technology in any application outside of Carbon Capture Applications and Hydrogen
    Applications.
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer

This is a [sentence-transformers](https://www.SBERT.net) model trained on a dataset of 16,129 sentence pairs. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
- **Maximum Sequence Length:** 512 tokens
- **Output Dimensionality:** 1024 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'})
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```
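
In plain `transformers` terms, this stack runs the XLM-RoBERTa encoder and mean-pools its token embeddings over non-padding tokens. A minimal sketch of that computation (the model id is a placeholder, as in the usage snippet below):

```python
import torch
from transformers import AutoModel, AutoTokenizer

# "sentence_transformers_model_id" is a placeholder; substitute the real repository id.
tokenizer = AutoTokenizer.from_pretrained("sentence_transformers_model_id")
model = AutoModel.from_pretrained("sentence_transformers_model_id")  # XLMRobertaModel backbone

batch = tokenizer(["License Grant", "Governing Law"], padding=True,
                  truncation=True, max_length=512, return_tensors="pt")
with torch.no_grad():
    token_embeddings = model(**batch).last_hidden_state  # (batch, seq_len, 1024)

# Mean pooling over non-padding tokens, matching pooling_mode_mean_tokens=True above.
mask = batch["attention_mask"].unsqueeze(-1).to(token_embeddings.dtype)
sentence_embeddings = (token_embeddings * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
print(sentence_embeddings.shape)  # torch.Size([2, 1024])
```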

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'License Grant',
    "In the event FCE notifies ExxonMobil that it has formally decided not to pursue Generation 2 Technology for Power Applications, then upon ExxonMobil's written request, FCE agrees to negotiate a grant to ExxonMobil and its Affiliates, under commercially reasonable terms to be determined in good faith, a worldwide, royalty-bearing (with the royalty to be negotiated), non-exclusive, sub-licensable right and license to practice FCE Background Information and FCE Background Patents for Generation 2 Technology in any application outside of Carbon Capture Applications and Hydrogen Applications.",
    'Aucta should continue to receive 15% of Net Sales Royalty for as long as ETON is selling the Product(s) in the Territory, unless otherwise agreed to under this Agreement.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 1024]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities)
# tensor([[1.0000, 0.7920, 0.3253],
#         [0.7920, 1.0000, 0.4614],
#         [0.3253, 0.4614, 1.0000]])
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 16,129 training samples
* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence_0 | sentence_1 | label |
  |:--------|:-----------|:-----------|:------|
  | type    | string     | string     | float |
  | details | <ul><li>min: 3 tokens</li><li>mean: 54.18 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 95.75 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
* Samples:
  | sentence_0 | sentence_1 | label |
  |:-----------|:-----------|:------|
  | <code>Parties</code> | <code>STARTEC GLOBAL COMMUNICATIONS CORPORATION</code> | <code>1.0</code> |
  | <code>The proceeds of the Revolving Loans and the Swingline Loans, and the Letters of Credit, shall be used for general corporate purposes, including, but not limited to, repayment of any Indebtedness and to backstop the issuance of commercial paper.</code> | <code>Use the proceeds of the Loans and the Letters of Credit only as contemplated in Section 3.12. The Borrower will not request any Borrowing, and the Borrower shall not use, and shall procure that its Subsidiaries and its or their respective directors, officers, employees and agents shall not use, the proceeds of any Borrowing (a) in furtherance of an offer, payment, promise to pay, or authorization of the payment or giving of money, or anything else of value, to any Person in violation of any Anti-Corruption Laws in any material respect, (b) for the purpose of funding, financing or facilitating any unauthorized activities, business or transaction of or with any Sanctioned Person, or in any Sanctioned Country, or (c) knowingly in any manner that would result in the violation of any Sanctions Laws applicable to any party hereto.</code> | <code>1.0</code> |
  | <code>Governing Law</code> | <code>state.</code> | <code>1.0</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim",
      "gather_across_devices": false
  }
  ```
+
|
273 |
+
### Training Hyperparameters
|
274 |
+
#### Non-Default Hyperparameters
|
275 |
+
|
276 |
+
- `per_device_train_batch_size`: 2
|
277 |
+
- `per_device_eval_batch_size`: 2
|
278 |
+
- `num_train_epochs`: 1
|
279 |
+
- `fp16`: True
|
280 |
+
- `multi_dataset_batch_sampler`: round_robin
|
281 |
+
|
282 |
+
#### All Hyperparameters
|
283 |
+
<details><summary>Click to expand</summary>
|
284 |
+
|
285 |
+
- `overwrite_output_dir`: False
|
286 |
+
- `do_predict`: False
|
287 |
+
- `eval_strategy`: no
|
288 |
+
- `prediction_loss_only`: True
|
289 |
+
- `per_device_train_batch_size`: 2
|
290 |
+
- `per_device_eval_batch_size`: 2
|
291 |
+
- `per_gpu_train_batch_size`: None
|
292 |
+
- `per_gpu_eval_batch_size`: None
|
293 |
+
- `gradient_accumulation_steps`: 1
|
294 |
+
- `eval_accumulation_steps`: None
|
295 |
+
- `torch_empty_cache_steps`: None
|
296 |
+
- `learning_rate`: 5e-05
|
297 |
+
- `weight_decay`: 0.0
|
298 |
+
- `adam_beta1`: 0.9
|
299 |
+
- `adam_beta2`: 0.999
|
300 |
+
- `adam_epsilon`: 1e-08
|
301 |
+
- `max_grad_norm`: 1
|
302 |
+
- `num_train_epochs`: 1
|
303 |
+
- `max_steps`: -1
|
304 |
+
- `lr_scheduler_type`: linear
|
305 |
+
- `lr_scheduler_kwargs`: {}
|
306 |
+
- `warmup_ratio`: 0.0
|
307 |
+
- `warmup_steps`: 0
|
308 |
+
- `log_level`: passive
|
309 |
+
- `log_level_replica`: warning
|
310 |
+
- `log_on_each_node`: True
|
311 |
+
- `logging_nan_inf_filter`: True
|
312 |
+
- `save_safetensors`: True
|
313 |
+
- `save_on_each_node`: False
|
314 |
+
- `save_only_model`: False
|
315 |
+
- `restore_callback_states_from_checkpoint`: False
|
316 |
+
- `no_cuda`: False
|
317 |
+
- `use_cpu`: False
|
318 |
+
- `use_mps_device`: False
|
319 |
+
- `seed`: 42
|
320 |
+
- `data_seed`: None
|
321 |
+
- `jit_mode_eval`: False
|
322 |
+
- `use_ipex`: False
|
323 |
+
- `bf16`: False
|
324 |
+
- `fp16`: True
|
325 |
+
- `fp16_opt_level`: O1
|
326 |
+
- `half_precision_backend`: auto
|
327 |
+
- `bf16_full_eval`: False
|
328 |
+
- `fp16_full_eval`: False
|
329 |
+
- `tf32`: None
|
330 |
+
- `local_rank`: 0
|
331 |
+
- `ddp_backend`: None
|
332 |
+
- `tpu_num_cores`: None
|
333 |
+
- `tpu_metrics_debug`: False
|
334 |
+
- `debug`: []
|
335 |
+
- `dataloader_drop_last`: False
|
336 |
+
- `dataloader_num_workers`: 0
|
337 |
+
- `dataloader_prefetch_factor`: None
|
338 |
+
- `past_index`: -1
|
339 |
+
- `disable_tqdm`: False
|
340 |
+
- `remove_unused_columns`: True
|
341 |
+
- `label_names`: None
|
342 |
+
- `load_best_model_at_end`: False
|
343 |
+
- `ignore_data_skip`: False
|
344 |
+
- `fsdp`: []
|
345 |
+
- `fsdp_min_num_params`: 0
|
346 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
347 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
348 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
349 |
+
- `deepspeed`: None
|
350 |
+
- `label_smoothing_factor`: 0.0
|
351 |
+
- `optim`: adamw_torch_fused
|
352 |
+
- `optim_args`: None
|
353 |
+
- `adafactor`: False
|
354 |
+
- `group_by_length`: False
|
355 |
+
- `length_column_name`: length
|
356 |
+
- `ddp_find_unused_parameters`: None
|
357 |
+
- `ddp_bucket_cap_mb`: None
|
358 |
+
- `ddp_broadcast_buffers`: False
|
359 |
+
- `dataloader_pin_memory`: True
|
360 |
+
- `dataloader_persistent_workers`: False
|
361 |
+
- `skip_memory_metrics`: True
|
362 |
+
- `use_legacy_prediction_loop`: False
|
363 |
+
- `push_to_hub`: False
|
364 |
+
- `resume_from_checkpoint`: None
|
365 |
+
- `hub_model_id`: None
|
366 |
+
- `hub_strategy`: every_save
|
367 |
+
- `hub_private_repo`: None
|
368 |
+
- `hub_always_push`: False
|
369 |
+
- `hub_revision`: None
|
370 |
+
- `gradient_checkpointing`: False
|
371 |
+
- `gradient_checkpointing_kwargs`: None
|
372 |
+
- `include_inputs_for_metrics`: False
|
373 |
+
- `include_for_metrics`: []
|
374 |
+
- `eval_do_concat_batches`: True
|
375 |
+
- `fp16_backend`: auto
|
376 |
+
- `push_to_hub_model_id`: None
|
377 |
+
- `push_to_hub_organization`: None
|
378 |
+
- `mp_parameters`:
|
379 |
+
- `auto_find_batch_size`: False
|
380 |
+
- `full_determinism`: False
|
381 |
+
- `torchdynamo`: None
|
382 |
+
- `ray_scope`: last
|
383 |
+
- `ddp_timeout`: 1800
|
384 |
+
- `torch_compile`: False
|
385 |
+
- `torch_compile_backend`: None
|
386 |
+
- `torch_compile_mode`: None
|
387 |
+
- `include_tokens_per_second`: False
|
388 |
+
- `include_num_input_tokens_seen`: False
|
389 |
+
- `neftune_noise_alpha`: None
|
390 |
+
- `optim_target_modules`: None
|
391 |
+
- `batch_eval_metrics`: False
|
392 |
+
- `eval_on_start`: False
|
393 |
+
- `use_liger_kernel`: False
|
394 |
+
- `liger_kernel_config`: None
|
395 |
+
- `eval_use_gather_object`: False
|
396 |
+
- `average_tokens_across_devices`: False
|
397 |
+
- `prompts`: None
|
398 |
+
- `batch_sampler`: batch_sampler
|
399 |
+
- `multi_dataset_batch_sampler`: round_robin
|
400 |
+
- `router_mapping`: {}
|
401 |
+
- `learning_rate_mapping`: {}
|
402 |
+
|
403 |
+
</details>
|

### Training Logs
| Epoch  | Step | Training Loss |
|:------:|:----:|:-------------:|
| 0.0620 | 500  | 0.62          |
| 0.1240 | 1000 | 0.3153        |
| 0.1860 | 1500 | 0.2382        |


### Framework Versions
- Python: 3.12.11
- Sentence Transformers: 5.1.0
- Transformers: 4.55.4
- PyTorch: 2.8.0+cu126
- Accelerate: 1.10.1
- Datasets: 4.0.0
- Tokenizers: 0.21.4

## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->
config.json
ADDED
@@ -0,0 +1,27 @@
{
  "architectures": [
    "XLMRobertaModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 8194,
  "model_type": "xlm-roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.55.4",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 250002
}
config_sentence_transformers.json
ADDED
@@ -0,0 +1,14 @@
{
  "model_type": "SentenceTransformer",
  "__version__": {
    "sentence_transformers": "5.1.0",
    "transformers": "4.55.4",
    "pytorch": "2.8.0+cu126"
  },
  "prompts": {
    "query": "",
    "document": ""
  },
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:47f038cb4a9941d4beccf5600b6d204052be302a0857f57fc62b3423671ec941
size 2271064456
modules.json
ADDED
@@ -0,0 +1,14 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]
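
A hedged sketch of what these two module entries correspond to when the pipeline is composed programmatically with `sentence_transformers.models` (the checkpoint path below is a placeholder):

```python
from sentence_transformers import SentenceTransformer, models

# Placeholder path to a local clone of this repository.
checkpoint = "path/to/this/checkpoint"

word_embedding_model = models.Transformer(checkpoint, max_seq_length=512)   # module "0"
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),                    # 1024
    pooling_mode="mean",                                                    # matches 1_Pooling/config.json
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
```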
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:61225b1e389daed6d167d66295cae3c5fba84e53805a5f4a46bfe722b07849aa
size 4533976430
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f6e8b6abcd5b7edc0a23c1974f945d7b2f085dc4cc0d8a58d8f8285654784648
size 14645
scaler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d5a8a86fc500e74a162641b35f5ef4676b44c4cf6f3087e932f46794eef85e4a
size 1383
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c66f7b0aee860137529f3ac645f7468cb81a0bd5922399d65dbf9dc422486767
size 1465
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 512,
  "do_lower_case": false
}
sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
size 5069051
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d9a6af42442a3e3e9f05f618eae0bb2d98ca4f6a6406cb80ef7a4fa865204d61
size 17083052
tokenizer_config.json
ADDED
@@ -0,0 +1,63 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "250001": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "extra_special_tokens": {},
  "mask_token": "<mask>",
  "max_length": 512,
  "model_max_length": 512,
  "pad_to_multiple_of": null,
  "pad_token": "<pad>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "</s>",
  "sp_model_kwargs": {},
  "stride": 0,
  "tokenizer_class": "XLMRobertaTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}
trainer_state.json
ADDED
@@ -0,0 +1,55 @@
{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.1859888406695598,
  "eval_steps": 0,
  "global_step": 1500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06199628022318661,
      "grad_norm": 18.962848663330078,
      "learning_rate": 9.940000000000001e-07,
      "loss": 0.62,
      "step": 500
    },
    {
      "epoch": 0.12399256044637322,
      "grad_norm": 30.33436393737793,
      "learning_rate": 1.9880000000000003e-06,
      "loss": 0.3153,
      "step": 1000
    },
    {
      "epoch": 0.1859888406695598,
      "grad_norm": 140.38357543945312,
      "learning_rate": 2.9880000000000004e-06,
      "loss": 0.2382,
      "step": 1500
    }
  ],
  "logging_steps": 500,
  "max_steps": 8065,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:febef21c6ea635835a59b921426f94db975350cfc9b2f9f2733d543700dbabce
size 6033