yasserrmd committed on
Commit
a5f86fa
·
verified ·
1 Parent(s): ba5f793

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1024,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - dataset_size:16129
9
+ - loss:MultipleNegativesRankingLoss
10
+ widget:
11
+ - source_sentence: Rofr/Rofo/Rofn
12
+ sentences:
13
+ - between the parties is not executed within thirty (30) days following delivery,
14
+ of such notice to Snap, Snap shall be free thereafter to enter into an such an
15
+ agreement with any third party.
16
+ - 'This Agreement contains the entire agreement of the parties and SYNTEL shall
17
+ not be bound by any other different, additional, or further agreements or understandings
18
+ except as consented to in writing by the Chief Administrative Officer or Director,
19
+ Human Resources of SYNTEL. This Agreement shall be binding upon and inure to the
20
+ benefit of the parties hereto and their respective successors and assigns. No
21
+ amendment hereof shall be effective unless contained in a written instrument signed
22
+ by the parties hereto. No delay or omission by either party to exercise any right
23
+ or power under this Agreement shall impair such right or power or be construed
24
+ to be a waiver thereof. A waiver by either party of any of the covenants to be
25
+ performed by the other party or of any breach shall not be construed to be a waiver
26
+ of any succeeding breach or of any other covenant. If any portion of any provision
27
+ of the Agreement is declared invalid, the offending portion of such provision
28
+ shall be deemed severable from such provision and the remaining provisions of
29
+ the Agreement, which shall remain in full force and effect. EMPLOYEE shall not
30
+ assign or transfer this Agreement without the prior written consent of SYNTEL.
31
+ EMPLOYEE’s employment with SYNTEL is at will and may be terminated by SYNTEL at
32
+ any time with or without cause, and with or without notice. All rights and remedies
33
+ provided for in this Agreement shall be cumulative and in addition to and not
34
+ in lieu of any other rights or remedies available to either party at law, in equity,
35
+ or otherwise. Paragraphs 2, 3, 6, 7, 8, 9, 10, 11, 12, and 13 of this Agreement
36
+ shall survive termination of this Agreement and EMPLOYEE’s employment with SYNTEL.
37
+ The parties submit to the jurisdiction and venue of the circuit court for the
38
+ County of Oakland, State of Michigan or, if original jurisdiction can be established,
39
+ the United States District Court for the Eastern District of Michigan with respect
40
+ to: a) disputes, controversies, or claims arising out of EMPLOYEE’S failure to
41
+ abide by Paragraphs 6, 7, and/or Exhibit A – “Confidential Information” of this
42
+ Agreement, b) claims initiated by SYNTEL pursuant to Paragraph 10 of this Agreement,
43
+ and c) the enforcement of any awards or relief granted pursuant to the dispute
44
+ resolution procedures set forth in Paragraph 11 of this Agreement. The parties
45
+ stipulate that the venues referenced in this Agreement are convenient. This Agreement
46
+ shall be construed under and in accordance with the laws of the State of Michigan.'
47
+ - 'The existence and terms of this Term Sheet are “Confidential Information” under
48
+ and subject to the terms of the Confidentiality Agreement, dated February 23,
49
+ 2016 (as amended on August 16, 2016, the “ Confidentiality Agreement ”), between
50
+ CHC Leasing (Ireland) Limited and The Milestone Aviation Group Limited. The parties
51
+ confirm that the Confidentiality Agreement remains in full force and effect; provided
52
+ , however, the parties (i) agree that each party may disclose Confidential Information
53
+ to the professional advisers retained by the Committee and (ii) agree to work
54
+ in good faith to amend the Confidentiality Agreement to permit certain participants
55
+ in the Chapter 11 Case (as agreed to by the parties) to view a partially redacted
56
+ version of this Term Sheet. In addition, as each of the parties hereto acknowledges
57
+ that this Term Sheet is itself, and this Term Sheet contains, commercially sensitive
58
+ and proprietary information, with respect to the Chapter 11 Case, each of the
59
+ parties agrees to maintain this Term Sheet and this information strictly confidential,
60
+ and agrees to disclose it to no person other than: (i) the parties to the Plan
61
+ Support Agreement (ii) any person that has executed an accession and joinder to
62
+ the Confidentiality Agreement in the form appended thereto, (iii) the Bankruptcy
63
+ Court during the course of the Chapter 11 Case, provided , however, that no document
64
+ relating to the proposed transactions (including this Term Sheet) shall be filed
65
+ with the Bankruptcy Court (other than a motion, in form and substance acceptable
66
+ to the CHC Parties and the Milestone Parties, seeking protective order authority
67
+ to file this Term Sheet under seal, which motion shall not describe the specific
68
+ economic elements of the transaction) unless either (x) there has been obtained
69
+ prior to the filing thereof an order of the Bankruptcy Court acceptable to the
70
+ Milestone Parties enabling the CHC Parties to file such document under seal or
71
+ (y) portions of such filed documents mutually agreed upon by the CHC Parties and
72
+ the Milestone Parties are redacted, and (iv) the professional advisors of the
73
+ Committee on a confidential basis pursuant to a letter agreement entered into
74
+ with the Committee acceptable to the CHC Parties and Milestone setting forth a
75
+ protocol for disclosure including the information that can be disclosed generally
76
+ to the Committee and the information that is subject to limited disclosure to
77
+ only certain professional advisors to the Committee.'
78
+ - source_sentence: Anti-Assignment
79
+ sentences:
80
+ - Backhaul
81
+ - This agreement may not be assigned or delegated by Affiliate without prior written consent from Network 1.
82
+ - HealthGate will liaise with the Publishers, making available for such
83
+ purposes such HealthGate liaison staff as the Publishers may reasonably
84
+ require, and acting in all good faith, to ensure a mutually satisfactory
85
+ license to the Publishers or, at the Publishers' option, to a replacement
86
+ contractor.
87
+ - source_sentence: Notice Period To Terminate Renewal
88
+ sentences:
89
+ - After the initial period of two years, the maintenance and support contract
90
+ shall be automatically renewed for a period of one year on each renewal
91
+ date, unless one of the parties terminates the maintenance and support contract
92
+ through written notification to the other party in the form of a registered
93
+ letter with proof of receipt, at least six (6) weeks prior to the renewal
94
+ date.
95
+ - Any Transfer without such approval shall constitute a breach of this Agreement and
96
+ shall be void and of no effect.
97
+ - The Company shall do and perform, or cause to be done and performed, all such
98
+ further acts and things, and shall execute and deliver all such other agreements,
99
+ certificates, instruments and documents, as the MHR Funds may reasonably request
100
+ in order to carry out the intent and accomplish the purposes of this Agreement
101
+ and the consummation of the transactions contemplated hereby.
102
+ - source_sentence: Governing Law
103
+ sentences:
104
+ - In addition, the limitations in Section 23.1(b) will not apply (1) to Company's
105
+ indemnification obligations under Section 22.1(a) or (2) Allscripts indemnification
106
+ obligations under Section 22.3(a), unless the Company's or Allscripts' indemnification
107
+ obligation under Section 22.1(a) or 22.3(a), as the case may be, relates to the
108
+ losses and obligations described in subclauses (a) through (f) of the preceding
109
+ sentence. [***].
110
+ - 'THIS AGREEMENT SHALL BE GOVERNED BY AND CONSTRUED IN ACCORDANCE WITH THE INTERNAL
111
+ LAWS OF THE STATE OF NEW YORK APPLICABLE TO AGREEMENTS MADE AND TO BE PERFORMED
112
+ ENTIRELY WITHIN SUCH STATE, WITHOUT REGARD TO THE CONFLICTS OF LAW PRINCIPLES
113
+ OF SUCH STATE OTHER THAN SECTIONS 5-1401 OF THE NEW YORK GENERAL
114
+
115
+
116
+
117
+
118
+
119
+
120
+ OBLIGATIONS LAW.'
121
+ - All such records required to be created and maintained pursuant to Section 2.12(a)
122
+ shall be kept available at the Operator's office and made available for the Owner's
123
+ inspection upon request at all reasonable times.
124
+ - source_sentence: License Grant
125
+ sentences:
126
+ - SIERRA hereby grants ENVISION an exclusive, royalty-free sub-license
127
+ of the Product's future patents, and patent applications to distribute, sell and
128
+ market the Finished Product.
129
+ - Aucta should continue to receive 15% of Net Sales Royalty for as long as ETON
130
+ is selling the Product(s) in the Territory, unless otherwise agreed to under this
131
+ Agreement.
132
+ - In the event FCE notifies ExxonMobil that it has formally decided not to pursue
133
+ Generation 2 Technology for Power Applications, then upon ExxonMobil's written
134
+ request, FCE agrees to negotiate a grant to ExxonMobil and its Affiliates, under
135
+ commercially reasonable terms to be determined in good faith, a worldwide, royalty-bearing
136
+ (with the royalty to be negotiated), non-exclusive, sub-licensable right and license
137
+ to practice FCE Background Information and FCE Background Patents for Generation
138
+ 2 Technology in any application outside of Carbon Capture Applications and Hydrogen
139
+ Applications.
140
+ pipeline_tag: sentence-similarity
141
+ library_name: sentence-transformers
142
+ ---
143
+
144
+ # SentenceTransformer
145
+
146
+ This is a [sentence-transformers](https://www.SBERT.net) model. It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
147
+
148
+ ## Model Details
149
+
150
+ ### Model Description
151
+ - **Model Type:** Sentence Transformer
152
+ <!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
153
+ - **Maximum Sequence Length:** 512 tokens
154
+ - **Output Dimensionality:** 1024 dimensions
155
+ - **Similarity Function:** Cosine Similarity
156
+ <!-- - **Training Dataset:** Unknown -->
157
+ <!-- - **Language:** Unknown -->
158
+ <!-- - **License:** Unknown -->
159
+
160
+ ### Model Sources
161
+
162
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
163
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
164
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
165
+
166
+ ### Full Model Architecture
167
+
168
+ ```
169
+ SentenceTransformer(
170
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'XLMRobertaModel'})
171
+ (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
172
+ )
173
+ ```
174
+
175
+ ## Usage
176
+
177
+ ### Direct Usage (Sentence Transformers)
178
+
179
+ First install the Sentence Transformers library:
180
+
181
+ ```bash
182
+ pip install -U sentence-transformers
183
+ ```
184
+
185
+ Then you can load this model and run inference.
186
+ ```python
187
+ from sentence_transformers import SentenceTransformer
188
+
189
+ # Download from the 🤗 Hub (replace the placeholder ID below with this repository's model ID)
190
+ model = SentenceTransformer("sentence_transformers_model_id")
191
+ # Run inference
192
+ sentences = [
193
+ 'License Grant',
194
+ "In the event FCE notifies ExxonMobil that it has formally decided not to pursue Generation 2 Technology for Power Applications, then upon ExxonMobil's written request, FCE agrees to negotiate a grant to ExxonMobil and its Affiliates, under commercially reasonable terms to be determined in good faith, a worldwide, royalty-bearing (with the royalty to be negotiated), non-exclusive, sub-licensable right and license to practice FCE Background Information and FCE Background Patents for Generation 2 Technology in any application outside of Carbon Capture Applications and Hydrogen Applications.",
195
+ 'Aucta should continue to receive 15% of Net Sales Royalty for as long as ETON is selling the Product(s) in the Territory, unless otherwise agreed to under this Agreement.',
196
+ ]
197
+ embeddings = model.encode(sentences)
198
+ print(embeddings.shape)
199
+ # (3, 1024)
200
+
201
+ # Get the similarity scores for the embeddings
202
+ similarities = model.similarity(embeddings, embeddings)
203
+ print(similarities)
204
+ # tensor([[1.0000, 0.7920, 0.3253],
205
+ # [0.7920, 1.0000, 0.4614],
206
+ # [0.3253, 0.4614, 1.0000]])
207
+ ```
208
+
209
+ <!--
210
+ ### Direct Usage (Transformers)
211
+
212
+ <details><summary>Click to see the direct usage in Transformers</summary>
213
+
214
+ </details>
215
+ -->
216
+
217
+ <!--
218
+ ### Downstream Usage (Sentence Transformers)
219
+
220
+ You can finetune this model on your own dataset.
221
+
222
+ <details><summary>Click to expand</summary>
223
+
224
+ </details>
225
+ -->
226
+
227
+ <!--
228
+ ### Out-of-Scope Use
229
+
230
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
231
+ -->
232
+
233
+ <!--
234
+ ## Bias, Risks and Limitations
235
+
236
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
237
+ -->
238
+
239
+ <!--
240
+ ### Recommendations
241
+
242
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
243
+ -->
244
+
245
+ ## Training Details
246
+
247
+ ### Training Dataset
248
+
249
+ #### Unnamed Dataset
250
+
251
+ * Size: 16,129 training samples
252
+ * Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
253
+ * Approximate statistics based on the first 1000 samples:
254
+ | | sentence_0 | sentence_1 | label |
255
+ |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------|
256
+ | type | string | string | float |
257
+ | details | <ul><li>min: 3 tokens</li><li>mean: 54.18 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 95.75 tokens</li><li>max: 512 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
258
+ * Samples:
259
+ | sentence_0 | sentence_1 | label |
260
+ |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------|
261
+ | <code>Parties</code> | <code>STARTEC GLOBAL COMMUNICATIONS CORPORATION</code> | <code>1.0</code> |
262
+ | <code>The proceeds of the Revolving Loans and the Swingline Loans, and the Letters of Credit, shall be used for general corporate purposes, including, but not limited to, repayment of any Indebtedness and to backstop the issuance of commercial paper.</code> | <code>Use the proceeds of the Loans and the Letters of Credit only as contemplated in Section  3.12 . The Borrower will not request any Borrowing, and the Borrower shall not use, and shall procure that its Subsidiaries and its or their respective directors, officers, employees and agents shall not use, the proceeds of any Borrowing (a) in furtherance of an offer, payment, promise to pay, or authorization of the payment or giving of money, or anything else of value, to any Person in violation of any Anti-Corruption Laws in any material respect, (b) for the purpose of funding, financing or facilitating any unauthorized activities, business or transaction of or with any Sanctioned Person, or in any Sanctioned Country, or (c) knowingly in any manner that would result in the violation of any Sanctions Laws applicable to any party hereto.</code> | <code>1.0</code> |
263
+ | <code>Governing Law</code> | <code>state.</code> | <code>1.0</code> |
264
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
265
+ ```json
266
+ {
267
+ "scale": 20.0,
268
+ "similarity_fct": "cos_sim",
269
+ "gather_across_devices": false
270
+ }
271
+ ```
272
+
273
+ ### Training Hyperparameters
274
+ #### Non-Default Hyperparameters
275
+
276
+ - `per_device_train_batch_size`: 2
277
+ - `per_device_eval_batch_size`: 2
278
+ - `num_train_epochs`: 1
279
+ - `fp16`: True
280
+ - `multi_dataset_batch_sampler`: round_robin
281
+
282
+ #### All Hyperparameters
283
+ <details><summary>Click to expand</summary>
284
+
285
+ - `overwrite_output_dir`: False
286
+ - `do_predict`: False
287
+ - `eval_strategy`: no
288
+ - `prediction_loss_only`: True
289
+ - `per_device_train_batch_size`: 2
290
+ - `per_device_eval_batch_size`: 2
291
+ - `per_gpu_train_batch_size`: None
292
+ - `per_gpu_eval_batch_size`: None
293
+ - `gradient_accumulation_steps`: 1
294
+ - `eval_accumulation_steps`: None
295
+ - `torch_empty_cache_steps`: None
296
+ - `learning_rate`: 5e-05
297
+ - `weight_decay`: 0.0
298
+ - `adam_beta1`: 0.9
299
+ - `adam_beta2`: 0.999
300
+ - `adam_epsilon`: 1e-08
301
+ - `max_grad_norm`: 1
302
+ - `num_train_epochs`: 1
303
+ - `max_steps`: -1
304
+ - `lr_scheduler_type`: linear
305
+ - `lr_scheduler_kwargs`: {}
306
+ - `warmup_ratio`: 0.0
307
+ - `warmup_steps`: 0
308
+ - `log_level`: passive
309
+ - `log_level_replica`: warning
310
+ - `log_on_each_node`: True
311
+ - `logging_nan_inf_filter`: True
312
+ - `save_safetensors`: True
313
+ - `save_on_each_node`: False
314
+ - `save_only_model`: False
315
+ - `restore_callback_states_from_checkpoint`: False
316
+ - `no_cuda`: False
317
+ - `use_cpu`: False
318
+ - `use_mps_device`: False
319
+ - `seed`: 42
320
+ - `data_seed`: None
321
+ - `jit_mode_eval`: False
322
+ - `use_ipex`: False
323
+ - `bf16`: False
324
+ - `fp16`: True
325
+ - `fp16_opt_level`: O1
326
+ - `half_precision_backend`: auto
327
+ - `bf16_full_eval`: False
328
+ - `fp16_full_eval`: False
329
+ - `tf32`: None
330
+ - `local_rank`: 0
331
+ - `ddp_backend`: None
332
+ - `tpu_num_cores`: None
333
+ - `tpu_metrics_debug`: False
334
+ - `debug`: []
335
+ - `dataloader_drop_last`: False
336
+ - `dataloader_num_workers`: 0
337
+ - `dataloader_prefetch_factor`: None
338
+ - `past_index`: -1
339
+ - `disable_tqdm`: False
340
+ - `remove_unused_columns`: True
341
+ - `label_names`: None
342
+ - `load_best_model_at_end`: False
343
+ - `ignore_data_skip`: False
344
+ - `fsdp`: []
345
+ - `fsdp_min_num_params`: 0
346
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
347
+ - `fsdp_transformer_layer_cls_to_wrap`: None
348
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
349
+ - `deepspeed`: None
350
+ - `label_smoothing_factor`: 0.0
351
+ - `optim`: adamw_torch_fused
352
+ - `optim_args`: None
353
+ - `adafactor`: False
354
+ - `group_by_length`: False
355
+ - `length_column_name`: length
356
+ - `ddp_find_unused_parameters`: None
357
+ - `ddp_bucket_cap_mb`: None
358
+ - `ddp_broadcast_buffers`: False
359
+ - `dataloader_pin_memory`: True
360
+ - `dataloader_persistent_workers`: False
361
+ - `skip_memory_metrics`: True
362
+ - `use_legacy_prediction_loop`: False
363
+ - `push_to_hub`: False
364
+ - `resume_from_checkpoint`: None
365
+ - `hub_model_id`: None
366
+ - `hub_strategy`: every_save
367
+ - `hub_private_repo`: None
368
+ - `hub_always_push`: False
369
+ - `hub_revision`: None
370
+ - `gradient_checkpointing`: False
371
+ - `gradient_checkpointing_kwargs`: None
372
+ - `include_inputs_for_metrics`: False
373
+ - `include_for_metrics`: []
374
+ - `eval_do_concat_batches`: True
375
+ - `fp16_backend`: auto
376
+ - `push_to_hub_model_id`: None
377
+ - `push_to_hub_organization`: None
378
+ - `mp_parameters`:
379
+ - `auto_find_batch_size`: False
380
+ - `full_determinism`: False
381
+ - `torchdynamo`: None
382
+ - `ray_scope`: last
383
+ - `ddp_timeout`: 1800
384
+ - `torch_compile`: False
385
+ - `torch_compile_backend`: None
386
+ - `torch_compile_mode`: None
387
+ - `include_tokens_per_second`: False
388
+ - `include_num_input_tokens_seen`: False
389
+ - `neftune_noise_alpha`: None
390
+ - `optim_target_modules`: None
391
+ - `batch_eval_metrics`: False
392
+ - `eval_on_start`: False
393
+ - `use_liger_kernel`: False
394
+ - `liger_kernel_config`: None
395
+ - `eval_use_gather_object`: False
396
+ - `average_tokens_across_devices`: False
397
+ - `prompts`: None
398
+ - `batch_sampler`: batch_sampler
399
+ - `multi_dataset_batch_sampler`: round_robin
400
+ - `router_mapping`: {}
401
+ - `learning_rate_mapping`: {}
402
+
403
+ </details>
404
+
405
+ ### Training Logs
406
+ | Epoch | Step | Training Loss |
407
+ |:------:|:----:|:-------------:|
408
+ | 0.0620 | 500 | 0.62 |
409
+ | 0.1240 | 1000 | 0.3153 |
410
+ | 0.1860 | 1500 | 0.2382 |
411
+
412
+
413
+ ### Framework Versions
414
+ - Python: 3.12.11
415
+ - Sentence Transformers: 5.1.0
416
+ - Transformers: 4.55.4
417
+ - PyTorch: 2.8.0+cu126
418
+ - Accelerate: 1.10.1
419
+ - Datasets: 4.0.0
420
+ - Tokenizers: 0.21.4
421
+
422
+ ## Citation
423
+
424
+ ### BibTeX
425
+
426
+ #### Sentence Transformers
427
+ ```bibtex
428
+ @inproceedings{reimers-2019-sentence-bert,
429
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
430
+ author = "Reimers, Nils and Gurevych, Iryna",
431
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
432
+ month = "11",
433
+ year = "2019",
434
+ publisher = "Association for Computational Linguistics",
435
+ url = "https://arxiv.org/abs/1908.10084",
436
+ }
437
+ ```
438
+
439
+ #### MultipleNegativesRankingLoss
440
+ ```bibtex
441
+ @misc{henderson2017efficient,
442
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
443
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
444
+ year={2017},
445
+ eprint={1705.00652},
446
+ archivePrefix={arXiv},
447
+ primaryClass={cs.CL}
448
+ }
449
+ ```
450
+
451
+ <!--
452
+ ## Glossary
453
+
454
+ *Clearly define terms in order to be accessible across audiences.*
455
+ -->
456
+
457
+ <!--
458
+ ## Model Card Authors
459
+
460
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
461
+ -->
462
+
463
+ <!--
464
+ ## Model Card Contact
465
+
466
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
467
+ -->
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "XLMRobertaModel"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 0,
7
+ "classifier_dropout": null,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 8194,
16
+ "model_type": "xlm-roberta",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "output_past": true,
20
+ "pad_token_id": 1,
21
+ "position_embedding_type": "absolute",
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.55.4",
24
+ "type_vocab_size": 1,
25
+ "use_cache": true,
26
+ "vocab_size": 250002
27
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.0",
5
+ "transformers": "4.55.4",
6
+ "pytorch": "2.8.0+cu126"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47f038cb4a9941d4beccf5600b6d204052be302a0857f57fc62b3423671ec941
3
+ size 2271064456
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61225b1e389daed6d167d66295cae3c5fba84e53805a5f4a46bfe722b07849aa
3
+ size 4533976430
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6e8b6abcd5b7edc0a23c1974f945d7b2f085dc4cc0d8a58d8f8285654784648
3
+ size 14645
scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5a8a86fc500e74a162641b35f5ef4676b44c4cf6f3087e932f46794eef85e4a
3
+ size 1383
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c66f7b0aee860137529f3ac645f7468cb81a0bd5922399d65dbf9dc422486767
3
+ size 1465
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a6af42442a3e3e9f05f618eae0bb2d98ca4f6a6406cb80ef7a4fa865204d61
3
+ size 17083052
tokenizer_config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "250001": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "<s>",
47
+ "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
+ "mask_token": "<mask>",
50
+ "max_length": 512,
51
+ "model_max_length": 512,
52
+ "pad_to_multiple_of": null,
53
+ "pad_token": "<pad>",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
+ "sep_token": "</s>",
57
+ "sp_model_kwargs": {},
58
+ "stride": 0,
59
+ "tokenizer_class": "XLMRobertaTokenizer",
60
+ "truncation_side": "right",
61
+ "truncation_strategy": "longest_first",
62
+ "unk_token": "<unk>"
63
+ }
trainer_state.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 0.1859888406695598,
6
+ "eval_steps": 0,
7
+ "global_step": 1500,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.06199628022318661,
14
+ "grad_norm": 18.962848663330078,
15
+ "learning_rate": 9.940000000000001e-07,
16
+ "loss": 0.62,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 0.12399256044637322,
21
+ "grad_norm": 30.33436393737793,
22
+ "learning_rate": 1.9880000000000003e-06,
23
+ "loss": 0.3153,
24
+ "step": 1000
25
+ },
26
+ {
27
+ "epoch": 0.1859888406695598,
28
+ "grad_norm": 140.38357543945312,
29
+ "learning_rate": 2.9880000000000004e-06,
30
+ "loss": 0.2382,
31
+ "step": 1500
32
+ }
33
+ ],
34
+ "logging_steps": 500,
35
+ "max_steps": 8065,
36
+ "num_input_tokens_seen": 0,
37
+ "num_train_epochs": 1,
38
+ "save_steps": 500,
39
+ "stateful_callbacks": {
40
+ "TrainerControl": {
41
+ "args": {
42
+ "should_epoch_stop": false,
43
+ "should_evaluate": false,
44
+ "should_log": false,
45
+ "should_save": true,
46
+ "should_training_stop": false
47
+ },
48
+ "attributes": {}
49
+ }
50
+ },
51
+ "total_flos": 0.0,
52
+ "train_batch_size": 2,
53
+ "trial_name": null,
54
+ "trial_params": null
55
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:febef21c6ea635835a59b921426f94db975350cfc9b2f9f2733d543700dbabce
3
+ size 6033