Fatin757 committed on
Commit
9c347e7
·
verified ·
1 Parent(s): 42982cd

Add new SentenceTransformer model

Browse files
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - sentence-transformers
4
+ - sentence-similarity
5
+ - feature-extraction
6
+ - dense
7
+ - generated_from_trainer
8
+ - dataset_size:6032
9
+ - loss:MultipleNegativesRankingLoss
10
+ base_model: answerdotai/ModernBERT-base
11
+ widget:
12
+ - source_sentence: The Field Sales Executive/Key Account Executive/Sales Operations
13
+ Management Specialist is responsible for being the contact point with commercial
14
+ accounts on various logistics services. He/She is also responsible to provide
15
+ support in identifying potential customers, establishing partnerships to expand
16
+ the company's businesses, selling solutions and participating in programmes targeting
17
+ different customers with support of internal departments in building customer
18
+ relationships. Resourceful and analytical, he is required to understand customer
19
+ needs and convince customers to adopt the proposed solutions.
20
+ sentences:
21
+ - The Sales Operations Coordinator is responsible for being the main point of contact
22
+ with retail clients on various marketing services. He/She is also tasked with
23
+ providing assistance in identifying potential partners, establishing collaborations
24
+ to grow the company's market reach, promoting services, and participating in initiatives
25
+ targeting diverse clients with support from different departments in nurturing
26
+ client connections. Resourceful and detail-oriented, he is required to comprehend
27
+ client preferences and influence clients to consider the proposed offerings.
28
+ - The Key Account Manager is tasked with serving as the primary liaison for commercial
29
+ clients regarding a range of logistics services. This role involves identifying
30
+ potential customers, fostering partnerships to enhance the company's market presence,
31
+ and delivering tailored solutions. The Key Account Manager collaborates with internal
32
+ teams to develop strategies aimed at strengthening customer relationships. With
33
+ a resourceful and analytical mindset, he/she must grasp customer requirements
34
+ and effectively persuade clients to embrace the recommended solutions.
35
+ - The Civil and Structural Engineering Manager is responsible for the comprehensive
36
+ management and execution of engineering projects, ensuring alignment with specific
37
+ project requirements. This role involves reviewing designs, specifications, calculations,
38
+ and various submissions to maintain project integrity. The manager oversees all
39
+ assessment phases of projects, guaranteeing that developed designs and models
40
+ meet the established criteria. Additionally, he/she leads the tendering process
41
+ and supervises a team of engineers, fostering a culture of continuous performance
42
+ enhancement. The position may also entail fulfilling the duties of a Qualified
43
+ Person as stipulated by the Building Control Act. The ideal candidate will have
44
+ strong analytical, problem-solving, and decision-making abilities, along with
45
+ expertise in civil and structural engineering practices and project management.
46
+ Exceptional leadership and communication skills are essential, and the role requires
47
+ a balance of office work and on-site project involvement.
48
+ - source_sentence: The Capacity Management Executive assists in capacity planning
49
+ and management, making recommendations on cargo mixtures based on profit maximisation
50
+ and customer demand, and proposing alternatives and contingencies to handle capacity
51
+ issues. He/She coordinates cargo handling and transhipment operations with business
52
+ partners and stakeholders and is responsible for ensuring dangerous cargo is handled
53
+ correctly. He tracks vessel movements and assist in adhoc route adjustments to
54
+ maximise voyage yields and minimise operational expenses. He possesses an innovative
55
+ mind-set and can work under tight deadlines.
56
+ sentences:
57
+ - The Lighting Technician is responsible for overseeing the installation and operation
58
+ of lighting systems for various productions. This role requires proficiency in
59
+ working at heights and operating elevated work platforms, as well as a solid understanding
60
+ of basic electrical principles. Adherence to workplace safety and health regulations
61
+ is essential. The Lighting Technician serves as the primary point of contact for
62
+ any safety concerns or incidents that may arise. Additionally, this position may
63
+ involve supervising entry-level lighting staff to ensure that lighting setups
64
+ are executed accurately and safely before and during events. Depending on their
65
+ skills and experiences, Lighting Technicians may also engage in specialized tasks.
66
+ They can work either on a full-time or casual basis across venues, rental companies,
67
+ production firms, or directly within production teams.
68
+ - The Cargo Optimization Specialist plays a crucial role in overseeing capacity
69
+ planning and management within the logistics sector. This position involves analyzing
70
+ cargo compositions to enhance profitability while meeting customer needs, as well
71
+ as suggesting alternatives and contingency plans to address any capacity challenges.
72
+ The specialist collaborates closely with business partners and stakeholders to
73
+ ensure efficient cargo handling and transshipment operations, with a strong emphasis
74
+ on the safe handling of hazardous materials. Additionally, they monitor vessel
75
+ movements and assist in making real-time route adjustments aimed at maximizing
76
+ voyage profitability and minimizing operational costs. The ideal candidate will
77
+ demonstrate innovative thinking and the ability to thrive under pressure.
78
+ - The Junior Risk Analyst is responsible for identifying and assessing potential
79
+ risks within the financial services sector. In this role, the analyst gathers
80
+ data to evaluate risk exposure and develops strategies to mitigate these risks.
81
+ They work with various teams to ensure compliance with regulatory standards and
82
+ contribute to the preparation of risk assessment reports. The Junior Risk Analyst
83
+ must possess strong analytical skills and the ability to communicate findings
84
+ effectively, while also being adaptable to changing regulations and market conditions.
85
+ This position requires a detail-oriented individual who can work collaboratively
86
+ in a fast-paced environment.
87
+ - source_sentence: The Exhibition Producer/Conference Producer/Meeting Planner is
88
+ responsible for the development of concepts and content curation for meetings,
89
+ conferences and exhibitions. He/She works closely with internal and external stakeholders
90
+ to develop incentive programmes, conferences and exhibitions. He utilises findings
91
+ from market research to develop new meetings and enhance the user experience.
92
+ Innovative and insightful, he is able to rationalise plethora of ideas into marketable
93
+ products that meets customer requirements. He stays abreast of industry and market
94
+ trends to discover current, new, and alternative growth areas and subjects for
95
+ meetings, conferences and exhibitions. He travels frequently to attend industry
96
+ events and networks extensively outside of the office to have a deeper understanding
97
+ on the emerging trends in the industry.
98
+ sentences:
99
+ - The Conference Assistant is responsible for supporting the planning and execution
100
+ of meetings and conferences. This role focuses on administrative tasks such as
101
+ scheduling, coordinating logistics, and assisting with on-site operations. The
102
+ Conference Assistant works under the guidance of senior staff to ensure all event
103
+ details are managed effectively. While they may assist in gathering information
104
+ from market research, their primary function is to handle logistical aspects rather
105
+ than develop event concepts. The position requires strong organizational skills
106
+ and the ability to follow instructions, but it does not involve the responsibility
107
+ of creating new programs or traveling for industry insights. Instead, the Conference
108
+ Assistant's role is primarily office-based, with limited external engagement.
109
+ - The Event Coordinator is tasked with the creation and organization of engaging
110
+ concepts and content for various events, including meetings, conferences, and
111
+ exhibitions. This role involves close collaboration with both internal teams and
112
+ external partners to design incentive programs and curate event experiences. Utilizing
113
+ insights gained from market research, the Event Coordinator is responsible for
114
+ innovating new events and enhancing attendee engagement. With a creative mindset,
115
+ they can distill a wide range of ideas into appealing offerings that align with
116
+ client needs. Staying informed about industry trends and market developments,
117
+ the Event Coordinator identifies new opportunities for growth and relevant topics
118
+ for events. Frequent travel to industry gatherings is essential, as it allows
119
+ for extensive networking and a deeper understanding of emerging trends.
120
+ - The Talent Development Manager is responsible for creating and executing comprehensive
121
+ talent development initiatives that align with the organization's current and
122
+ future business needs. This role involves designing clear career pathways to enhance
123
+ employee awareness of advancement opportunities and advising managers on effective
124
+ career development strategies. The Talent Development Manager oversees high-potential
125
+ talent programs and succession planning efforts, ensuring a robust pipeline for
126
+ key positions within the organization to support ongoing success and stability.
127
+ Additionally, this manager implements policies related to retirement and employee
128
+ exits, providing guidance on managing transitions effectively. The role also includes
129
+ managing team performance and operations while integrating Skill Frameworks into
130
+ talent development initiatives. The ideal candidate for this position is highly
131
+ people-oriented, possesses exceptional communication skills, and engages diplomatically
132
+ with various stakeholders. They are adept at analyzing complex challenges and
133
+ making informed decisions to drive organizational success.
134
+ - source_sentence: The Program Manager plans and oversees multiple inter-dependent
135
+ programs spanning multiple years that impact one or more business units or one
136
+ larger project. He/She oversees all aspects of assigned programs throughout program
137
+ lifecycles to ensure completion within the defined scope, quality, time and cost
138
+ constraints. He ensures accurate allocations of resources throughout the program.
139
+ He leads multi-disciplinary teams, composed of various levels of personnel, vendors,
140
+ and clients to create and deploy successful programs. He coaches team members
141
+ on Agile practices and values, and Scrum process framework. He is proficient in
142
+ Agile practices and methodology, project management methodologies and tools, as
143
+ well as Scrum process framework. The Program Manager is confident and decisive
144
+ in leading projects, overseeing the completion and integration of inter-dependent
145
+ programs and parts. He has excellent communication skills, capable of effectively
146
+ influencing various internal and external stakeholders.
147
+ sentences:
148
+ - The Senior Workplace Safety and Health (WSH) Auditor plays a pivotal role in guiding
149
+ the audit team through comprehensive WSH audits for various client organizations.
150
+ This individual is tasked with providing expert advice to stakeholders on matters
151
+ related to WSH audits. The Senior WSH Auditor excels in collaboration, possesses
152
+ strong analytical skills, is resourceful, and effectively fosters teamwork while
153
+ facilitating productive discussions.
154
+ - The Operations Manager is tasked with overseeing daily operational activities
155
+ within a specific department, ensuring that all processes run smoothly and efficiently.
156
+ This role involves managing the workflow and performance of the team to meet departmental
157
+ objectives and targets. The Operations Manager is responsible for resource allocation
158
+ and optimizing operational procedures to enhance productivity. They lead a team
159
+ of operational staff, providing support and training as necessary. Knowledge of
160
+ operational best practices and methodologies is crucial for success in this role.
161
+ The Operations Manager must be able to communicate effectively with team members
162
+ and stakeholders to facilitate collaboration and ensure alignment with organizational
163
+ goals.
164
+ - The Project Coordinator is responsible for planning and managing various interconnected
165
+ projects that span several years and affect multiple business units or a significant
166
+ project. This role involves overseeing all phases of assigned projects throughout
167
+ their lifecycles to ensure they are completed within the specified scope, quality,
168
+ timeline, and budget constraints. The Project Coordinator ensures that resources
169
+ are accurately allocated throughout the project duration. Additionally, they lead
170
+ cross-functional teams comprising different levels of personnel, vendors, and
171
+ clients to successfully implement and deliver projects. They provide guidance
172
+ to team members on Agile methodologies and practices, as well as the Scrum process
173
+ framework. Proficiency in Agile principles, project management techniques, and
174
+ Scrum methodologies is essential for this role. The Project Coordinator must be
175
+ assertive and decisive in managing projects, ensuring the successful completion
176
+ and integration of interrelated projects. Strong communication skills are crucial,
177
+ as they will need to effectively influence various stakeholders both internally
178
+ and externally.
179
+ - source_sentence: The Senior Anchor/Senior Presenter/Anchor/ Presenter - News delivers
180
+ news stories to the broadcasting station's audience and is the public face or
181
+ voice of the programmes broadcasted on various platforms. He/She is involved in
182
+ the collection of news materials and is required to conduct research on stories
183
+ and interview people who have accurate information on news events. He is also
184
+ involved in the development and writing of content and is responsible for reviewing
185
+ and editing materials written by other news reporters to ensure that the content
186
+ is tailored to the target audience. He is required to host or co-host programmes
187
+ by providing live commentaries and doing live interviews to create content that
188
+ links closely to the stories. He often works from a studio and may be expected
189
+ to travel in order to present news from remote locations in the field related
190
+ to a particular major news event. He follows a fixed working schedule, but may
191
+ be required to work at odd hours, including weekends, to cover important events.
192
+ He should be an effective communicator with an understanding of news editorial
193
+ process. He should ideally have a background in journalism or mass communications
194
+ and possesses an understanding of daily newscast content and media ethics. He
195
+ ought to be able to improvise and ad-lib in a live on-camera setting and be able
196
+ to work well with others across a variety of situations.
197
+ sentences:
198
+ - 'The Sustainability Advisor offers expert guidance to organizations seeking to
199
+ implement effective waste management practices that comply with environmental
200
+ standards and align with their corporate social responsibility objectives. This
201
+ role involves performing comprehensive waste audits and risk evaluations, crafting
202
+ and proposing strategies for waste reduction, and spearheading initiatives focused
203
+ on the circular economy and sustainability. The Sustainability Advisor is expected
204
+ to utilize advanced waste management technologies, ensure adherence to regulations,
205
+ and conduct life cycle assessments to deliver practical recommendations that facilitate
206
+ organizational transformation. Additionally, this position is crucial in maintaining
207
+ regulatory compliance and permits, providing training to stakeholders on best
208
+ waste management practices, and promoting a culture of engagement and change among
209
+ all parties involved.
210
+
211
+
212
+ The Sustainability Advisor must possess strong problem-solving abilities, effective
213
+ communication skills, and the capacity to influence others, enabling them to engage
214
+ with cross-functional teams and a variety of stakeholders successfully. They will
215
+ manage waste management projects efficiently and cultivate a sustainable culture
216
+ within organizations to help achieve long-term waste management objectives.'
217
+ - The Junior News Reporter is tasked with gathering and compiling news stories for
218
+ the broadcasting station's audience, acting as a supportive figure in various
219
+ programs aired on different platforms. This position includes the collection of
220
+ news materials and conducting basic research on stories, as well as interviewing
221
+ individuals who may provide information on news events. The Junior News Reporter
222
+ assists in the writing and editing of content produced by senior reporters, ensuring
223
+ it meets the audience's needs. This role may also involve supporting hosts during
224
+ live programs by providing background information and conducting interviews to
225
+ aid in storytelling. While primarily working from the studio, there may be occasional
226
+ travel to gather information for specific news events. The Junior News Reporter
227
+ typically follows a standard work schedule but might be called upon to work during
228
+ odd hours, including weekends, to assist in covering significant events. Effective
229
+ communication skills and a foundational understanding of media ethics are important,
230
+ along with a basic knowledge of the news editorial process. The ability to work
231
+ collaboratively with colleagues and adapt to various situations is also necessary.
232
+ - The Lead News Presenter is responsible for delivering engaging news stories to
233
+ the audience of the broadcasting station, serving as the public face or voice
234
+ of various programs aired across multiple platforms. This role involves gathering
235
+ news materials, conducting thorough research, and interviewing credible sources
236
+ to ensure accurate reporting on current events. The Lead News Presenter also plays
237
+ a critical role in content development and writing, as well as reviewing and editing
238
+ articles produced by other reporters to ensure alignment with the target audience's
239
+ interests. Additionally, this position requires hosting or co-hosting programs,
240
+ providing live commentary, and conducting live interviews to enhance storytelling.
241
+ While primarily based in a studio, the Lead News Presenter may travel to report
242
+ from remote locations for significant news events. The role follows a structured
243
+ work schedule but may necessitate working irregular hours, including weekends,
244
+ to cover key happenings. Strong communication skills and a solid understanding
245
+ of the news editorial process are essential, along with a background in journalism
246
+ or mass communications, familiarity with daily newscast content, and adherence
247
+ to media ethics. The ability to improvise and ad-lib during live broadcasts while
248
+ collaborating effectively with team members is also crucial.
249
+ datasets:
250
+ - Fatin757/ssf-train-valid_v3
251
+ pipeline_tag: sentence-similarity
252
+ library_name: sentence-transformers
253
+ ---
254
+
255
+ # SentenceTransformer based on answerdotai/ModernBERT-base
256
+
257
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the [ssf-train-valid_v3](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
258
+
259
+ ## Model Details
260
+
261
+ ### Model Description
262
+ - **Model Type:** Sentence Transformer
263
+ - **Base model:** [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) <!-- at revision 8949b909ec900327062f0ebf497f51aef5e6f0c8 -->
264
+ - **Maximum Sequence Length:** 8192 tokens
265
+ - **Output Dimensionality:** 768 dimensions
266
+ - **Similarity Function:** Cosine Similarity
267
+ - **Training Dataset:**
268
+ - [ssf-train-valid_v3](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3)
269
+ <!-- - **Language:** Unknown -->
270
+ <!-- - **License:** Unknown -->
271
+
272
+ ### Model Sources
273
+
274
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
275
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
276
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
277
+
278
+ ### Full Model Architecture
279
+
280
+ ```
281
+ SentenceTransformer(
282
+ (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
283
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
284
+ )
285
+ ```
286
+
287
+ ## Usage
288
+
289
+ ### Direct Usage (Sentence Transformers)
290
+
291
+ First install the Sentence Transformers library:
292
+
293
+ ```bash
294
+ pip install -U sentence-transformers
295
+ ```
296
+
297
+ Then you can load this model and run inference.
298
+ ```python
299
+ from sentence_transformers import SentenceTransformer
300
+
301
+ # Download from the 🤗 Hub
302
+ model = SentenceTransformer("Fatin757/ssf-retriever-modernbert-v2")
303
+ # Run inference
304
+ sentences = [
305
+ "The Senior Anchor/Senior Presenter/Anchor/ Presenter - News delivers news stories to the broadcasting station's audience and is the public face or voice of the programmes broadcasted on various platforms. He/She is involved in the collection of news materials and is required to conduct research on stories and interview people who have accurate information on news events. He is also involved in the development and writing of content and is responsible for reviewing and editing materials written by other news reporters to ensure that the content is tailored to the target audience. He is required to host or co-host programmes by providing live commentaries and doing live interviews to create content that links closely to the stories. He often works from a studio and may be expected to travel in order to present news from remote locations in the field related to a particular major news event. He follows a fixed working schedule, but may be required to work at odd hours, including weekends, to cover important events. He should be an effective communicator with an understanding of news editorial process. He should ideally have a background in journalism or mass communications and possesses an understanding of daily newscast content and media ethics. He ought to be able to improvise and ad-lib in a live on-camera setting and be able to work well with others across a variety of situations.",
306
+ "The Lead News Presenter is responsible for delivering engaging news stories to the audience of the broadcasting station, serving as the public face or voice of various programs aired across multiple platforms. This role involves gathering news materials, conducting thorough research, and interviewing credible sources to ensure accurate reporting on current events. The Lead News Presenter also plays a critical role in content development and writing, as well as reviewing and editing articles produced by other reporters to ensure alignment with the target audience's interests. Additionally, this position requires hosting or co-hosting programs, providing live commentary, and conducting live interviews to enhance storytelling. While primarily based in a studio, the Lead News Presenter may travel to report from remote locations for significant news events. The role follows a structured work schedule but may necessitate working irregular hours, including weekends, to cover key happenings. Strong communication skills and a solid understanding of the news editorial process are essential, along with a background in journalism or mass communications, familiarity with daily newscast content, and adherence to media ethics. The ability to improvise and ad-lib during live broadcasts while collaborating effectively with team members is also crucial.",
307
+ "The Junior News Reporter is tasked with gathering and compiling news stories for the broadcasting station's audience, acting as a supportive figure in various programs aired on different platforms. This position includes the collection of news materials and conducting basic research on stories, as well as interviewing individuals who may provide information on news events. The Junior News Reporter assists in the writing and editing of content produced by senior reporters, ensuring it meets the audience's needs. This role may also involve supporting hosts during live programs by providing background information and conducting interviews to aid in storytelling. While primarily working from the studio, there may be occasional travel to gather information for specific news events. The Junior News Reporter typically follows a standard work schedule but might be called upon to work during odd hours, including weekends, to assist in covering significant events. Effective communication skills and a foundational understanding of media ethics are important, along with a basic knowledge of the news editorial process. The ability to work collaboratively with colleagues and adapt to various situations is also necessary.",
308
+ ]
309
+ embeddings = model.encode(sentences)
310
+ print(embeddings.shape)
311
+ # (3, 768)
312
+
313
+ # Get the similarity scores for the embeddings
314
+ similarities = model.similarity(embeddings, embeddings)
315
+ print(similarities)
316
+ # tensor([[1.0000, 0.9225, 0.4020],
317
+ # [0.9225, 1.0000, 0.4397],
318
+ # [0.4020, 0.4397, 1.0000]])
319
+ ```
320
+
321
+ <!--
322
+ ### Direct Usage (Transformers)
323
+
324
+ <details><summary>Click to see the direct usage in Transformers</summary>
325
+
326
+ </details>
327
+ -->
328
+
329
+ <!--
330
+ ### Downstream Usage (Sentence Transformers)
331
+
332
+ You can finetune this model on your own dataset.
333
+
334
+ <details><summary>Click to expand</summary>
335
+
336
+ </details>
337
+ -->
338
+
339
+ <!--
340
+ ### Out-of-Scope Use
341
+
342
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
343
+ -->
344
+
345
+ <!--
346
+ ## Bias, Risks and Limitations
347
+
348
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
349
+ -->
350
+
351
+ <!--
352
+ ### Recommendations
353
+
354
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
355
+ -->
356
+
357
+ ## Training Details
358
+
359
+ ### Training Dataset
360
+
361
+ #### ssf-train-valid_v3
362
+
363
+ * Dataset: [ssf-train-valid_v3](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3) at [5d23d8e](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3/tree/5d23d8eeff6292b6f2fb97cae6fd2f287dca7758)
364
+ * Size: 6,032 training samples
365
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
366
+ * Approximate statistics based on the first 1000 samples:
367
+ | | anchor | positive | negative |
368
+ |:--------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
369
+ | type | string | string | string |
370
+ | details | <ul><li>min: 60 tokens</li><li>mean: 168.86 tokens</li><li>max: 403 tokens</li></ul> | <ul><li>min: 74 tokens</li><li>mean: 165.4 tokens</li><li>max: 318 tokens</li></ul> | <ul><li>min: 63 tokens</li><li>mean: 139.63 tokens</li><li>max: 253 tokens</li></ul> |
371
+ * Samples:
372
+ | anchor | positive | negative |
373
+ |:-------|:---------|:---------|
374
+ | <code>The Brokerage Supervisor/ Freight Supervisor is responsible for liaising with customers, logistics operators and customs officials and supervising the custom clearance/freight forwarding operations to ensure goods are cleared through customs or quarantine in accordance with import and export laws and regulations. Analytical and systematic, he/she is required to supervise a freight operations team to execute operations in a timely manner to meet business and customers' requirements. He/She is also expected to work with internal and external stakeholders to accomplish his work.</code> | <code>The Logistics Operations Supervisor is tasked with coordinating communication between clients, transportation providers, and regulatory agencies while overseeing the customs clearance and freight forwarding processes. This role ensures compliance with all import and export regulations, facilitating the smooth transit of goods through customs and quarantine. The ideal candidate will possess strong analytical and organizational skills, leading a team of logistics professionals to execute operations efficiently and meet both business objectives and customer expectations. Collaboration with various internal and external partners is essential to successfully fulfill the responsibilities of this position.</code> | <code>The Freight Operations Coordinator is responsible for managing interactions with suppliers, transport companies, and regulatory bodies while overseeing the delivery processes to ensure shipments are dispatched in alignment with logistics standards. This role focuses on adherence to transportation guidelines and the timely execution of delivery schedules. The successful candidate should demonstrate excellent problem-solving abilities and be capable of directing a team of logistics personnel to achieve operational targets. Coordination with different departments and external partners is crucial for the effective management of this role.</code> |
375
+ | <code>The Senior Quality Engineer evaluates and manages quality systems, tools and standards to meet business needs. He/She, as the subject matter expert, is required to identify risk areas, ensure the robustness of the risk control plans deployed for excursion free launch and conduct qualification and/or validation for new materials. He is responsible for developing a management system to ensure that operations meet both internal and external parties quality requirements. He has to take the lead in managing cross-functional teams in continuous improvement projects and assist in implementing process improvement projects. He plays an important role in organisational development through development of on-the-job training and mentoring of team leaders. The Senior Quality Engineer possesses an analytical mind and leadership skills to steer the team to perform their best and achieve the desired organisational outcomes.</code> | <code>The Quality Assurance Manager is responsible for assessing and overseeing quality systems, methodologies, and standards to align with business objectives. As an expert in the field, this individual will pinpoint areas of risk, ensure that effective risk management strategies are in place for successful product launches, and conduct thorough qualification and validation of new materials. The role involves creating a comprehensive quality management system to guarantee compliance with both internal standards and external regulations. Additionally, the Quality Assurance Manager will lead cross-functional teams in ongoing improvement initiatives and support the execution of process enhancement projects. This position is pivotal in fostering organizational growth through the development of on-the-job training programs and mentoring for team leaders. The ideal candidate will have strong analytical capabilities and leadership qualities to drive team performance and achieve key organizational ...</code> | <code>The Junior Quality Control Technician is tasked with performing routine inspections and testing of products to ensure compliance with quality standards. This role requires the technician to document findings and report any discrepancies to the senior staff. While they contribute to maintaining quality assurance processes, they are not responsible for developing management systems or leading cross-functional teams. Instead, their focus will be on executing established procedures and assisting in minor quality improvement tasks. The Junior Quality Control Technician will work under close supervision and will not engage in risk management or validation processes, limiting their involvement to basic quality checks and reporting.</code> |
376
+ | <code>The Waste Process Engineer is responsible for designing, optimising and managing processes and systems for the efficient handling, treatment, transformation and disposal of waste, including electronic waste (e-waste) and plastic waste, for an organisation. He/She is focused on minimising waste generation, exploring new technologies for enhancing waste management efficiency and material recovery, and recommending improved waste management systems and processes within an organisation. Based on life-cycle analyses and evaluation of current waste streams, systems and waste-to-resource initiatives, he will develop and implement new processes, ensuring compliance with environmental regulations. <br><br>The Waste Process Engineer must be meticulous, with an eye for detail and have strong analytical and research skills to stay up to date on best practices and circular economy strategies related to waste management. He is also expected to work collaboratively with cross-functional teams to promote su...</code> | <code>The Waste Management Engineer is tasked with the design, optimization, and oversight of processes and systems aimed at the effective handling, treatment, transformation, and disposal of various waste types, including electronic waste (e-waste) and plastics. This role emphasizes the reduction of waste generation and the exploration of innovative technologies to enhance waste management efficiency and material recovery. Additionally, the engineer will assess current waste streams and implement waste-to-resource initiatives based on thorough life-cycle analyses. Ensuring compliance with environmental regulations, the Waste Management Engineer will develop and execute new processes while collaborating with cross-functional teams to advocate for sustainable practices and advance the organization’s objectives within the Circular Economy.</code> | <code>The Environmental Compliance Officer is responsible for monitoring and enforcing adherence to environmental laws and regulations within an organization. This role focuses on assessing the impact of various operations on the environment and ensuring that all practices comply with legal standards. The officer will conduct regular audits, prepare reports, and provide training to staff about environmental policies. They will also work closely with regulatory agencies to maintain compliance and address any environmental concerns that may arise. Strong attention to detail and analytical skills are essential for this position, as is the ability to collaborate with various departments to ensure that the organization meets its sustainability goals.</code> |
377
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
378
+ ```json
379
+ {
380
+ "scale": 20.0,
381
+ "similarity_fct": "cos_sim",
382
+ "gather_across_devices": false
383
+ }
384
+ ```
385
+
386
+ ### Evaluation Dataset
387
+
388
+ #### ssf-train-valid_v3
389
+
390
+ * Dataset: [ssf-train-valid_v3](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3) at [5d23d8e](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3/tree/5d23d8eeff6292b6f2fb97cae6fd2f287dca7758)
391
+ * Size: 1,508 evaluation samples
392
+ * Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
393
+ * Approximate statistics based on the first 1000 samples:
394
+ | | anchor | positive | negative |
395
+ |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
396
+ | type | string | string | string |
397
+ | details | <ul><li>min: 57 tokens</li><li>mean: 166.64 tokens</li><li>max: 349 tokens</li></ul> | <ul><li>min: 67 tokens</li><li>mean: 162.89 tokens</li><li>max: 301 tokens</li></ul> | <ul><li>min: 65 tokens</li><li>mean: 139.7 tokens</li><li>max: 263 tokens</li></ul> |
398
+ * Samples:
399
+ | anchor | positive | negative |
400
+ |:-------|:---------|:---------|
401
+ | <code>The Manager - Standards and Practices (S&P) ensures that content delivered by the organisation complies with the regulatory requirements and censorship norms of the local territories where the content may be available. He/She also provides advisory ratings for the content based on the regulatory guidelines. He keeps abreast of the local, cultural and political norms and sensitivities to support the creation of content classification guidelines. The work involves coordinating internal and external processes for delivery within tight timelines. He is highly accountable for the organisation's brand and reputation given the sensitivities of content classification. He should be comfortable coordinating with internal and external stakeholders in order to balance the organisation's priorities with compliance to guidelines and norms. He should be effective at planning and organising. He should also be aware of the regulatory, political and cultural landscape and possess a keen eye for detail t...</code> | <code>The Content Compliance Manager plays a crucial role in ensuring that all materials produced by the organization meet the necessary regulatory standards and censorship requirements of the respective local markets. This individual will provide expert advisory ratings for content in alignment with established regulatory frameworks. Staying informed about local cultural and political dynamics is essential to aid in the development of content classification guidelines. The role requires effective coordination of both internal and external processes to ensure timely delivery while maintaining the integrity of the organization's brand and reputation. The ideal candidate will excel in stakeholder engagement, balancing organizational objectives with compliance mandates. Strong planning, organizational skills, and a meticulous attention to detail are vital for identifying compliance issues during content reviews. 
Proficiency in communication and stakeholder management is necessary for successful...</code> | <code>The Junior Risk Analyst is responsible for evaluating potential risks that may impact the organization’s operations and financial performance. This role involves conducting thorough assessments of various risk factors and preparing detailed reports on findings. The Junior Risk Analyst will collaborate with different departments to identify risk mitigation strategies and ensure that appropriate measures are in place. Strong analytical skills and attention to detail are essential for identifying potential vulnerabilities within the organization. The position requires effective communication with team members and management to discuss risk assessments and recommendations. The ideal candidate should also be comfortable working under tight deadlines while maintaining a high level of accuracy in their analyses.</code> |
402
+ | <code>The Psychologist provides psychological services within multi-disciplinary settings, applying a wide array of psychometric assessments and treatments for clients with routine psychological conditions. He/She conducts psychoeducational programmes and training for other professionals. He is a resourceful, proactive and collaborative professional, and works in varied settings such as public and private institutions, hospitals, healthcare and voluntary welfare organisations. He works under supervision and in collaboration with other professionals in the course of his work.</code> | <code>The Clinical Psychologist delivers comprehensive psychological services in diverse multi-disciplinary environments, utilizing a broad spectrum of psychometric evaluations and therapeutic interventions for clients experiencing common psychological issues. This role involves designing and implementing psychoeducational programs and training sessions aimed at enhancing the skills of fellow professionals. The ideal candidate is resourceful, proactive, and excels in collaboration, working effectively across various settings including public and private institutions, hospitals, healthcare facilities, and voluntary welfare organizations. The Clinical Psychologist operates under supervision while engaging collaboratively with other professionals to ensure the best outcomes for clients.</code> | <code>The Clinical Psychologist conducts psychological assessments within corporate environments, focusing primarily on employee mental health and organizational behavior. This position emphasizes the development and implementation of workplace wellness programs and training for management teams. The successful candidate will be an innovative, self-motivated individual who thrives in collaborative settings, working closely with human resources and management to address workplace-related psychological issues. The Clinical Psychologist operates independently while coordinating with various departments to enhance employee well-being and productivity in a corporate context.</code> |
403
+ | <code>The Planning Manager (Aircraft Engine / Component Maintenance) is responsible for strategising long-term supply chain management plans, optimising aircraft engine and component maintenance planning and resource management, and driving supplier capability and performance enhancement programmes. He/She develops vendor management strategies and leads technology application to strengthen sourcing, inventory and warehousing operations. He provides project management oversight for engine servicing operations and manages technical and programme reviews with customers and suppliers. He manages compliance with airworthiness and legislative requirements, and contributes to development of the organisation's standard operating procedures (SOPs), management systems, lean and sustainability practices, and data analytics plans for strategic decision-making. He drives team performance to achieve business key performance indicators (KPIs) and leads talent recruitment and development plans. He should de...</code> | <code>The Aircraft Maintenance Planning Specialist is tasked with formulating comprehensive long-term strategies for supply chain management, focusing on the optimization of maintenance planning for aircraft engines and components. This role involves enhancing supplier capabilities and performance through targeted programs. The specialist will develop effective vendor management strategies and leverage technology to improve sourcing, inventory control, and warehousing operations. Additionally, they will oversee project management for engine servicing activities and facilitate technical and program reviews with both customers and suppliers. Compliance with airworthiness regulations and legislative requirements is critical, as is contributing to the organization's standard operating procedures (SOPs), management systems, and data analytics initiatives for informed decision-making. 
The specialist will also drive team performance to meet key performance indicators (KPIs) and lead initiatives for...</code> | <code>The Aircraft Component Quality Assurance Coordinator is responsible for implementing quality control measures and ensuring compliance with industry standards in the maintenance of aircraft components. This role focuses on conducting inspections and audits to assess the performance of suppliers and service providers. The coordinator develops quality assurance strategies and applies technology to enhance inspection processes and documentation practices. They will manage quality-related projects and collaborate with engineering teams to address any compliance issues with regulatory requirements. Additionally, the coordinator contributes to the development of quality management systems and participates in data analysis for quality improvement initiatives. They are expected to drive team engagement to achieve quality performance metrics and support training and development programs for staff. Strong attention to detail, analytical skills, and the ability to work collaboratively across depar...</code> |
404
+ * Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
405
+ ```json
406
+ {
407
+ "scale": 20.0,
408
+ "similarity_fct": "cos_sim",
409
+ "gather_across_devices": false
410
+ }
411
+ ```
412
+
413
+ ### Training Hyperparameters
414
+ #### Non-Default Hyperparameters
415
+
416
+ - `eval_strategy`: epoch
417
+ - `per_device_train_batch_size`: 32
418
+ - `per_device_eval_batch_size`: 16
419
+ - `num_train_epochs`: 5
420
+ - `lr_scheduler_type`: cosine
421
+ - `warmup_ratio`: 0.1
422
+ - `bf16`: True
423
+ - `load_best_model_at_end`: True
424
+ - `batch_sampler`: no_duplicates
425
+
426
+ #### All Hyperparameters
427
+ <details><summary>Click to expand</summary>
428
+
429
+ - `overwrite_output_dir`: False
430
+ - `do_predict`: False
431
+ - `eval_strategy`: epoch
432
+ - `prediction_loss_only`: True
433
+ - `per_device_train_batch_size`: 32
434
+ - `per_device_eval_batch_size`: 16
435
+ - `per_gpu_train_batch_size`: None
436
+ - `per_gpu_eval_batch_size`: None
437
+ - `gradient_accumulation_steps`: 1
438
+ - `eval_accumulation_steps`: None
439
+ - `torch_empty_cache_steps`: None
440
+ - `learning_rate`: 5e-05
441
+ - `weight_decay`: 0.0
442
+ - `adam_beta1`: 0.9
443
+ - `adam_beta2`: 0.999
444
+ - `adam_epsilon`: 1e-08
445
+ - `max_grad_norm`: 1.0
446
+ - `num_train_epochs`: 5
447
+ - `max_steps`: -1
448
+ - `lr_scheduler_type`: cosine
449
+ - `lr_scheduler_kwargs`: {}
450
+ - `warmup_ratio`: 0.1
451
+ - `warmup_steps`: 0
452
+ - `log_level`: passive
453
+ - `log_level_replica`: warning
454
+ - `log_on_each_node`: True
455
+ - `logging_nan_inf_filter`: True
456
+ - `save_safetensors`: True
457
+ - `save_on_each_node`: False
458
+ - `save_only_model`: False
459
+ - `restore_callback_states_from_checkpoint`: False
460
+ - `no_cuda`: False
461
+ - `use_cpu`: False
462
+ - `use_mps_device`: False
463
+ - `seed`: 42
464
+ - `data_seed`: None
465
+ - `jit_mode_eval`: False
466
+ - `use_ipex`: False
467
+ - `bf16`: True
468
+ - `fp16`: False
469
+ - `fp16_opt_level`: O1
470
+ - `half_precision_backend`: auto
471
+ - `bf16_full_eval`: False
472
+ - `fp16_full_eval`: False
473
+ - `tf32`: None
474
+ - `local_rank`: 0
475
+ - `ddp_backend`: None
476
+ - `tpu_num_cores`: None
477
+ - `tpu_metrics_debug`: False
478
+ - `debug`: []
479
+ - `dataloader_drop_last`: False
480
+ - `dataloader_num_workers`: 0
481
+ - `dataloader_prefetch_factor`: None
482
+ - `past_index`: -1
483
+ - `disable_tqdm`: False
484
+ - `remove_unused_columns`: True
485
+ - `label_names`: None
486
+ - `load_best_model_at_end`: True
487
+ - `ignore_data_skip`: False
488
+ - `fsdp`: []
489
+ - `fsdp_min_num_params`: 0
490
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
491
+ - `fsdp_transformer_layer_cls_to_wrap`: None
492
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
493
+ - `parallelism_config`: None
494
+ - `deepspeed`: None
495
+ - `label_smoothing_factor`: 0.0
496
+ - `optim`: adamw_torch_fused
497
+ - `optim_args`: None
498
+ - `adafactor`: False
499
+ - `group_by_length`: False
500
+ - `length_column_name`: length
501
+ - `ddp_find_unused_parameters`: None
502
+ - `ddp_bucket_cap_mb`: None
503
+ - `ddp_broadcast_buffers`: False
504
+ - `dataloader_pin_memory`: True
505
+ - `dataloader_persistent_workers`: False
506
+ - `skip_memory_metrics`: True
507
+ - `use_legacy_prediction_loop`: False
508
+ - `push_to_hub`: False
509
+ - `resume_from_checkpoint`: None
510
+ - `hub_model_id`: None
511
+ - `hub_strategy`: every_save
512
+ - `hub_private_repo`: None
513
+ - `hub_always_push`: False
514
+ - `hub_revision`: None
515
+ - `gradient_checkpointing`: False
516
+ - `gradient_checkpointing_kwargs`: None
517
+ - `include_inputs_for_metrics`: False
518
+ - `include_for_metrics`: []
519
+ - `eval_do_concat_batches`: True
520
+ - `fp16_backend`: auto
521
+ - `push_to_hub_model_id`: None
522
+ - `push_to_hub_organization`: None
523
+ - `mp_parameters`:
524
+ - `auto_find_batch_size`: False
525
+ - `full_determinism`: False
526
+ - `torchdynamo`: None
527
+ - `ray_scope`: last
528
+ - `ddp_timeout`: 1800
529
+ - `torch_compile`: False
530
+ - `torch_compile_backend`: None
531
+ - `torch_compile_mode`: None
532
+ - `include_tokens_per_second`: False
533
+ - `include_num_input_tokens_seen`: False
534
+ - `neftune_noise_alpha`: None
535
+ - `optim_target_modules`: None
536
+ - `batch_eval_metrics`: False
537
+ - `eval_on_start`: False
538
+ - `use_liger_kernel`: False
539
+ - `liger_kernel_config`: None
540
+ - `eval_use_gather_object`: False
541
+ - `average_tokens_across_devices`: False
542
+ - `prompts`: None
543
+ - `batch_sampler`: no_duplicates
544
+ - `multi_dataset_batch_sampler`: proportional
545
+ - `router_mapping`: {}
546
+ - `learning_rate_mapping`: {}
547
+
548
+ </details>
549
+
550
+ ### Training Logs
551
+ | Epoch | Step | Training Loss | Validation Loss |
552
+ |:-------:|:-------:|:-------------:|:---------------:|
553
+ | 1.0 | 189 | 0.3362 | 0.0096 |
554
+ | 2.0 | 378 | 0.0079 | 0.0056 |
555
+ | 3.0 | 567 | 0.0029 | 0.0042 |
556
+ | 4.0 | 756 | 0.0026 | 0.0037 |
557
+ | **5.0** | **945** | **0.0023** | **0.0032** |
558
+
559
+ * The bold row denotes the saved checkpoint.
560
+
561
+ ### Framework Versions
562
+ - Python: 3.12.11
563
+ - Sentence Transformers: 5.1.0
564
+ - Transformers: 4.56.1
565
+ - PyTorch: 2.8.0+cu128
566
+ - Accelerate: 1.10.0
567
+ - Datasets: 4.0.0
568
+ - Tokenizers: 0.22.0
569
+
570
+ ## Citation
571
+
572
+ ### BibTeX
573
+
574
+ #### Sentence Transformers
575
+ ```bibtex
576
+ @inproceedings{reimers-2019-sentence-bert,
577
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
578
+ author = "Reimers, Nils and Gurevych, Iryna",
579
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
580
+ month = "11",
581
+ year = "2019",
582
+ publisher = "Association for Computational Linguistics",
583
+ url = "https://arxiv.org/abs/1908.10084",
584
+ }
585
+ ```
586
+
587
+ #### MultipleNegativesRankingLoss
588
+ ```bibtex
589
+ @misc{henderson2017efficient,
590
+ title={Efficient Natural Language Response Suggestion for Smart Reply},
591
+ author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
592
+ year={2017},
593
+ eprint={1705.00652},
594
+ archivePrefix={arXiv},
595
+ primaryClass={cs.CL}
596
+ }
597
+ ```
598
+
599
+ <!--
600
+ ## Glossary
601
+
602
+ *Clearly define terms in order to be accessible across audiences.*
603
+ -->
604
+
605
+ <!--
606
+ ## Model Card Authors
607
+
608
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
609
+ -->
610
+
611
+ <!--
612
+ ## Model Card Contact
613
+
614
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
615
+ -->
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "ModernBertModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 50281,
8
+ "classifier_activation": "gelu",
9
+ "classifier_bias": false,
10
+ "classifier_dropout": 0.0,
11
+ "classifier_pooling": "mean",
12
+ "cls_token_id": 50281,
13
+ "decoder_bias": true,
14
+ "deterministic_flash_attn": false,
15
+ "dtype": "float32",
16
+ "embedding_dropout": 0.0,
17
+ "eos_token_id": 50282,
18
+ "global_attn_every_n_layers": 3,
19
+ "global_rope_theta": 160000.0,
20
+ "gradient_checkpointing": false,
21
+ "hidden_activation": "gelu",
22
+ "hidden_size": 768,
23
+ "initializer_cutoff_factor": 2.0,
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 1152,
26
+ "layer_norm_eps": 1e-05,
27
+ "local_attention": 128,
28
+ "local_rope_theta": 10000.0,
29
+ "max_position_embeddings": 8192,
30
+ "mlp_bias": false,
31
+ "mlp_dropout": 0.0,
32
+ "model_type": "modernbert",
33
+ "norm_bias": false,
34
+ "norm_eps": 1e-05,
35
+ "num_attention_heads": 12,
36
+ "num_hidden_layers": 22,
37
+ "pad_token_id": 50283,
38
+ "position_embedding_type": "absolute",
39
+ "repad_logits_with_grad": false,
40
+ "sep_token_id": 50282,
41
+ "sparse_pred_ignore_index": -100,
42
+ "sparse_prediction": false,
43
+ "transformers_version": "4.56.1",
44
+ "vocab_size": 50368
45
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "SentenceTransformer",
3
+ "__version__": {
4
+ "sentence_transformers": "5.1.0",
5
+ "transformers": "4.56.1",
6
+ "pytorch": "2.8.0+cu128"
7
+ },
8
+ "prompts": {
9
+ "query": "",
10
+ "document": ""
11
+ },
12
+ "default_prompt_name": null,
13
+ "similarity_fn_name": "cosine"
14
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cd52551d2c00c1a833fc5a6a467ed75eee182b1c8744f20ca5181434d0a0a0f
3
+ size 596070136
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 8192,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": true,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,945 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "|||IP_ADDRESS|||",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "1": {
12
+ "content": "<|padding|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "50254": {
20
+ "content": " ",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "50255": {
28
+ "content": " ",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "50256": {
36
+ "content": " ",
37
+ "lstrip": false,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "50257": {
44
+ "content": " ",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "50258": {
52
+ "content": " ",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "50259": {
60
+ "content": " ",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "50260": {
68
+ "content": " ",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "50261": {
76
+ "content": " ",
77
+ "lstrip": false,
78
+ "normalized": true,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": false
82
+ },
83
+ "50262": {
84
+ "content": " ",
85
+ "lstrip": false,
86
+ "normalized": true,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": false
90
+ },
91
+ "50263": {
92
+ "content": " ",
93
+ "lstrip": false,
94
+ "normalized": true,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": false
98
+ },
99
+ "50264": {
100
+ "content": " ",
101
+ "lstrip": false,
102
+ "normalized": true,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": false
106
+ },
107
+ "50265": {
108
+ "content": " ",
109
+ "lstrip": false,
110
+ "normalized": true,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": false
114
+ },
115
+ "50266": {
116
+ "content": " ",
117
+ "lstrip": false,
118
+ "normalized": true,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": false
122
+ },
123
+ "50267": {
124
+ "content": " ",
125
+ "lstrip": false,
126
+ "normalized": true,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": false
130
+ },
131
+ "50268": {
132
+ "content": " ",
133
+ "lstrip": false,
134
+ "normalized": true,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": false
138
+ },
139
+ "50269": {
140
+ "content": " ",
141
+ "lstrip": false,
142
+ "normalized": true,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": false
146
+ },
147
+ "50270": {
148
+ "content": " ",
149
+ "lstrip": false,
150
+ "normalized": true,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": false
154
+ },
155
+ "50271": {
156
+ "content": " ",
157
+ "lstrip": false,
158
+ "normalized": true,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": false
162
+ },
163
+ "50272": {
164
+ "content": " ",
165
+ "lstrip": false,
166
+ "normalized": true,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": false
170
+ },
171
+ "50273": {
172
+ "content": " ",
173
+ "lstrip": false,
174
+ "normalized": true,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": false
178
+ },
179
+ "50274": {
180
+ "content": " ",
181
+ "lstrip": false,
182
+ "normalized": true,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": false
186
+ },
187
+ "50275": {
188
+ "content": " ",
189
+ "lstrip": false,
190
+ "normalized": true,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": false
194
+ },
195
+ "50276": {
196
+ "content": " ",
197
+ "lstrip": false,
198
+ "normalized": true,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": false
202
+ },
203
+ "50277": {
204
+ "content": "|||EMAIL_ADDRESS|||",
205
+ "lstrip": false,
206
+ "normalized": true,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": false
210
+ },
211
+ "50278": {
212
+ "content": "|||PHONE_NUMBER|||",
213
+ "lstrip": false,
214
+ "normalized": true,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": false
218
+ },
219
+ "50279": {
220
+ "content": "<|endoftext|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "50280": {
228
+ "content": "[UNK]",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "50281": {
236
+ "content": "[CLS]",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "50282": {
244
+ "content": "[SEP]",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "50283": {
252
+ "content": "[PAD]",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "50284": {
260
+ "content": "[MASK]",
261
+ "lstrip": true,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "50285": {
268
+ "content": "[unused0]",
269
+ "lstrip": false,
270
+ "normalized": true,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": false
274
+ },
275
+ "50286": {
276
+ "content": "[unused1]",
277
+ "lstrip": false,
278
+ "normalized": true,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": false
282
+ },
283
+ "50287": {
284
+ "content": "[unused2]",
285
+ "lstrip": false,
286
+ "normalized": true,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": false
290
+ },
291
+ "50288": {
292
+ "content": "[unused3]",
293
+ "lstrip": false,
294
+ "normalized": true,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": false
298
+ },
299
+ "50289": {
300
+ "content": "[unused4]",
301
+ "lstrip": false,
302
+ "normalized": true,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": false
306
+ },
307
+ "50290": {
308
+ "content": "[unused5]",
309
+ "lstrip": false,
310
+ "normalized": true,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": false
314
+ },
315
+ "50291": {
316
+ "content": "[unused6]",
317
+ "lstrip": false,
318
+ "normalized": true,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": false
322
+ },
323
+ "50292": {
324
+ "content": "[unused7]",
325
+ "lstrip": false,
326
+ "normalized": true,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": false
330
+ },
331
+ "50293": {
332
+ "content": "[unused8]",
333
+ "lstrip": false,
334
+ "normalized": true,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": false
338
+ },
339
+ "50294": {
340
+ "content": "[unused9]",
341
+ "lstrip": false,
342
+ "normalized": true,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": false
346
+ },
347
+ "50295": {
348
+ "content": "[unused10]",
349
+ "lstrip": false,
350
+ "normalized": true,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": false
354
+ },
355
+ "50296": {
356
+ "content": "[unused11]",
357
+ "lstrip": false,
358
+ "normalized": true,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": false
362
+ },
363
+ "50297": {
364
+ "content": "[unused12]",
365
+ "lstrip": false,
366
+ "normalized": true,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": false
370
+ },
371
+ "50298": {
372
+ "content": "[unused13]",
373
+ "lstrip": false,
374
+ "normalized": true,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": false
378
+ },
379
+ "50299": {
380
+ "content": "[unused14]",
381
+ "lstrip": false,
382
+ "normalized": true,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": false
386
+ },
387
+ "50300": {
388
+ "content": "[unused15]",
389
+ "lstrip": false,
390
+ "normalized": true,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": false
394
+ },
395
+ "50301": {
396
+ "content": "[unused16]",
397
+ "lstrip": false,
398
+ "normalized": true,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": false
402
+ },
403
+ "50302": {
404
+ "content": "[unused17]",
405
+ "lstrip": false,
406
+ "normalized": true,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": false
410
+ },
411
+ "50303": {
412
+ "content": "[unused18]",
413
+ "lstrip": false,
414
+ "normalized": true,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": false
418
+ },
419
+ "50304": {
420
+ "content": "[unused19]",
421
+ "lstrip": false,
422
+ "normalized": true,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": false
426
+ },
427
+ "50305": {
428
+ "content": "[unused20]",
429
+ "lstrip": false,
430
+ "normalized": true,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": false
434
+ },
435
+ "50306": {
436
+ "content": "[unused21]",
437
+ "lstrip": false,
438
+ "normalized": true,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": false
442
+ },
443
+ "50307": {
444
+ "content": "[unused22]",
445
+ "lstrip": false,
446
+ "normalized": true,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": false
450
+ },
451
+ "50308": {
452
+ "content": "[unused23]",
453
+ "lstrip": false,
454
+ "normalized": true,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": false
458
+ },
459
+ "50309": {
460
+ "content": "[unused24]",
461
+ "lstrip": false,
462
+ "normalized": true,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": false
466
+ },
467
+ "50310": {
468
+ "content": "[unused25]",
469
+ "lstrip": false,
470
+ "normalized": true,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": false
474
+ },
475
+ "50311": {
476
+ "content": "[unused26]",
477
+ "lstrip": false,
478
+ "normalized": true,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": false
482
+ },
483
+ "50312": {
484
+ "content": "[unused27]",
485
+ "lstrip": false,
486
+ "normalized": true,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": false
490
+ },
491
+ "50313": {
492
+ "content": "[unused28]",
493
+ "lstrip": false,
494
+ "normalized": true,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": false
498
+ },
499
+ "50314": {
500
+ "content": "[unused29]",
501
+ "lstrip": false,
502
+ "normalized": true,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": false
506
+ },
507
+ "50315": {
508
+ "content": "[unused30]",
509
+ "lstrip": false,
510
+ "normalized": true,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": false
514
+ },
515
+ "50316": {
516
+ "content": "[unused31]",
517
+ "lstrip": false,
518
+ "normalized": true,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": false
522
+ },
523
+ "50317": {
524
+ "content": "[unused32]",
525
+ "lstrip": false,
526
+ "normalized": true,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": false
530
+ },
531
+ "50318": {
532
+ "content": "[unused33]",
533
+ "lstrip": false,
534
+ "normalized": true,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": false
538
+ },
539
+ "50319": {
540
+ "content": "[unused34]",
541
+ "lstrip": false,
542
+ "normalized": true,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": false
546
+ },
547
+ "50320": {
548
+ "content": "[unused35]",
549
+ "lstrip": false,
550
+ "normalized": true,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": false
554
+ },
555
+ "50321": {
556
+ "content": "[unused36]",
557
+ "lstrip": false,
558
+ "normalized": true,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": false
562
+ },
563
+ "50322": {
564
+ "content": "[unused37]",
565
+ "lstrip": false,
566
+ "normalized": true,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": false
570
+ },
571
+ "50323": {
572
+ "content": "[unused38]",
573
+ "lstrip": false,
574
+ "normalized": true,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": false
578
+ },
579
+ "50324": {
580
+ "content": "[unused39]",
581
+ "lstrip": false,
582
+ "normalized": true,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": false
586
+ },
587
+ "50325": {
588
+ "content": "[unused40]",
589
+ "lstrip": false,
590
+ "normalized": true,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": false
594
+ },
595
+ "50326": {
596
+ "content": "[unused41]",
597
+ "lstrip": false,
598
+ "normalized": true,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": false
602
+ },
603
+ "50327": {
604
+ "content": "[unused42]",
605
+ "lstrip": false,
606
+ "normalized": true,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": false
610
+ },
611
+ "50328": {
612
+ "content": "[unused43]",
613
+ "lstrip": false,
614
+ "normalized": true,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": false
618
+ },
619
+ "50329": {
620
+ "content": "[unused44]",
621
+ "lstrip": false,
622
+ "normalized": true,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": false
626
+ },
627
+ "50330": {
628
+ "content": "[unused45]",
629
+ "lstrip": false,
630
+ "normalized": true,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": false
634
+ },
635
+ "50331": {
636
+ "content": "[unused46]",
637
+ "lstrip": false,
638
+ "normalized": true,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": false
642
+ },
643
+ "50332": {
644
+ "content": "[unused47]",
645
+ "lstrip": false,
646
+ "normalized": true,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": false
650
+ },
651
+ "50333": {
652
+ "content": "[unused48]",
653
+ "lstrip": false,
654
+ "normalized": true,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": false
658
+ },
659
+ "50334": {
660
+ "content": "[unused49]",
661
+ "lstrip": false,
662
+ "normalized": true,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": false
666
+ },
667
+ "50335": {
668
+ "content": "[unused50]",
669
+ "lstrip": false,
670
+ "normalized": true,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": false
674
+ },
675
+ "50336": {
676
+ "content": "[unused51]",
677
+ "lstrip": false,
678
+ "normalized": true,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": false
682
+ },
683
+ "50337": {
684
+ "content": "[unused52]",
685
+ "lstrip": false,
686
+ "normalized": true,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": false
690
+ },
691
+ "50338": {
692
+ "content": "[unused53]",
693
+ "lstrip": false,
694
+ "normalized": true,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": false
698
+ },
699
+ "50339": {
700
+ "content": "[unused54]",
701
+ "lstrip": false,
702
+ "normalized": true,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": false
706
+ },
707
+ "50340": {
708
+ "content": "[unused55]",
709
+ "lstrip": false,
710
+ "normalized": true,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": false
714
+ },
715
+ "50341": {
716
+ "content": "[unused56]",
717
+ "lstrip": false,
718
+ "normalized": true,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": false
722
+ },
723
+ "50342": {
724
+ "content": "[unused57]",
725
+ "lstrip": false,
726
+ "normalized": true,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": false
730
+ },
731
+ "50343": {
732
+ "content": "[unused58]",
733
+ "lstrip": false,
734
+ "normalized": true,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": false
738
+ },
739
+ "50344": {
740
+ "content": "[unused59]",
741
+ "lstrip": false,
742
+ "normalized": true,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": false
746
+ },
747
+ "50345": {
748
+ "content": "[unused60]",
749
+ "lstrip": false,
750
+ "normalized": true,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": false
754
+ },
755
+ "50346": {
756
+ "content": "[unused61]",
757
+ "lstrip": false,
758
+ "normalized": true,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": false
762
+ },
763
+ "50347": {
764
+ "content": "[unused62]",
765
+ "lstrip": false,
766
+ "normalized": true,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": false
770
+ },
771
+ "50348": {
772
+ "content": "[unused63]",
773
+ "lstrip": false,
774
+ "normalized": true,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": false
778
+ },
779
+ "50349": {
780
+ "content": "[unused64]",
781
+ "lstrip": false,
782
+ "normalized": true,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": false
786
+ },
787
+ "50350": {
788
+ "content": "[unused65]",
789
+ "lstrip": false,
790
+ "normalized": true,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": false
794
+ },
795
+ "50351": {
796
+ "content": "[unused66]",
797
+ "lstrip": false,
798
+ "normalized": true,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": false
802
+ },
803
+ "50352": {
804
+ "content": "[unused67]",
805
+ "lstrip": false,
806
+ "normalized": true,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": false
810
+ },
811
+ "50353": {
812
+ "content": "[unused68]",
813
+ "lstrip": false,
814
+ "normalized": true,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": false
818
+ },
819
+ "50354": {
820
+ "content": "[unused69]",
821
+ "lstrip": false,
822
+ "normalized": true,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": false
826
+ },
827
+ "50355": {
828
+ "content": "[unused70]",
829
+ "lstrip": false,
830
+ "normalized": true,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": false
834
+ },
835
+ "50356": {
836
+ "content": "[unused71]",
837
+ "lstrip": false,
838
+ "normalized": true,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": false
842
+ },
843
+ "50357": {
844
+ "content": "[unused72]",
845
+ "lstrip": false,
846
+ "normalized": true,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": false
850
+ },
851
+ "50358": {
852
+ "content": "[unused73]",
853
+ "lstrip": false,
854
+ "normalized": true,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": false
858
+ },
859
+ "50359": {
860
+ "content": "[unused74]",
861
+ "lstrip": false,
862
+ "normalized": true,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": false
866
+ },
867
+ "50360": {
868
+ "content": "[unused75]",
869
+ "lstrip": false,
870
+ "normalized": true,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": false
874
+ },
875
+ "50361": {
876
+ "content": "[unused76]",
877
+ "lstrip": false,
878
+ "normalized": true,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": false
882
+ },
883
+ "50362": {
884
+ "content": "[unused77]",
885
+ "lstrip": false,
886
+ "normalized": true,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": false
890
+ },
891
+ "50363": {
892
+ "content": "[unused78]",
893
+ "lstrip": false,
894
+ "normalized": true,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": false
898
+ },
899
+ "50364": {
900
+ "content": "[unused79]",
901
+ "lstrip": false,
902
+ "normalized": true,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": false
906
+ },
907
+ "50365": {
908
+ "content": "[unused80]",
909
+ "lstrip": false,
910
+ "normalized": true,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": false
914
+ },
915
+ "50366": {
916
+ "content": "[unused81]",
917
+ "lstrip": false,
918
+ "normalized": true,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": false
922
+ },
923
+ "50367": {
924
+ "content": "[unused82]",
925
+ "lstrip": false,
926
+ "normalized": true,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": false
930
+ }
931
+ },
932
+ "clean_up_tokenization_spaces": true,
933
+ "cls_token": "[CLS]",
934
+ "extra_special_tokens": {},
935
+ "mask_token": "[MASK]",
936
+ "model_input_names": [
937
+ "input_ids",
938
+ "attention_mask"
939
+ ],
940
+ "model_max_length": 8192,
941
+ "pad_token": "[PAD]",
942
+ "sep_token": "[SEP]",
943
+ "tokenizer_class": "PreTrainedTokenizerFast",
944
+ "unk_token": "[UNK]"
945
+ }