Add new SentenceTransformer model
Browse files
- 1_Pooling/config.json +10 -0
- README.md +615 -0
- config.json +45 -0
- config_sentence_transformers.json +14 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"word_embedding_dimension": 768,
|
| 3 |
+
"pooling_mode_cls_token": false,
|
| 4 |
+
"pooling_mode_mean_tokens": true,
|
| 5 |
+
"pooling_mode_max_tokens": false,
|
| 6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
| 7 |
+
"pooling_mode_weightedmean_tokens": false,
|
| 8 |
+
"pooling_mode_lasttoken": false,
|
| 9 |
+
"include_prompt": true
|
| 10 |
+
}
|
README.md
ADDED
|
@@ -0,0 +1,615 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- sentence-transformers
|
| 4 |
+
- sentence-similarity
|
| 5 |
+
- feature-extraction
|
| 6 |
+
- dense
|
| 7 |
+
- generated_from_trainer
|
| 8 |
+
- dataset_size:6032
|
| 9 |
+
- loss:MultipleNegativesRankingLoss
|
| 10 |
+
base_model: answerdotai/ModernBERT-base
|
| 11 |
+
widget:
|
| 12 |
+
- source_sentence: The Field Sales Executive/Key Account Executive/Sales Operations
|
| 13 |
+
Management Specialist is responsible for being the contact point with commercial
|
| 14 |
+
accounts on various logistics services. He/She is also responsible to provide
|
| 15 |
+
support in identifying potential customers, establishing partnerships to expand
|
| 16 |
+
the company's businesses, selling solutions and participating in programmes targeting
|
| 17 |
+
different customers with support of internal departments in building customer
|
| 18 |
+
relationships. Resourceful and analytical, he is required to understand customer
|
| 19 |
+
needs and convince customers to adopt the proposed solutions.
|
| 20 |
+
sentences:
|
| 21 |
+
- The Sales Operations Coordinator is responsible for being the main point of contact
|
| 22 |
+
with retail clients on various marketing services. He/She is also tasked with
|
| 23 |
+
providing assistance in identifying potential partners, establishing collaborations
|
| 24 |
+
to grow the company's market reach, promoting services, and participating in initiatives
|
| 25 |
+
targeting diverse clients with support from different departments in nurturing
|
| 26 |
+
client connections. Resourceful and detail-oriented, he is required to comprehend
|
| 27 |
+
client preferences and influence clients to consider the proposed offerings.
|
| 28 |
+
- The Key Account Manager is tasked with serving as the primary liaison for commercial
|
| 29 |
+
clients regarding a range of logistics services. This role involves identifying
|
| 30 |
+
potential customers, fostering partnerships to enhance the company's market presence,
|
| 31 |
+
and delivering tailored solutions. The Key Account Manager collaborates with internal
|
| 32 |
+
teams to develop strategies aimed at strengthening customer relationships. With
|
| 33 |
+
a resourceful and analytical mindset, he/she must grasp customer requirements
|
| 34 |
+
and effectively persuade clients to embrace the recommended solutions.
|
| 35 |
+
- The Civil and Structural Engineering Manager is responsible for the comprehensive
|
| 36 |
+
management and execution of engineering projects, ensuring alignment with specific
|
| 37 |
+
project requirements. This role involves reviewing designs, specifications, calculations,
|
| 38 |
+
and various submissions to maintain project integrity. The manager oversees all
|
| 39 |
+
assessment phases of projects, guaranteeing that developed designs and models
|
| 40 |
+
meet the established criteria. Additionally, he/she leads the tendering process
|
| 41 |
+
and supervises a team of engineers, fostering a culture of continuous performance
|
| 42 |
+
enhancement. The position may also entail fulfilling the duties of a Qualified
|
| 43 |
+
Person as stipulated by the Building Control Act. The ideal candidate will have
|
| 44 |
+
strong analytical, problem-solving, and decision-making abilities, along with
|
| 45 |
+
expertise in civil and structural engineering practices and project management.
|
| 46 |
+
Exceptional leadership and communication skills are essential, and the role requires
|
| 47 |
+
a balance of office work and on-site project involvement.
|
| 48 |
+
- source_sentence: The Capacity Management Executive assists in capacity planning
|
| 49 |
+
and management, making recommendations on cargo mixtures based on profit maximisation
|
| 50 |
+
and customer demand, and proposing alternatives and contingencies to handle capacity
|
| 51 |
+
issues. He/She coordinates cargo handling and transhipment operations with business
|
| 52 |
+
partners and stakeholders and is responsible for ensuring dangerous cargo is handled
|
| 53 |
+
correctly. He tracks vessel movements and assist in adhoc route adjustments to
|
| 54 |
+
maximise voyage yields and minimise operational expenses. He possesses an innovative
|
| 55 |
+
mind-set and can work under tight deadlines.
|
| 56 |
+
sentences:
|
| 57 |
+
- The Lighting Technician is responsible for overseeing the installation and operation
|
| 58 |
+
of lighting systems for various productions. This role requires proficiency in
|
| 59 |
+
working at heights and operating elevated work platforms, as well as a solid understanding
|
| 60 |
+
of basic electrical principles. Adherence to workplace safety and health regulations
|
| 61 |
+
is essential. The Lighting Technician serves as the primary point of contact for
|
| 62 |
+
any safety concerns or incidents that may arise. Additionally, this position may
|
| 63 |
+
involve supervising entry-level lighting staff to ensure that lighting setups
|
| 64 |
+
are executed accurately and safely before and during events. Depending on their
|
| 65 |
+
skills and experiences, Lighting Technicians may also engage in specialized tasks.
|
| 66 |
+
They can work either on a full-time or casual basis across venues, rental companies,
|
| 67 |
+
production firms, or directly within production teams.
|
| 68 |
+
- The Cargo Optimization Specialist plays a crucial role in overseeing capacity
|
| 69 |
+
planning and management within the logistics sector. This position involves analyzing
|
| 70 |
+
cargo compositions to enhance profitability while meeting customer needs, as well
|
| 71 |
+
as suggesting alternatives and contingency plans to address any capacity challenges.
|
| 72 |
+
The specialist collaborates closely with business partners and stakeholders to
|
| 73 |
+
ensure efficient cargo handling and transshipment operations, with a strong emphasis
|
| 74 |
+
on the safe handling of hazardous materials. Additionally, they monitor vessel
|
| 75 |
+
movements and assist in making real-time route adjustments aimed at maximizing
|
| 76 |
+
voyage profitability and minimizing operational costs. The ideal candidate will
|
| 77 |
+
demonstrate innovative thinking and the ability to thrive under pressure.
|
| 78 |
+
- The Junior Risk Analyst is responsible for identifying and assessing potential
|
| 79 |
+
risks within the financial services sector. In this role, the analyst gathers
|
| 80 |
+
data to evaluate risk exposure and develops strategies to mitigate these risks.
|
| 81 |
+
They work with various teams to ensure compliance with regulatory standards and
|
| 82 |
+
contribute to the preparation of risk assessment reports. The Junior Risk Analyst
|
| 83 |
+
must possess strong analytical skills and the ability to communicate findings
|
| 84 |
+
effectively, while also being adaptable to changing regulations and market conditions.
|
| 85 |
+
This position requires a detail-oriented individual who can work collaboratively
|
| 86 |
+
in a fast-paced environment.
|
| 87 |
+
- source_sentence: The Exhibition Producer/Conference Producer/Meeting Planner is
|
| 88 |
+
responsible for the development of concepts and content curation for meetings,
|
| 89 |
+
conferences and exhibitions. He/She works closely with internal and external stakeholders
|
| 90 |
+
to develop incentive programmes, conferences and exhibitions. He utilises findings
|
| 91 |
+
from market research to develop new meetings and enhance the user experience.
|
| 92 |
+
Innovative and insightful, he is able to rationalise plethora of ideas into marketable
|
| 93 |
+
products that meets customer requirements. He stays abreast of industry and market
|
| 94 |
+
trends to discover current, new, and alternative growth areas and subjects for
|
| 95 |
+
meetings, conferences and exhibitions. He travels frequently to attend industry
|
| 96 |
+
events and networks extensively outside of the office to have a deeper understanding
|
| 97 |
+
on the emerging trends in the industry.
|
| 98 |
+
sentences:
|
| 99 |
+
- The Conference Assistant is responsible for supporting the planning and execution
|
| 100 |
+
of meetings and conferences. This role focuses on administrative tasks such as
|
| 101 |
+
scheduling, coordinating logistics, and assisting with on-site operations. The
|
| 102 |
+
Conference Assistant works under the guidance of senior staff to ensure all event
|
| 103 |
+
details are managed effectively. While they may assist in gathering information
|
| 104 |
+
from market research, their primary function is to handle logistical aspects rather
|
| 105 |
+
than develop event concepts. The position requires strong organizational skills
|
| 106 |
+
and the ability to follow instructions, but it does not involve the responsibility
|
| 107 |
+
of creating new programs or traveling for industry insights. Instead, the Conference
|
| 108 |
+
Assistant's role is primarily office-based, with limited external engagement.
|
| 109 |
+
- The Event Coordinator is tasked with the creation and organization of engaging
|
| 110 |
+
concepts and content for various events, including meetings, conferences, and
|
| 111 |
+
exhibitions. This role involves close collaboration with both internal teams and
|
| 112 |
+
external partners to design incentive programs and curate event experiences. Utilizing
|
| 113 |
+
insights gained from market research, the Event Coordinator is responsible for
|
| 114 |
+
innovating new events and enhancing attendee engagement. With a creative mindset,
|
| 115 |
+
they can distill a wide range of ideas into appealing offerings that align with
|
| 116 |
+
client needs. Staying informed about industry trends and market developments,
|
| 117 |
+
the Event Coordinator identifies new opportunities for growth and relevant topics
|
| 118 |
+
for events. Frequent travel to industry gatherings is essential, as it allows
|
| 119 |
+
for extensive networking and a deeper understanding of emerging trends.
|
| 120 |
+
- The Talent Development Manager is responsible for creating and executing comprehensive
|
| 121 |
+
talent development initiatives that align with the organization's current and
|
| 122 |
+
future business needs. This role involves designing clear career pathways to enhance
|
| 123 |
+
employee awareness of advancement opportunities and advising managers on effective
|
| 124 |
+
career development strategies. The Talent Development Manager oversees high-potential
|
| 125 |
+
talent programs and succession planning efforts, ensuring a robust pipeline for
|
| 126 |
+
key positions within the organization to support ongoing success and stability.
|
| 127 |
+
Additionally, this manager implements policies related to retirement and employee
|
| 128 |
+
exits, providing guidance on managing transitions effectively. The role also includes
|
| 129 |
+
managing team performance and operations while integrating Skill Frameworks into
|
| 130 |
+
talent development initiatives. The ideal candidate for this position is highly
|
| 131 |
+
people-oriented, possesses exceptional communication skills, and engages diplomatically
|
| 132 |
+
with various stakeholders. They are adept at analyzing complex challenges and
|
| 133 |
+
making informed decisions to drive organizational success.
|
| 134 |
+
- source_sentence: The Program Manager plans and oversees multiple inter-dependent
|
| 135 |
+
programs spanning multiple years that impact one or more business units or one
|
| 136 |
+
larger project. He/She oversees all aspects of assigned programs throughout program
|
| 137 |
+
lifecycles to ensure completion within the defined scope, quality, time and cost
|
| 138 |
+
constraints. He ensures accurate allocations of resources throughout the program.
|
| 139 |
+
He leads multi-disciplinary teams, composed of various levels of personnel, vendors,
|
| 140 |
+
and clients to create and deploy successful programs. He coaches team members
|
| 141 |
+
on Agile practices and values, and Scrum process framework. He is proficient in
|
| 142 |
+
Agile practices and methodology, project management methodologies and tools, as
|
| 143 |
+
well as Scrum process framework. The Program Manager is confident and decisive
|
| 144 |
+
in leading projects, overseeing the completion and integration of inter-dependent
|
| 145 |
+
programs and parts. He has excellent communication skills, capable of effectively
|
| 146 |
+
influencing various internal and external stakeholders.
|
| 147 |
+
sentences:
|
| 148 |
+
- The Senior Workplace Safety and Health (WSH) Auditor plays a pivotal role in guiding
|
| 149 |
+
the audit team through comprehensive WSH audits for various client organizations.
|
| 150 |
+
This individual is tasked with providing expert advice to stakeholders on matters
|
| 151 |
+
related to WSH audits. The Senior WSH Auditor excels in collaboration, possesses
|
| 152 |
+
strong analytical skills, is resourceful, and effectively fosters teamwork while
|
| 153 |
+
facilitating productive discussions.
|
| 154 |
+
- The Operations Manager is tasked with overseeing daily operational activities
|
| 155 |
+
within a specific department, ensuring that all processes run smoothly and efficiently.
|
| 156 |
+
This role involves managing the workflow and performance of the team to meet departmental
|
| 157 |
+
objectives and targets. The Operations Manager is responsible for resource allocation
|
| 158 |
+
and optimizing operational procedures to enhance productivity. They lead a team
|
| 159 |
+
of operational staff, providing support and training as necessary. Knowledge of
|
| 160 |
+
operational best practices and methodologies is crucial for success in this role.
|
| 161 |
+
The Operations Manager must be able to communicate effectively with team members
|
| 162 |
+
and stakeholders to facilitate collaboration and ensure alignment with organizational
|
| 163 |
+
goals.
|
| 164 |
+
- The Project Coordinator is responsible for planning and managing various interconnected
|
| 165 |
+
projects that span several years and affect multiple business units or a significant
|
| 166 |
+
project. This role involves overseeing all phases of assigned projects throughout
|
| 167 |
+
their lifecycles to ensure they are completed within the specified scope, quality,
|
| 168 |
+
timeline, and budget constraints. The Project Coordinator ensures that resources
|
| 169 |
+
are accurately allocated throughout the project duration. Additionally, they lead
|
| 170 |
+
cross-functional teams comprising different levels of personnel, vendors, and
|
| 171 |
+
clients to successfully implement and deliver projects. They provide guidance
|
| 172 |
+
to team members on Agile methodologies and practices, as well as the Scrum process
|
| 173 |
+
framework. Proficiency in Agile principles, project management techniques, and
|
| 174 |
+
Scrum methodologies is essential for this role. The Project Coordinator must be
|
| 175 |
+
assertive and decisive in managing projects, ensuring the successful completion
|
| 176 |
+
and integration of interrelated projects. Strong communication skills are crucial,
|
| 177 |
+
as they will need to effectively influence various stakeholders both internally
|
| 178 |
+
and externally.
|
| 179 |
+
- source_sentence: The Senior Anchor/Senior Presenter/Anchor/ Presenter - News delivers
|
| 180 |
+
news stories to the broadcasting station's audience and is the public face or
|
| 181 |
+
voice of the programmes broadcasted on various platforms. He/She is involved in
|
| 182 |
+
the collection of news materials and is required to conduct research on stories
|
| 183 |
+
and interview people who have accurate information on news events. He is also
|
| 184 |
+
involved in the development and writing of content and is responsible for reviewing
|
| 185 |
+
and editing materials written by other news reporters to ensure that the content
|
| 186 |
+
is tailored to the target audience. He is required to host or co-host programmes
|
| 187 |
+
by providing live commentaries and doing live interviews to create content that
|
| 188 |
+
links closely to the stories. He often works from a studio and may be expected
|
| 189 |
+
to travel in order to present news from remote locations in the field related
|
| 190 |
+
to a particular major news event. He follows a fixed working schedule, but may
|
| 191 |
+
be required to work at odd hours, including weekends, to cover important events.
|
| 192 |
+
He should be an effective communicator with an understanding of news editorial
|
| 193 |
+
process. He should ideally have a background in journalism or mass communications
|
| 194 |
+
and possesses an understanding of daily newscast content and media ethics. He
|
| 195 |
+
ought to be able to improvise and ad-lib in a live on-camera setting and be able
|
| 196 |
+
to work well with others across a variety of situations.
|
| 197 |
+
sentences:
|
| 198 |
+
- 'The Sustainability Advisor offers expert guidance to organizations seeking to
|
| 199 |
+
implement effective waste management practices that comply with environmental
|
| 200 |
+
standards and align with their corporate social responsibility objectives. This
|
| 201 |
+
role involves performing comprehensive waste audits and risk evaluations, crafting
|
| 202 |
+
and proposing strategies for waste reduction, and spearheading initiatives focused
|
| 203 |
+
on the circular economy and sustainability. The Sustainability Advisor is expected
|
| 204 |
+
to utilize advanced waste management technologies, ensure adherence to regulations,
|
| 205 |
+
and conduct life cycle assessments to deliver practical recommendations that facilitate
|
| 206 |
+
organizational transformation. Additionally, this position is crucial in maintaining
|
| 207 |
+
regulatory compliance and permits, providing training to stakeholders on best
|
| 208 |
+
waste management practices, and promoting a culture of engagement and change among
|
| 209 |
+
all parties involved.
|
| 210 |
+
|
| 211 |
+
|
| 212 |
+
The Sustainability Advisor must possess strong problem-solving abilities, effective
|
| 213 |
+
communication skills, and the capacity to influence others, enabling them to engage
|
| 214 |
+
with cross-functional teams and a variety of stakeholders successfully. They will
|
| 215 |
+
manage waste management projects efficiently and cultivate a sustainable culture
|
| 216 |
+
within organizations to help achieve long-term waste management objectives.'
|
| 217 |
+
- The Junior News Reporter is tasked with gathering and compiling news stories for
|
| 218 |
+
the broadcasting station's audience, acting as a supportive figure in various
|
| 219 |
+
programs aired on different platforms. This position includes the collection of
|
| 220 |
+
news materials and conducting basic research on stories, as well as interviewing
|
| 221 |
+
individuals who may provide information on news events. The Junior News Reporter
|
| 222 |
+
assists in the writing and editing of content produced by senior reporters, ensuring
|
| 223 |
+
it meets the audience's needs. This role may also involve supporting hosts during
|
| 224 |
+
live programs by providing background information and conducting interviews to
|
| 225 |
+
aid in storytelling. While primarily working from the studio, there may be occasional
|
| 226 |
+
travel to gather information for specific news events. The Junior News Reporter
|
| 227 |
+
typically follows a standard work schedule but might be called upon to work during
|
| 228 |
+
odd hours, including weekends, to assist in covering significant events. Effective
|
| 229 |
+
communication skills and a foundational understanding of media ethics are important,
|
| 230 |
+
along with a basic knowledge of the news editorial process. The ability to work
|
| 231 |
+
collaboratively with colleagues and adapt to various situations is also necessary.
|
| 232 |
+
- The Lead News Presenter is responsible for delivering engaging news stories to
|
| 233 |
+
the audience of the broadcasting station, serving as the public face or voice
|
| 234 |
+
of various programs aired across multiple platforms. This role involves gathering
|
| 235 |
+
news materials, conducting thorough research, and interviewing credible sources
|
| 236 |
+
to ensure accurate reporting on current events. The Lead News Presenter also plays
|
| 237 |
+
a critical role in content development and writing, as well as reviewing and editing
|
| 238 |
+
articles produced by other reporters to ensure alignment with the target audience's
|
| 239 |
+
interests. Additionally, this position requires hosting or co-hosting programs,
|
| 240 |
+
providing live commentary, and conducting live interviews to enhance storytelling.
|
| 241 |
+
While primarily based in a studio, the Lead News Presenter may travel to report
|
| 242 |
+
from remote locations for significant news events. The role follows a structured
|
| 243 |
+
work schedule but may necessitate working irregular hours, including weekends,
|
| 244 |
+
to cover key happenings. Strong communication skills and a solid understanding
|
| 245 |
+
of the news editorial process are essential, along with a background in journalism
|
| 246 |
+
or mass communications, familiarity with daily newscast content, and adherence
|
| 247 |
+
to media ethics. The ability to improvise and ad-lib during live broadcasts while
|
| 248 |
+
collaborating effectively with team members is also crucial.
|
| 249 |
+
datasets:
|
| 250 |
+
- Fatin757/ssf-train-valid_v3
|
| 251 |
+
pipeline_tag: sentence-similarity
|
| 252 |
+
library_name: sentence-transformers
|
| 253 |
+
---
|
| 254 |
+
|
| 255 |
+
# SentenceTransformer based on answerdotai/ModernBERT-base
|
| 256 |
+
|
| 257 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the [ssf-train-valid_v3](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3) dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 258 |
+
|
| 259 |
+
## Model Details
|
| 260 |
+
|
| 261 |
+
### Model Description
|
| 262 |
+
- **Model Type:** Sentence Transformer
|
| 263 |
+
- **Base model:** [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) <!-- at revision 8949b909ec900327062f0ebf497f51aef5e6f0c8 -->
|
| 264 |
+
- **Maximum Sequence Length:** 8192 tokens
|
| 265 |
+
- **Output Dimensionality:** 768 dimensions
|
| 266 |
+
- **Similarity Function:** Cosine Similarity
|
| 267 |
+
- **Training Dataset:**
|
| 268 |
+
- [ssf-train-valid_v3](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3)
|
| 269 |
+
<!-- - **Language:** Unknown -->
|
| 270 |
+
<!-- - **License:** Unknown -->
|
| 271 |
+
|
| 272 |
+
### Model Sources
|
| 273 |
+
|
| 274 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
| 275 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
| 276 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
| 277 |
+
|
| 278 |
+
### Full Model Architecture
|
| 279 |
+
|
| 280 |
+
```
|
| 281 |
+
SentenceTransformer(
|
| 282 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
|
| 283 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 284 |
+
)
|
| 285 |
+
```
|
| 286 |
+
|
| 287 |
+
## Usage
|
| 288 |
+
|
| 289 |
+
### Direct Usage (Sentence Transformers)
|
| 290 |
+
|
| 291 |
+
First install the Sentence Transformers library:
|
| 292 |
+
|
| 293 |
+
```bash
|
| 294 |
+
pip install -U sentence-transformers
|
| 295 |
+
```
|
| 296 |
+
|
| 297 |
+
Then you can load this model and run inference.
|
| 298 |
+
```python
|
| 299 |
+
from sentence_transformers import SentenceTransformer
|
| 300 |
+
|
| 301 |
+
# Download from the 🤗 Hub
|
| 302 |
+
model = SentenceTransformer("Fatin757/ssf-retriever-modernbert-v2")
|
| 303 |
+
# Run inference
|
| 304 |
+
sentences = [
|
| 305 |
+
"The Senior Anchor/Senior Presenter/Anchor/ Presenter - News delivers news stories to the broadcasting station's audience and is the public face or voice of the programmes broadcasted on various platforms. He/She is involved in the collection of news materials and is required to conduct research on stories and interview people who have accurate information on news events. He is also involved in the development and writing of content and is responsible for reviewing and editing materials written by other news reporters to ensure that the content is tailored to the target audience. He is required to host or co-host programmes by providing live commentaries and doing live interviews to create content that links closely to the stories. He often works from a studio and may be expected to travel in order to present news from remote locations in the field related to a particular major news event. He follows a fixed working schedule, but may be required to work at odd hours, including weekends, to cover important events. He should be an effective communicator with an understanding of news editorial process. He should ideally have a background in journalism or mass communications and possesses an understanding of daily newscast content and media ethics. He ought to be able to improvise and ad-lib in a live on-camera setting and be able to work well with others across a variety of situations.",
|
| 306 |
+
"The Lead News Presenter is responsible for delivering engaging news stories to the audience of the broadcasting station, serving as the public face or voice of various programs aired across multiple platforms. This role involves gathering news materials, conducting thorough research, and interviewing credible sources to ensure accurate reporting on current events. The Lead News Presenter also plays a critical role in content development and writing, as well as reviewing and editing articles produced by other reporters to ensure alignment with the target audience's interests. Additionally, this position requires hosting or co-hosting programs, providing live commentary, and conducting live interviews to enhance storytelling. While primarily based in a studio, the Lead News Presenter may travel to report from remote locations for significant news events. The role follows a structured work schedule but may necessitate working irregular hours, including weekends, to cover key happenings. Strong communication skills and a solid understanding of the news editorial process are essential, along with a background in journalism or mass communications, familiarity with daily newscast content, and adherence to media ethics. The ability to improvise and ad-lib during live broadcasts while collaborating effectively with team members is also crucial.",
|
| 307 |
+
"The Junior News Reporter is tasked with gathering and compiling news stories for the broadcasting station's audience, acting as a supportive figure in various programs aired on different platforms. This position includes the collection of news materials and conducting basic research on stories, as well as interviewing individuals who may provide information on news events. The Junior News Reporter assists in the writing and editing of content produced by senior reporters, ensuring it meets the audience's needs. This role may also involve supporting hosts during live programs by providing background information and conducting interviews to aid in storytelling. While primarily working from the studio, there may be occasional travel to gather information for specific news events. The Junior News Reporter typically follows a standard work schedule but might be called upon to work during odd hours, including weekends, to assist in covering significant events. Effective communication skills and a foundational understanding of media ethics are important, along with a basic knowledge of the news editorial process. The ability to work collaboratively with colleagues and adapt to various situations is also necessary.",
|
| 308 |
+
]
|
| 309 |
+
embeddings = model.encode(sentences)
|
| 310 |
+
print(embeddings.shape)
|
| 311 |
+
# (3, 768)
|
| 312 |
+
|
| 313 |
+
# Get the similarity scores for the embeddings
|
| 314 |
+
similarities = model.similarity(embeddings, embeddings)
|
| 315 |
+
print(similarities)
|
| 316 |
+
# tensor([[1.0000, 0.9225, 0.4020],
|
| 317 |
+
# [0.9225, 1.0000, 0.4397],
|
| 318 |
+
# [0.4020, 0.4397, 1.0000]])
|
| 319 |
+
```
|
| 320 |
+
|
| 321 |
+
<!--
|
| 322 |
+
### Direct Usage (Transformers)
|
| 323 |
+
|
| 324 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
| 325 |
+
|
| 326 |
+
</details>
|
| 327 |
+
-->
|
| 328 |
+
|
| 329 |
+
<!--
|
| 330 |
+
### Downstream Usage (Sentence Transformers)
|
| 331 |
+
|
| 332 |
+
You can finetune this model on your own dataset.
|
| 333 |
+
|
| 334 |
+
<details><summary>Click to expand</summary>
|
| 335 |
+
|
| 336 |
+
</details>
|
| 337 |
+
-->
|
| 338 |
+
|
| 339 |
+
<!--
|
| 340 |
+
### Out-of-Scope Use
|
| 341 |
+
|
| 342 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
| 343 |
+
-->
|
| 344 |
+
|
| 345 |
+
<!--
|
| 346 |
+
## Bias, Risks and Limitations
|
| 347 |
+
|
| 348 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
| 349 |
+
-->
|
| 350 |
+
|
| 351 |
+
<!--
|
| 352 |
+
### Recommendations
|
| 353 |
+
|
| 354 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
| 355 |
+
-->
|
| 356 |
+
|
| 357 |
+
## Training Details
|
| 358 |
+
|
| 359 |
+
### Training Dataset
|
| 360 |
+
|
| 361 |
+
#### ssf-train-valid_v3
|
| 362 |
+
|
| 363 |
+
* Dataset: [ssf-train-valid_v3](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3) at [5d23d8e](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3/tree/5d23d8eeff6292b6f2fb97cae6fd2f287dca7758)
|
| 364 |
+
* Size: 6,032 training samples
|
| 365 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 366 |
+
* Approximate statistics based on the first 1000 samples:
|
| 367 |
+
| | anchor | positive | negative |
|
| 368 |
+
|:--------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 369 |
+
| type | string | string | string |
|
| 370 |
+
| details | <ul><li>min: 60 tokens</li><li>mean: 168.86 tokens</li><li>max: 403 tokens</li></ul> | <ul><li>min: 74 tokens</li><li>mean: 165.4 tokens</li><li>max: 318 tokens</li></ul> | <ul><li>min: 63 tokens</li><li>mean: 139.63 tokens</li><li>max: 253 tokens</li></ul> |
|
| 371 |
+
* Samples:
|
| 372 |
+
| anchor | positive | negative |
|
| 373 |
+
|:-------|:---------|:---------|
|
| 374 |
+
| <code>The Brokerage Supervisor/ Freight Supervisor is responsible for liaising with customers, logistics operators and customs officials and supervising the custom clearance/freight forwarding operations to ensure goods are cleared through customs or quarantine in accordance with import and export laws and regulations. Analytical and systematic, he/she is required to supervise a freight operations team to execute operations in a timely manner to meet business and customers' requirements. He/She is also expected to work with internal and external stakeholders to accomplish his work.</code> | <code>The Logistics Operations Supervisor is tasked with coordinating communication between clients, transportation providers, and regulatory agencies while overseeing the customs clearance and freight forwarding processes. This role ensures compliance with all import and export regulations, facilitating the smooth transit of goods through customs and quarantine. The ideal candidate will possess strong analytical and organizational skills, leading a team of logistics professionals to execute operations efficiently and meet both business objectives and customer expectations. Collaboration with various internal and external partners is essential to successfully fulfill the responsibilities of this position.</code> | <code>The Freight Operations Coordinator is responsible for managing interactions with suppliers, transport companies, and regulatory bodies while overseeing the delivery processes to ensure shipments are dispatched in alignment with logistics standards. This role focuses on adherence to transportation guidelines and the timely execution of delivery schedules. The successful candidate should demonstrate excellent problem-solving abilities and be capable of directing a team of logistics personnel to achieve operational targets. Coordination with different departments and external partners is crucial for the effective management of this role.</code> |
|
| 375 |
+
| <code>The Senior Quality Engineer evaluates and manages quality systems, tools and standards to meet business needs. He/She, as the subject matter expert, is required to identify risk areas, ensure the robustness of the risk control plans deployed for excursion free launch and conduct qualification and/or validation for new materials. He is responsible for developing a management system to ensure that operations meet both internal and external parties quality requirements. He has to take the lead in managing cross-functional teams in continuous improvement projects and assist in implementing process improvement projects. He plays an important role in organisational development through development of on-the-job training and mentoring of team leaders. The Senior Quality Engineer possesses an analytical mind and leadership skills to steer the team to perform their best and achieve the desired organisational outcomes.</code> | <code>The Quality Assurance Manager is responsible for assessing and overseeing quality systems, methodologies, and standards to align with business objectives. As an expert in the field, this individual will pinpoint areas of risk, ensure that effective risk management strategies are in place for successful product launches, and conduct thorough qualification and validation of new materials. The role involves creating a comprehensive quality management system to guarantee compliance with both internal standards and external regulations. Additionally, the Quality Assurance Manager will lead cross-functional teams in ongoing improvement initiatives and support the execution of process enhancement projects. This position is pivotal in fostering organizational growth through the development of on-the-job training programs and mentoring for team leaders. The ideal candidate will have strong analytical capabilities and leadership qualities to drive team performance and achieve key organizational ...</code> | <code>The Junior Quality Control Technician is tasked with performing routine inspections and testing of products to ensure compliance with quality standards. This role requires the technician to document findings and report any discrepancies to the senior staff. While they contribute to maintaining quality assurance processes, they are not responsible for developing management systems or leading cross-functional teams. Instead, their focus will be on executing established procedures and assisting in minor quality improvement tasks. The Junior Quality Control Technician will work under close supervision and will not engage in risk management or validation processes, limiting their involvement to basic quality checks and reporting.</code> |
|
| 376 |
+
| <code>The Waste Process Engineer is responsible for designing, optimising and managing processes and systems for the efficient handling, treatment, transformation and disposal of waste, including electronic waste (e-waste) and plastic waste, for an organisation. He/She is focused on minimising waste generation, exploring new technologies for enhancing waste management efficiency and material recovery, and recommending improved waste management systems and processes within an organisation. Based on life-cycle analyses and evaluation of current waste streams, systems and waste-to-resource initiatives, he will develop and implement new processes, ensuring compliance with environmental regulations. <br><br>The Waste Process Engineer must be meticulous, with an eye for detail and have strong analytical and research skills to stay up to date on best practices and circular economy strategies related to waste management. He is also expected to work collaboratively with cross-functional teams to promote su...</code> | <code>The Waste Management Engineer is tasked with the design, optimization, and oversight of processes and systems aimed at the effective handling, treatment, transformation, and disposal of various waste types, including electronic waste (e-waste) and plastics. This role emphasizes the reduction of waste generation and the exploration of innovative technologies to enhance waste management efficiency and material recovery. Additionally, the engineer will assess current waste streams and implement waste-to-resource initiatives based on thorough life-cycle analyses. Ensuring compliance with environmental regulations, the Waste Management Engineer will develop and execute new processes while collaborating with cross-functional teams to advocate for sustainable practices and advance the organization’s objectives within the Circular Economy.</code> | <code>The Environmental Compliance Officer is responsible for monitoring and enforcing adherence to environmental laws and regulations within an organization. This role focuses on assessing the impact of various operations on the environment and ensuring that all practices comply with legal standards. The officer will conduct regular audits, prepare reports, and provide training to staff about environmental policies. They will also work closely with regulatory agencies to maintain compliance and address any environmental concerns that may arise. Strong attention to detail and analytical skills are essential for this position, as is the ability to collaborate with various departments to ensure that the organization meets its sustainability goals.</code> |
|
| 377 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 378 |
+
```json
|
| 379 |
+
{
|
| 380 |
+
"scale": 20.0,
|
| 381 |
+
"similarity_fct": "cos_sim",
|
| 382 |
+
"gather_across_devices": false
|
| 383 |
+
}
|
| 384 |
+
```
|
| 385 |
+
|
| 386 |
+
### Evaluation Dataset
|
| 387 |
+
|
| 388 |
+
#### ssf-train-valid_v3
|
| 389 |
+
|
| 390 |
+
* Dataset: [ssf-train-valid_v3](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3) at [5d23d8e](https://huggingface.co/datasets/Fatin757/ssf-train-valid_v3/tree/5d23d8eeff6292b6f2fb97cae6fd2f287dca7758)
|
| 391 |
+
* Size: 1,508 evaluation samples
|
| 392 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 393 |
+
* Approximate statistics based on the first 1000 samples:
|
| 394 |
+
| | anchor | positive | negative |
|
| 395 |
+
|:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
| 396 |
+
| type | string | string | string |
|
| 397 |
+
| details | <ul><li>min: 57 tokens</li><li>mean: 166.64 tokens</li><li>max: 349 tokens</li></ul> | <ul><li>min: 67 tokens</li><li>mean: 162.89 tokens</li><li>max: 301 tokens</li></ul> | <ul><li>min: 65 tokens</li><li>mean: 139.7 tokens</li><li>max: 263 tokens</li></ul> |
|
| 398 |
+
* Samples:
|
| 399 |
+
| anchor | positive | negative |
|
| 400 |
+
|:-------|:---------|:---------|
|
| 401 |
+
| <code>The Manager - Standards and Practices (S&P) ensures that content delivered by the organisation complies with the regulatory requirements and censorship norms of the local territories where the content may be available. He/She also provides advisory ratings for the content based on the regulatory guidelines. He keeps abreast of the local, cultural and political norms and sensitivities to support the creation of content classification guidelines. The work involves coordinating internal and external processes for delivery within tight timelines. He is highly accountable for the organisation's brand and reputation given the sensitivities of content classification. He should be comfortable coordinating with internal and external stakeholders in order to balance the organisation's priorities with compliance to guidelines and norms. He should be effective at planning and organising. He should also be aware of the regulatory, political and cultural landscape and possess a keen eye for detail t...</code> | <code>The Content Compliance Manager plays a crucial role in ensuring that all materials produced by the organization meet the necessary regulatory standards and censorship requirements of the respective local markets. This individual will provide expert advisory ratings for content in alignment with established regulatory frameworks. Staying informed about local cultural and political dynamics is essential to aid in the development of content classification guidelines. The role requires effective coordination of both internal and external processes to ensure timely delivery while maintaining the integrity of the organization's brand and reputation. The ideal candidate will excel in stakeholder engagement, balancing organizational objectives with compliance mandates. Strong planning, organizational skills, and a meticulous attention to detail are vital for identifying compliance issues during content reviews. Proficiency in communication and stakeholder management is necessary for successful...</code> | <code>The Junior Risk Analyst is responsible for evaluating potential risks that may impact the organization’s operations and financial performance. This role involves conducting thorough assessments of various risk factors and preparing detailed reports on findings. The Junior Risk Analyst will collaborate with different departments to identify risk mitigation strategies and ensure that appropriate measures are in place. Strong analytical skills and attention to detail are essential for identifying potential vulnerabilities within the organization. The position requires effective communication with team members and management to discuss risk assessments and recommendations. The ideal candidate should also be comfortable working under tight deadlines while maintaining a high level of accuracy in their analyses.</code> |
|
| 402 |
+
| <code>The Psychologist provides psychological services within multi-disciplinary settings, applying a wide array of psychometric assessments and treatments for clients with routine psychological conditions. He/She conducts psychoeducational programmes and training for other professionals. He is a resourceful, proactive and collaborative professional, and works in varied settings such as public and private institutions, hospitals, healthcare and voluntary welfare organisations. He works under supervision and in collaboration with other professionals in the course of his work.</code> | <code>The Clinical Psychologist delivers comprehensive psychological services in diverse multi-disciplinary environments, utilizing a broad spectrum of psychometric evaluations and therapeutic interventions for clients experiencing common psychological issues. This role involves designing and implementing psychoeducational programs and training sessions aimed at enhancing the skills of fellow professionals. The ideal candidate is resourceful, proactive, and excels in collaboration, working effectively across various settings including public and private institutions, hospitals, healthcare facilities, and voluntary welfare organizations. The Clinical Psychologist operates under supervision while engaging collaboratively with other professionals to ensure the best outcomes for clients.</code> | <code>The Clinical Psychologist conducts psychological assessments within corporate environments, focusing primarily on employee mental health and organizational behavior. This position emphasizes the development and implementation of workplace wellness programs and training for management teams. The successful candidate will be an innovative, self-motivated individual who thrives in collaborative settings, working closely with human resources and management to address workplace-related psychological issues. The Clinical Psychologist operates independently while coordinating with various departments to enhance employee well-being and productivity in a corporate context.</code> |
|
| 403 |
+
| <code>The Planning Manager (Aircraft Engine / Component Maintenance) is responsible for strategising long-term supply chain management plans, optimising aircraft engine and component maintenance planning and resource management, and driving supplier capability and performance enhancement programmes. He/She develops vendor management strategies and leads technology application to strengthen sourcing, inventory and warehousing operations. He provides project management oversight for engine servicing operations and manages technical and programme reviews with customers and suppliers. He manages compliance with airworthiness and legislative requirements, and contributes to development of the organisation's standard operating procedures (SOPs), management systems, lean and sustainability practices, and data analytics plans for strategic decision-making. He drives team performance to achieve business key performance indicators (KPIs) and leads talent recruitment and development plans. He should de...</code> | <code>The Aircraft Maintenance Planning Specialist is tasked with formulating comprehensive long-term strategies for supply chain management, focusing on the optimization of maintenance planning for aircraft engines and components. This role involves enhancing supplier capabilities and performance through targeted programs. The specialist will develop effective vendor management strategies and leverage technology to improve sourcing, inventory control, and warehousing operations. Additionally, they will oversee project management for engine servicing activities and facilitate technical and program reviews with both customers and suppliers. Compliance with airworthiness regulations and legislative requirements is critical, as is contributing to the organization's standard operating procedures (SOPs), management systems, and data analytics initiatives for informed decision-making. The specialist will also drive team performance to meet key performance indicators (KPIs) and lead initiatives for...</code> | <code>The Aircraft Component Quality Assurance Coordinator is responsible for implementing quality control measures and ensuring compliance with industry standards in the maintenance of aircraft components. This role focuses on conducting inspections and audits to assess the performance of suppliers and service providers. The coordinator develops quality assurance strategies and applies technology to enhance inspection processes and documentation practices. They will manage quality-related projects and collaborate with engineering teams to address any compliance issues with regulatory requirements. Additionally, the coordinator contributes to the development of quality management systems and participates in data analysis for quality improvement initiatives. They are expected to drive team engagement to achieve quality performance metrics and support training and development programs for staff. Strong attention to detail, analytical skills, and the ability to work collaboratively across depar...</code> |
|
| 404 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
| 405 |
+
```json
|
| 406 |
+
{
|
| 407 |
+
"scale": 20.0,
|
| 408 |
+
"similarity_fct": "cos_sim",
|
| 409 |
+
"gather_across_devices": false
|
| 410 |
+
}
|
| 411 |
+
```
|
| 412 |
+
|
| 413 |
+
### Training Hyperparameters
|
| 414 |
+
#### Non-Default Hyperparameters
|
| 415 |
+
|
| 416 |
+
- `eval_strategy`: epoch
|
| 417 |
+
- `per_device_train_batch_size`: 32
|
| 418 |
+
- `per_device_eval_batch_size`: 16
|
| 419 |
+
- `num_train_epochs`: 5
|
| 420 |
+
- `lr_scheduler_type`: cosine
|
| 421 |
+
- `warmup_ratio`: 0.1
|
| 422 |
+
- `bf16`: True
|
| 423 |
+
- `load_best_model_at_end`: True
|
| 424 |
+
- `batch_sampler`: no_duplicates
|
| 425 |
+
|
| 426 |
+
#### All Hyperparameters
|
| 427 |
+
<details><summary>Click to expand</summary>
|
| 428 |
+
|
| 429 |
+
- `overwrite_output_dir`: False
|
| 430 |
+
- `do_predict`: False
|
| 431 |
+
- `eval_strategy`: epoch
|
| 432 |
+
- `prediction_loss_only`: True
|
| 433 |
+
- `per_device_train_batch_size`: 32
|
| 434 |
+
- `per_device_eval_batch_size`: 16
|
| 435 |
+
- `per_gpu_train_batch_size`: None
|
| 436 |
+
- `per_gpu_eval_batch_size`: None
|
| 437 |
+
- `gradient_accumulation_steps`: 1
|
| 438 |
+
- `eval_accumulation_steps`: None
|
| 439 |
+
- `torch_empty_cache_steps`: None
|
| 440 |
+
- `learning_rate`: 5e-05
|
| 441 |
+
- `weight_decay`: 0.0
|
| 442 |
+
- `adam_beta1`: 0.9
|
| 443 |
+
- `adam_beta2`: 0.999
|
| 444 |
+
- `adam_epsilon`: 1e-08
|
| 445 |
+
- `max_grad_norm`: 1.0
|
| 446 |
+
- `num_train_epochs`: 5
|
| 447 |
+
- `max_steps`: -1
|
| 448 |
+
- `lr_scheduler_type`: cosine
|
| 449 |
+
- `lr_scheduler_kwargs`: {}
|
| 450 |
+
- `warmup_ratio`: 0.1
|
| 451 |
+
- `warmup_steps`: 0
|
| 452 |
+
- `log_level`: passive
|
| 453 |
+
- `log_level_replica`: warning
|
| 454 |
+
- `log_on_each_node`: True
|
| 455 |
+
- `logging_nan_inf_filter`: True
|
| 456 |
+
- `save_safetensors`: True
|
| 457 |
+
- `save_on_each_node`: False
|
| 458 |
+
- `save_only_model`: False
|
| 459 |
+
- `restore_callback_states_from_checkpoint`: False
|
| 460 |
+
- `no_cuda`: False
|
| 461 |
+
- `use_cpu`: False
|
| 462 |
+
- `use_mps_device`: False
|
| 463 |
+
- `seed`: 42
|
| 464 |
+
- `data_seed`: None
|
| 465 |
+
- `jit_mode_eval`: False
|
| 466 |
+
- `use_ipex`: False
|
| 467 |
+
- `bf16`: True
|
| 468 |
+
- `fp16`: False
|
| 469 |
+
- `fp16_opt_level`: O1
|
| 470 |
+
- `half_precision_backend`: auto
|
| 471 |
+
- `bf16_full_eval`: False
|
| 472 |
+
- `fp16_full_eval`: False
|
| 473 |
+
- `tf32`: None
|
| 474 |
+
- `local_rank`: 0
|
| 475 |
+
- `ddp_backend`: None
|
| 476 |
+
- `tpu_num_cores`: None
|
| 477 |
+
- `tpu_metrics_debug`: False
|
| 478 |
+
- `debug`: []
|
| 479 |
+
- `dataloader_drop_last`: False
|
| 480 |
+
- `dataloader_num_workers`: 0
|
| 481 |
+
- `dataloader_prefetch_factor`: None
|
| 482 |
+
- `past_index`: -1
|
| 483 |
+
- `disable_tqdm`: False
|
| 484 |
+
- `remove_unused_columns`: True
|
| 485 |
+
- `label_names`: None
|
| 486 |
+
- `load_best_model_at_end`: True
|
| 487 |
+
- `ignore_data_skip`: False
|
| 488 |
+
- `fsdp`: []
|
| 489 |
+
- `fsdp_min_num_params`: 0
|
| 490 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
| 491 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
| 492 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
| 493 |
+
- `parallelism_config`: None
|
| 494 |
+
- `deepspeed`: None
|
| 495 |
+
- `label_smoothing_factor`: 0.0
|
| 496 |
+
- `optim`: adamw_torch_fused
|
| 497 |
+
- `optim_args`: None
|
| 498 |
+
- `adafactor`: False
|
| 499 |
+
- `group_by_length`: False
|
| 500 |
+
- `length_column_name`: length
|
| 501 |
+
- `ddp_find_unused_parameters`: None
|
| 502 |
+
- `ddp_bucket_cap_mb`: None
|
| 503 |
+
- `ddp_broadcast_buffers`: False
|
| 504 |
+
- `dataloader_pin_memory`: True
|
| 505 |
+
- `dataloader_persistent_workers`: False
|
| 506 |
+
- `skip_memory_metrics`: True
|
| 507 |
+
- `use_legacy_prediction_loop`: False
|
| 508 |
+
- `push_to_hub`: False
|
| 509 |
+
- `resume_from_checkpoint`: None
|
| 510 |
+
- `hub_model_id`: None
|
| 511 |
+
- `hub_strategy`: every_save
|
| 512 |
+
- `hub_private_repo`: None
|
| 513 |
+
- `hub_always_push`: False
|
| 514 |
+
- `hub_revision`: None
|
| 515 |
+
- `gradient_checkpointing`: False
|
| 516 |
+
- `gradient_checkpointing_kwargs`: None
|
| 517 |
+
- `include_inputs_for_metrics`: False
|
| 518 |
+
- `include_for_metrics`: []
|
| 519 |
+
- `eval_do_concat_batches`: True
|
| 520 |
+
- `fp16_backend`: auto
|
| 521 |
+
- `push_to_hub_model_id`: None
|
| 522 |
+
- `push_to_hub_organization`: None
|
| 523 |
+
- `mp_parameters`:
|
| 524 |
+
- `auto_find_batch_size`: False
|
| 525 |
+
- `full_determinism`: False
|
| 526 |
+
- `torchdynamo`: None
|
| 527 |
+
- `ray_scope`: last
|
| 528 |
+
- `ddp_timeout`: 1800
|
| 529 |
+
- `torch_compile`: False
|
| 530 |
+
- `torch_compile_backend`: None
|
| 531 |
+
- `torch_compile_mode`: None
|
| 532 |
+
- `include_tokens_per_second`: False
|
| 533 |
+
- `include_num_input_tokens_seen`: False
|
| 534 |
+
- `neftune_noise_alpha`: None
|
| 535 |
+
- `optim_target_modules`: None
|
| 536 |
+
- `batch_eval_metrics`: False
|
| 537 |
+
- `eval_on_start`: False
|
| 538 |
+
- `use_liger_kernel`: False
|
| 539 |
+
- `liger_kernel_config`: None
|
| 540 |
+
- `eval_use_gather_object`: False
|
| 541 |
+
- `average_tokens_across_devices`: False
|
| 542 |
+
- `prompts`: None
|
| 543 |
+
- `batch_sampler`: no_duplicates
|
| 544 |
+
- `multi_dataset_batch_sampler`: proportional
|
| 545 |
+
- `router_mapping`: {}
|
| 546 |
+
- `learning_rate_mapping`: {}
|
| 547 |
+
|
| 548 |
+
</details>
|
| 549 |
+
|
| 550 |
+
### Training Logs
|
| 551 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
| 552 |
+
|:-------:|:-------:|:-------------:|:---------------:|
|
| 553 |
+
| 1.0 | 189 | 0.3362 | 0.0096 |
|
| 554 |
+
| 2.0 | 378 | 0.0079 | 0.0056 |
|
| 555 |
+
| 3.0 | 567 | 0.0029 | 0.0042 |
|
| 556 |
+
| 4.0 | 756 | 0.0026 | 0.0037 |
|
| 557 |
+
| **5.0** | **945** | **0.0023** | **0.0032** |
|
| 558 |
+
|
| 559 |
+
* The bold row denotes the saved checkpoint.
|
| 560 |
+
|
| 561 |
+
### Framework Versions
|
| 562 |
+
- Python: 3.12.11
|
| 563 |
+
- Sentence Transformers: 5.1.0
|
| 564 |
+
- Transformers: 4.56.1
|
| 565 |
+
- PyTorch: 2.8.0+cu128
|
| 566 |
+
- Accelerate: 1.10.0
|
| 567 |
+
- Datasets: 4.0.0
|
| 568 |
+
- Tokenizers: 0.22.0
|
| 569 |
+
|
| 570 |
+
## Citation
|
| 571 |
+
|
| 572 |
+
### BibTeX
|
| 573 |
+
|
| 574 |
+
#### Sentence Transformers
|
| 575 |
+
```bibtex
|
| 576 |
+
@inproceedings{reimers-2019-sentence-bert,
|
| 577 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
| 578 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
| 579 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
| 580 |
+
month = "11",
|
| 581 |
+
year = "2019",
|
| 582 |
+
publisher = "Association for Computational Linguistics",
|
| 583 |
+
url = "https://arxiv.org/abs/1908.10084",
|
| 584 |
+
}
|
| 585 |
+
```
|
| 586 |
+
|
| 587 |
+
#### MultipleNegativesRankingLoss
|
| 588 |
+
```bibtex
|
| 589 |
+
@misc{henderson2017efficient,
|
| 590 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
| 591 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
| 592 |
+
year={2017},
|
| 593 |
+
eprint={1705.00652},
|
| 594 |
+
archivePrefix={arXiv},
|
| 595 |
+
primaryClass={cs.CL}
|
| 596 |
+
}
|
| 597 |
+
```
|
| 598 |
+
|
| 599 |
+
<!--
|
| 600 |
+
## Glossary
|
| 601 |
+
|
| 602 |
+
*Clearly define terms in order to be accessible across audiences.*
|
| 603 |
+
-->
|
| 604 |
+
|
| 605 |
+
<!--
|
| 606 |
+
## Model Card Authors
|
| 607 |
+
|
| 608 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
| 609 |
+
-->
|
| 610 |
+
|
| 611 |
+
<!--
|
| 612 |
+
## Model Card Contact
|
| 613 |
+
|
| 614 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
| 615 |
+
-->
|
config.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"ModernBertModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 50281,
|
| 8 |
+
"classifier_activation": "gelu",
|
| 9 |
+
"classifier_bias": false,
|
| 10 |
+
"classifier_dropout": 0.0,
|
| 11 |
+
"classifier_pooling": "mean",
|
| 12 |
+
"cls_token_id": 50281,
|
| 13 |
+
"decoder_bias": true,
|
| 14 |
+
"deterministic_flash_attn": false,
|
| 15 |
+
"dtype": "float32",
|
| 16 |
+
"embedding_dropout": 0.0,
|
| 17 |
+
"eos_token_id": 50282,
|
| 18 |
+
"global_attn_every_n_layers": 3,
|
| 19 |
+
"global_rope_theta": 160000.0,
|
| 20 |
+
"gradient_checkpointing": false,
|
| 21 |
+
"hidden_activation": "gelu",
|
| 22 |
+
"hidden_size": 768,
|
| 23 |
+
"initializer_cutoff_factor": 2.0,
|
| 24 |
+
"initializer_range": 0.02,
|
| 25 |
+
"intermediate_size": 1152,
|
| 26 |
+
"layer_norm_eps": 1e-05,
|
| 27 |
+
"local_attention": 128,
|
| 28 |
+
"local_rope_theta": 10000.0,
|
| 29 |
+
"max_position_embeddings": 8192,
|
| 30 |
+
"mlp_bias": false,
|
| 31 |
+
"mlp_dropout": 0.0,
|
| 32 |
+
"model_type": "modernbert",
|
| 33 |
+
"norm_bias": false,
|
| 34 |
+
"norm_eps": 1e-05,
|
| 35 |
+
"num_attention_heads": 12,
|
| 36 |
+
"num_hidden_layers": 22,
|
| 37 |
+
"pad_token_id": 50283,
|
| 38 |
+
"position_embedding_type": "absolute",
|
| 39 |
+
"repad_logits_with_grad": false,
|
| 40 |
+
"sep_token_id": 50282,
|
| 41 |
+
"sparse_pred_ignore_index": -100,
|
| 42 |
+
"sparse_prediction": false,
|
| 43 |
+
"transformers_version": "4.56.1",
|
| 44 |
+
"vocab_size": 50368
|
| 45 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "SentenceTransformer",
|
| 3 |
+
"__version__": {
|
| 4 |
+
"sentence_transformers": "5.1.0",
|
| 5 |
+
"transformers": "4.56.1",
|
| 6 |
+
"pytorch": "2.8.0+cu128"
|
| 7 |
+
},
|
| 8 |
+
"prompts": {
|
| 9 |
+
"query": "",
|
| 10 |
+
"document": ""
|
| 11 |
+
},
|
| 12 |
+
"default_prompt_name": null,
|
| 13 |
+
"similarity_fn_name": "cosine"
|
| 14 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cd52551d2c00c1a833fc5a6a467ed75eee182b1c8744f20ca5181434d0a0a0f
|
| 3 |
+
size 596070136
|
modules.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.models.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.models.Pooling"
|
| 13 |
+
}
|
| 14 |
+
]
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"max_seq_length": 8192,
|
| 3 |
+
"do_lower_case": false
|
| 4 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cls_token": {
|
| 3 |
+
"content": "[CLS]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"mask_token": {
|
| 10 |
+
"content": "[MASK]",
|
| 11 |
+
"lstrip": true,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "[PAD]",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"sep_token": {
|
| 24 |
+
"content": "[SEP]",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": false,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
},
|
| 30 |
+
"unk_token": {
|
| 31 |
+
"content": "[UNK]",
|
| 32 |
+
"lstrip": false,
|
| 33 |
+
"normalized": false,
|
| 34 |
+
"rstrip": false,
|
| 35 |
+
"single_word": false
|
| 36 |
+
}
|
| 37 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,945 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "|||IP_ADDRESS|||",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": true,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": false
|
| 10 |
+
},
|
| 11 |
+
"1": {
|
| 12 |
+
"content": "<|padding|>",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"50254": {
|
| 20 |
+
"content": " ",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": true,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": false
|
| 26 |
+
},
|
| 27 |
+
"50255": {
|
| 28 |
+
"content": " ",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": true,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": false
|
| 34 |
+
},
|
| 35 |
+
"50256": {
|
| 36 |
+
"content": " ",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": true,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": false
|
| 42 |
+
},
|
| 43 |
+
"50257": {
|
| 44 |
+
"content": " ",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": true,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": false
|
| 50 |
+
},
|
| 51 |
+
"50258": {
|
| 52 |
+
"content": " ",
|
| 53 |
+
"lstrip": false,
|
| 54 |
+
"normalized": true,
|
| 55 |
+
"rstrip": false,
|
| 56 |
+
"single_word": false,
|
| 57 |
+
"special": false
|
| 58 |
+
},
|
| 59 |
+
"50259": {
|
| 60 |
+
"content": " ",
|
| 61 |
+
"lstrip": false,
|
| 62 |
+
"normalized": true,
|
| 63 |
+
"rstrip": false,
|
| 64 |
+
"single_word": false,
|
| 65 |
+
"special": false
|
| 66 |
+
},
|
| 67 |
+
"50260": {
|
| 68 |
+
"content": " ",
|
| 69 |
+
"lstrip": false,
|
| 70 |
+
"normalized": true,
|
| 71 |
+
"rstrip": false,
|
| 72 |
+
"single_word": false,
|
| 73 |
+
"special": false
|
| 74 |
+
},
|
| 75 |
+
"50261": {
|
| 76 |
+
"content": " ",
|
| 77 |
+
"lstrip": false,
|
| 78 |
+
"normalized": true,
|
| 79 |
+
"rstrip": false,
|
| 80 |
+
"single_word": false,
|
| 81 |
+
"special": false
|
| 82 |
+
},
|
| 83 |
+
"50262": {
|
| 84 |
+
"content": " ",
|
| 85 |
+
"lstrip": false,
|
| 86 |
+
"normalized": true,
|
| 87 |
+
"rstrip": false,
|
| 88 |
+
"single_word": false,
|
| 89 |
+
"special": false
|
| 90 |
+
},
|
| 91 |
+
"50263": {
|
| 92 |
+
"content": " ",
|
| 93 |
+
"lstrip": false,
|
| 94 |
+
"normalized": true,
|
| 95 |
+
"rstrip": false,
|
| 96 |
+
"single_word": false,
|
| 97 |
+
"special": false
|
| 98 |
+
},
|
| 99 |
+
"50264": {
|
| 100 |
+
"content": " ",
|
| 101 |
+
"lstrip": false,
|
| 102 |
+
"normalized": true,
|
| 103 |
+
"rstrip": false,
|
| 104 |
+
"single_word": false,
|
| 105 |
+
"special": false
|
| 106 |
+
},
|
| 107 |
+
"50265": {
|
| 108 |
+
"content": " ",
|
| 109 |
+
"lstrip": false,
|
| 110 |
+
"normalized": true,
|
| 111 |
+
"rstrip": false,
|
| 112 |
+
"single_word": false,
|
| 113 |
+
"special": false
|
| 114 |
+
},
|
| 115 |
+
"50266": {
|
| 116 |
+
"content": " ",
|
| 117 |
+
"lstrip": false,
|
| 118 |
+
"normalized": true,
|
| 119 |
+
"rstrip": false,
|
| 120 |
+
"single_word": false,
|
| 121 |
+
"special": false
|
| 122 |
+
},
|
| 123 |
+
"50267": {
|
| 124 |
+
"content": " ",
|
| 125 |
+
"lstrip": false,
|
| 126 |
+
"normalized": true,
|
| 127 |
+
"rstrip": false,
|
| 128 |
+
"single_word": false,
|
| 129 |
+
"special": false
|
| 130 |
+
},
|
| 131 |
+
"50268": {
|
| 132 |
+
"content": " ",
|
| 133 |
+
"lstrip": false,
|
| 134 |
+
"normalized": true,
|
| 135 |
+
"rstrip": false,
|
| 136 |
+
"single_word": false,
|
| 137 |
+
"special": false
|
| 138 |
+
},
|
| 139 |
+
"50269": {
|
| 140 |
+
"content": " ",
|
| 141 |
+
"lstrip": false,
|
| 142 |
+
"normalized": true,
|
| 143 |
+
"rstrip": false,
|
| 144 |
+
"single_word": false,
|
| 145 |
+
"special": false
|
| 146 |
+
},
|
| 147 |
+
"50270": {
|
| 148 |
+
"content": " ",
|
| 149 |
+
"lstrip": false,
|
| 150 |
+
"normalized": true,
|
| 151 |
+
"rstrip": false,
|
| 152 |
+
"single_word": false,
|
| 153 |
+
"special": false
|
| 154 |
+
},
|
| 155 |
+
"50271": {
|
| 156 |
+
"content": " ",
|
| 157 |
+
"lstrip": false,
|
| 158 |
+
"normalized": true,
|
| 159 |
+
"rstrip": false,
|
| 160 |
+
"single_word": false,
|
| 161 |
+
"special": false
|
| 162 |
+
},
|
| 163 |
+
"50272": {
|
| 164 |
+
"content": " ",
|
| 165 |
+
"lstrip": false,
|
| 166 |
+
"normalized": true,
|
| 167 |
+
"rstrip": false,
|
| 168 |
+
"single_word": false,
|
| 169 |
+
"special": false
|
| 170 |
+
},
|
| 171 |
+
"50273": {
|
| 172 |
+
"content": " ",
|
| 173 |
+
"lstrip": false,
|
| 174 |
+
"normalized": true,
|
| 175 |
+
"rstrip": false,
|
| 176 |
+
"single_word": false,
|
| 177 |
+
"special": false
|
| 178 |
+
},
|
| 179 |
+
"50274": {
|
| 180 |
+
"content": " ",
|
| 181 |
+
"lstrip": false,
|
| 182 |
+
"normalized": true,
|
| 183 |
+
"rstrip": false,
|
| 184 |
+
"single_word": false,
|
| 185 |
+
"special": false
|
| 186 |
+
},
|
| 187 |
+
"50275": {
|
| 188 |
+
"content": " ",
|
| 189 |
+
"lstrip": false,
|
| 190 |
+
"normalized": true,
|
| 191 |
+
"rstrip": false,
|
| 192 |
+
"single_word": false,
|
| 193 |
+
"special": false
|
| 194 |
+
},
|
| 195 |
+
"50276": {
|
| 196 |
+
"content": " ",
|
| 197 |
+
"lstrip": false,
|
| 198 |
+
"normalized": true,
|
| 199 |
+
"rstrip": false,
|
| 200 |
+
"single_word": false,
|
| 201 |
+
"special": false
|
| 202 |
+
},
|
| 203 |
+
"50277": {
|
| 204 |
+
"content": "|||EMAIL_ADDRESS|||",
|
| 205 |
+
"lstrip": false,
|
| 206 |
+
"normalized": true,
|
| 207 |
+
"rstrip": false,
|
| 208 |
+
"single_word": false,
|
| 209 |
+
"special": false
|
| 210 |
+
},
|
| 211 |
+
"50278": {
|
| 212 |
+
"content": "|||PHONE_NUMBER|||",
|
| 213 |
+
"lstrip": false,
|
| 214 |
+
"normalized": true,
|
| 215 |
+
"rstrip": false,
|
| 216 |
+
"single_word": false,
|
| 217 |
+
"special": false
|
| 218 |
+
},
|
| 219 |
+
"50279": {
|
| 220 |
+
"content": "<|endoftext|>",
|
| 221 |
+
"lstrip": false,
|
| 222 |
+
"normalized": false,
|
| 223 |
+
"rstrip": false,
|
| 224 |
+
"single_word": false,
|
| 225 |
+
"special": true
|
| 226 |
+
},
|
| 227 |
+
"50280": {
|
| 228 |
+
"content": "[UNK]",
|
| 229 |
+
"lstrip": false,
|
| 230 |
+
"normalized": false,
|
| 231 |
+
"rstrip": false,
|
| 232 |
+
"single_word": false,
|
| 233 |
+
"special": true
|
| 234 |
+
},
|
| 235 |
+
"50281": {
|
| 236 |
+
"content": "[CLS]",
|
| 237 |
+
"lstrip": false,
|
| 238 |
+
"normalized": false,
|
| 239 |
+
"rstrip": false,
|
| 240 |
+
"single_word": false,
|
| 241 |
+
"special": true
|
| 242 |
+
},
|
| 243 |
+
"50282": {
|
| 244 |
+
"content": "[SEP]",
|
| 245 |
+
"lstrip": false,
|
| 246 |
+
"normalized": false,
|
| 247 |
+
"rstrip": false,
|
| 248 |
+
"single_word": false,
|
| 249 |
+
"special": true
|
| 250 |
+
},
|
| 251 |
+
"50283": {
|
| 252 |
+
"content": "[PAD]",
|
| 253 |
+
"lstrip": false,
|
| 254 |
+
"normalized": false,
|
| 255 |
+
"rstrip": false,
|
| 256 |
+
"single_word": false,
|
| 257 |
+
"special": true
|
| 258 |
+
},
|
| 259 |
+
"50284": {
|
| 260 |
+
"content": "[MASK]",
|
| 261 |
+
"lstrip": true,
|
| 262 |
+
"normalized": false,
|
| 263 |
+
"rstrip": false,
|
| 264 |
+
"single_word": false,
|
| 265 |
+
"special": true
|
| 266 |
+
},
|
| 267 |
+
"50285": {
|
| 268 |
+
"content": "[unused0]",
|
| 269 |
+
"lstrip": false,
|
| 270 |
+
"normalized": true,
|
| 271 |
+
"rstrip": false,
|
| 272 |
+
"single_word": false,
|
| 273 |
+
"special": false
|
| 274 |
+
},
|
| 275 |
+
"50286": {
|
| 276 |
+
"content": "[unused1]",
|
| 277 |
+
"lstrip": false,
|
| 278 |
+
"normalized": true,
|
| 279 |
+
"rstrip": false,
|
| 280 |
+
"single_word": false,
|
| 281 |
+
"special": false
|
| 282 |
+
},
|
| 283 |
+
"50287": {
|
| 284 |
+
"content": "[unused2]",
|
| 285 |
+
"lstrip": false,
|
| 286 |
+
"normalized": true,
|
| 287 |
+
"rstrip": false,
|
| 288 |
+
"single_word": false,
|
| 289 |
+
"special": false
|
| 290 |
+
},
|
| 291 |
+
"50288": {
|
| 292 |
+
"content": "[unused3]",
|
| 293 |
+
"lstrip": false,
|
| 294 |
+
"normalized": true,
|
| 295 |
+
"rstrip": false,
|
| 296 |
+
"single_word": false,
|
| 297 |
+
"special": false
|
| 298 |
+
},
|
| 299 |
+
"50289": {
|
| 300 |
+
"content": "[unused4]",
|
| 301 |
+
"lstrip": false,
|
| 302 |
+
"normalized": true,
|
| 303 |
+
"rstrip": false,
|
| 304 |
+
"single_word": false,
|
| 305 |
+
"special": false
|
| 306 |
+
},
|
| 307 |
+
"50290": {
|
| 308 |
+
"content": "[unused5]",
|
| 309 |
+
"lstrip": false,
|
| 310 |
+
"normalized": true,
|
| 311 |
+
"rstrip": false,
|
| 312 |
+
"single_word": false,
|
| 313 |
+
"special": false
|
| 314 |
+
},
|
| 315 |
+
"50291": {
|
| 316 |
+
"content": "[unused6]",
|
| 317 |
+
"lstrip": false,
|
| 318 |
+
"normalized": true,
|
| 319 |
+
"rstrip": false,
|
| 320 |
+
"single_word": false,
|
| 321 |
+
"special": false
|
| 322 |
+
},
|
| 323 |
+
"50292": {
|
| 324 |
+
"content": "[unused7]",
|
| 325 |
+
"lstrip": false,
|
| 326 |
+
"normalized": true,
|
| 327 |
+
"rstrip": false,
|
| 328 |
+
"single_word": false,
|
| 329 |
+
"special": false
|
| 330 |
+
},
|
| 331 |
+
"50293": {
|
| 332 |
+
"content": "[unused8]",
|
| 333 |
+
"lstrip": false,
|
| 334 |
+
"normalized": true,
|
| 335 |
+
"rstrip": false,
|
| 336 |
+
"single_word": false,
|
| 337 |
+
"special": false
|
| 338 |
+
},
|
| 339 |
+
"50294": {
|
| 340 |
+
"content": "[unused9]",
|
| 341 |
+
"lstrip": false,
|
| 342 |
+
"normalized": true,
|
| 343 |
+
"rstrip": false,
|
| 344 |
+
"single_word": false,
|
| 345 |
+
"special": false
|
| 346 |
+
},
|
| 347 |
+
"50295": {
|
| 348 |
+
"content": "[unused10]",
|
| 349 |
+
"lstrip": false,
|
| 350 |
+
"normalized": true,
|
| 351 |
+
"rstrip": false,
|
| 352 |
+
"single_word": false,
|
| 353 |
+
"special": false
|
| 354 |
+
},
|
| 355 |
+
"50296": {
|
| 356 |
+
"content": "[unused11]",
|
| 357 |
+
"lstrip": false,
|
| 358 |
+
"normalized": true,
|
| 359 |
+
"rstrip": false,
|
| 360 |
+
"single_word": false,
|
| 361 |
+
"special": false
|
| 362 |
+
},
|
| 363 |
+
"50297": {
|
| 364 |
+
"content": "[unused12]",
|
| 365 |
+
"lstrip": false,
|
| 366 |
+
"normalized": true,
|
| 367 |
+
"rstrip": false,
|
| 368 |
+
"single_word": false,
|
| 369 |
+
"special": false
|
| 370 |
+
},
|
| 371 |
+
"50298": {
|
| 372 |
+
"content": "[unused13]",
|
| 373 |
+
"lstrip": false,
|
| 374 |
+
"normalized": true,
|
| 375 |
+
"rstrip": false,
|
| 376 |
+
"single_word": false,
|
| 377 |
+
"special": false
|
| 378 |
+
},
|
| 379 |
+
"50299": {
|
| 380 |
+
"content": "[unused14]",
|
| 381 |
+
"lstrip": false,
|
| 382 |
+
"normalized": true,
|
| 383 |
+
"rstrip": false,
|
| 384 |
+
"single_word": false,
|
| 385 |
+
"special": false
|
| 386 |
+
},
|
| 387 |
+
"50300": {
|
| 388 |
+
"content": "[unused15]",
|
| 389 |
+
"lstrip": false,
|
| 390 |
+
"normalized": true,
|
| 391 |
+
"rstrip": false,
|
| 392 |
+
"single_word": false,
|
| 393 |
+
"special": false
|
| 394 |
+
},
|
| 395 |
+
"50301": {
|
| 396 |
+
"content": "[unused16]",
|
| 397 |
+
"lstrip": false,
|
| 398 |
+
"normalized": true,
|
| 399 |
+
"rstrip": false,
|
| 400 |
+
"single_word": false,
|
| 401 |
+
"special": false
|
| 402 |
+
},
|
| 403 |
+
"50302": {
|
| 404 |
+
"content": "[unused17]",
|
| 405 |
+
"lstrip": false,
|
| 406 |
+
"normalized": true,
|
| 407 |
+
"rstrip": false,
|
| 408 |
+
"single_word": false,
|
| 409 |
+
"special": false
|
| 410 |
+
},
|
| 411 |
+
"50303": {
|
| 412 |
+
"content": "[unused18]",
|
| 413 |
+
"lstrip": false,
|
| 414 |
+
"normalized": true,
|
| 415 |
+
"rstrip": false,
|
| 416 |
+
"single_word": false,
|
| 417 |
+
"special": false
|
| 418 |
+
},
|
| 419 |
+
"50304": {
|
| 420 |
+
"content": "[unused19]",
|
| 421 |
+
"lstrip": false,
|
| 422 |
+
"normalized": true,
|
| 423 |
+
"rstrip": false,
|
| 424 |
+
"single_word": false,
|
| 425 |
+
"special": false
|
| 426 |
+
},
|
| 427 |
+
"50305": {
|
| 428 |
+
"content": "[unused20]",
|
| 429 |
+
"lstrip": false,
|
| 430 |
+
"normalized": true,
|
| 431 |
+
"rstrip": false,
|
| 432 |
+
"single_word": false,
|
| 433 |
+
"special": false
|
| 434 |
+
},
|
| 435 |
+
"50306": {
|
| 436 |
+
"content": "[unused21]",
|
| 437 |
+
"lstrip": false,
|
| 438 |
+
"normalized": true,
|
| 439 |
+
"rstrip": false,
|
| 440 |
+
"single_word": false,
|
| 441 |
+
"special": false
|
| 442 |
+
},
|
| 443 |
+
"50307": {
|
| 444 |
+
"content": "[unused22]",
|
| 445 |
+
"lstrip": false,
|
| 446 |
+
"normalized": true,
|
| 447 |
+
"rstrip": false,
|
| 448 |
+
"single_word": false,
|
| 449 |
+
"special": false
|
| 450 |
+
},
|
| 451 |
+
"50308": {
|
| 452 |
+
"content": "[unused23]",
|
| 453 |
+
"lstrip": false,
|
| 454 |
+
"normalized": true,
|
| 455 |
+
"rstrip": false,
|
| 456 |
+
"single_word": false,
|
| 457 |
+
"special": false
|
| 458 |
+
},
|
| 459 |
+
"50309": {
|
| 460 |
+
"content": "[unused24]",
|
| 461 |
+
"lstrip": false,
|
| 462 |
+
"normalized": true,
|
| 463 |
+
"rstrip": false,
|
| 464 |
+
"single_word": false,
|
| 465 |
+
"special": false
|
| 466 |
+
},
|
| 467 |
+
"50310": {
|
| 468 |
+
"content": "[unused25]",
|
| 469 |
+
"lstrip": false,
|
| 470 |
+
"normalized": true,
|
| 471 |
+
"rstrip": false,
|
| 472 |
+
"single_word": false,
|
| 473 |
+
"special": false
|
| 474 |
+
},
|
| 475 |
+
"50311": {
|
| 476 |
+
"content": "[unused26]",
|
| 477 |
+
"lstrip": false,
|
| 478 |
+
"normalized": true,
|
| 479 |
+
"rstrip": false,
|
| 480 |
+
"single_word": false,
|
| 481 |
+
"special": false
|
| 482 |
+
},
|
| 483 |
+
"50312": {
|
| 484 |
+
"content": "[unused27]",
|
| 485 |
+
"lstrip": false,
|
| 486 |
+
"normalized": true,
|
| 487 |
+
"rstrip": false,
|
| 488 |
+
"single_word": false,
|
| 489 |
+
"special": false
|
| 490 |
+
},
|
| 491 |
+
"50313": {
|
| 492 |
+
"content": "[unused28]",
|
| 493 |
+
"lstrip": false,
|
| 494 |
+
"normalized": true,
|
| 495 |
+
"rstrip": false,
|
| 496 |
+
"single_word": false,
|
| 497 |
+
"special": false
|
| 498 |
+
},
|
| 499 |
+
"50314": {
|
| 500 |
+
"content": "[unused29]",
|
| 501 |
+
"lstrip": false,
|
| 502 |
+
"normalized": true,
|
| 503 |
+
"rstrip": false,
|
| 504 |
+
"single_word": false,
|
| 505 |
+
"special": false
|
| 506 |
+
},
|
| 507 |
+
"50315": {
|
| 508 |
+
"content": "[unused30]",
|
| 509 |
+
"lstrip": false,
|
| 510 |
+
"normalized": true,
|
| 511 |
+
"rstrip": false,
|
| 512 |
+
"single_word": false,
|
| 513 |
+
"special": false
|
| 514 |
+
},
|
| 515 |
+
"50316": {
|
| 516 |
+
"content": "[unused31]",
|
| 517 |
+
"lstrip": false,
|
| 518 |
+
"normalized": true,
|
| 519 |
+
"rstrip": false,
|
| 520 |
+
"single_word": false,
|
| 521 |
+
"special": false
|
| 522 |
+
},
|
| 523 |
+
"50317": {
|
| 524 |
+
"content": "[unused32]",
|
| 525 |
+
"lstrip": false,
|
| 526 |
+
"normalized": true,
|
| 527 |
+
"rstrip": false,
|
| 528 |
+
"single_word": false,
|
| 529 |
+
"special": false
|
| 530 |
+
},
|
| 531 |
+
"50318": {
|
| 532 |
+
"content": "[unused33]",
|
| 533 |
+
"lstrip": false,
|
| 534 |
+
"normalized": true,
|
| 535 |
+
"rstrip": false,
|
| 536 |
+
"single_word": false,
|
| 537 |
+
"special": false
|
| 538 |
+
},
|
| 539 |
+
"50319": {
|
| 540 |
+
"content": "[unused34]",
|
| 541 |
+
"lstrip": false,
|
| 542 |
+
"normalized": true,
|
| 543 |
+
"rstrip": false,
|
| 544 |
+
"single_word": false,
|
| 545 |
+
"special": false
|
| 546 |
+
},
|
| 547 |
+
"50320": {
|
| 548 |
+
"content": "[unused35]",
|
| 549 |
+
"lstrip": false,
|
| 550 |
+
"normalized": true,
|
| 551 |
+
"rstrip": false,
|
| 552 |
+
"single_word": false,
|
| 553 |
+
"special": false
|
| 554 |
+
},
|
| 555 |
+
"50321": {
|
| 556 |
+
"content": "[unused36]",
|
| 557 |
+
"lstrip": false,
|
| 558 |
+
"normalized": true,
|
| 559 |
+
"rstrip": false,
|
| 560 |
+
"single_word": false,
|
| 561 |
+
"special": false
|
| 562 |
+
},
|
| 563 |
+
"50322": {
|
| 564 |
+
"content": "[unused37]",
|
| 565 |
+
"lstrip": false,
|
| 566 |
+
"normalized": true,
|
| 567 |
+
"rstrip": false,
|
| 568 |
+
"single_word": false,
|
| 569 |
+
"special": false
|
| 570 |
+
},
|
| 571 |
+
"50323": {
|
| 572 |
+
"content": "[unused38]",
|
| 573 |
+
"lstrip": false,
|
| 574 |
+
"normalized": true,
|
| 575 |
+
"rstrip": false,
|
| 576 |
+
"single_word": false,
|
| 577 |
+
"special": false
|
| 578 |
+
},
|
| 579 |
+
"50324": {
|
| 580 |
+
"content": "[unused39]",
|
| 581 |
+
"lstrip": false,
|
| 582 |
+
"normalized": true,
|
| 583 |
+
"rstrip": false,
|
| 584 |
+
"single_word": false,
|
| 585 |
+
"special": false
|
| 586 |
+
},
|
| 587 |
+
"50325": {
|
| 588 |
+
"content": "[unused40]",
|
| 589 |
+
"lstrip": false,
|
| 590 |
+
"normalized": true,
|
| 591 |
+
"rstrip": false,
|
| 592 |
+
"single_word": false,
|
| 593 |
+
"special": false
|
| 594 |
+
},
|
| 595 |
+
"50326": {
|
| 596 |
+
"content": "[unused41]",
|
| 597 |
+
"lstrip": false,
|
| 598 |
+
"normalized": true,
|
| 599 |
+
"rstrip": false,
|
| 600 |
+
"single_word": false,
|
| 601 |
+
"special": false
|
| 602 |
+
},
|
| 603 |
+
"50327": {
|
| 604 |
+
"content": "[unused42]",
|
| 605 |
+
"lstrip": false,
|
| 606 |
+
"normalized": true,
|
| 607 |
+
"rstrip": false,
|
| 608 |
+
"single_word": false,
|
| 609 |
+
"special": false
|
| 610 |
+
},
|
| 611 |
+
"50328": {
|
| 612 |
+
"content": "[unused43]",
|
| 613 |
+
"lstrip": false,
|
| 614 |
+
"normalized": true,
|
| 615 |
+
"rstrip": false,
|
| 616 |
+
"single_word": false,
|
| 617 |
+
"special": false
|
| 618 |
+
},
|
| 619 |
+
"50329": {
|
| 620 |
+
"content": "[unused44]",
|
| 621 |
+
"lstrip": false,
|
| 622 |
+
"normalized": true,
|
| 623 |
+
"rstrip": false,
|
| 624 |
+
"single_word": false,
|
| 625 |
+
"special": false
|
| 626 |
+
},
|
| 627 |
+
"50330": {
|
| 628 |
+
"content": "[unused45]",
|
| 629 |
+
"lstrip": false,
|
| 630 |
+
"normalized": true,
|
| 631 |
+
"rstrip": false,
|
| 632 |
+
"single_word": false,
|
| 633 |
+
"special": false
|
| 634 |
+
},
|
| 635 |
+
"50331": {
|
| 636 |
+
"content": "[unused46]",
|
| 637 |
+
"lstrip": false,
|
| 638 |
+
"normalized": true,
|
| 639 |
+
"rstrip": false,
|
| 640 |
+
"single_word": false,
|
| 641 |
+
"special": false
|
| 642 |
+
},
|
| 643 |
+
"50332": {
|
| 644 |
+
"content": "[unused47]",
|
| 645 |
+
"lstrip": false,
|
| 646 |
+
"normalized": true,
|
| 647 |
+
"rstrip": false,
|
| 648 |
+
"single_word": false,
|
| 649 |
+
"special": false
|
| 650 |
+
},
|
| 651 |
+
"50333": {
|
| 652 |
+
"content": "[unused48]",
|
| 653 |
+
"lstrip": false,
|
| 654 |
+
"normalized": true,
|
| 655 |
+
"rstrip": false,
|
| 656 |
+
"single_word": false,
|
| 657 |
+
"special": false
|
| 658 |
+
},
|
| 659 |
+
"50334": {
|
| 660 |
+
"content": "[unused49]",
|
| 661 |
+
"lstrip": false,
|
| 662 |
+
"normalized": true,
|
| 663 |
+
"rstrip": false,
|
| 664 |
+
"single_word": false,
|
| 665 |
+
"special": false
|
| 666 |
+
},
|
| 667 |
+
"50335": {
|
| 668 |
+
"content": "[unused50]",
|
| 669 |
+
"lstrip": false,
|
| 670 |
+
"normalized": true,
|
| 671 |
+
"rstrip": false,
|
| 672 |
+
"single_word": false,
|
| 673 |
+
"special": false
|
| 674 |
+
},
|
| 675 |
+
"50336": {
|
| 676 |
+
"content": "[unused51]",
|
| 677 |
+
"lstrip": false,
|
| 678 |
+
"normalized": true,
|
| 679 |
+
"rstrip": false,
|
| 680 |
+
"single_word": false,
|
| 681 |
+
"special": false
|
| 682 |
+
},
|
| 683 |
+
"50337": {
|
| 684 |
+
"content": "[unused52]",
|
| 685 |
+
"lstrip": false,
|
| 686 |
+
"normalized": true,
|
| 687 |
+
"rstrip": false,
|
| 688 |
+
"single_word": false,
|
| 689 |
+
"special": false
|
| 690 |
+
},
|
| 691 |
+
"50338": {
|
| 692 |
+
"content": "[unused53]",
|
| 693 |
+
"lstrip": false,
|
| 694 |
+
"normalized": true,
|
| 695 |
+
"rstrip": false,
|
| 696 |
+
"single_word": false,
|
| 697 |
+
"special": false
|
| 698 |
+
},
|
| 699 |
+
"50339": {
|
| 700 |
+
"content": "[unused54]",
|
| 701 |
+
"lstrip": false,
|
| 702 |
+
"normalized": true,
|
| 703 |
+
"rstrip": false,
|
| 704 |
+
"single_word": false,
|
| 705 |
+
"special": false
|
| 706 |
+
},
|
| 707 |
+
"50340": {
|
| 708 |
+
"content": "[unused55]",
|
| 709 |
+
"lstrip": false,
|
| 710 |
+
"normalized": true,
|
| 711 |
+
"rstrip": false,
|
| 712 |
+
"single_word": false,
|
| 713 |
+
"special": false
|
| 714 |
+
},
|
| 715 |
+
"50341": {
|
| 716 |
+
"content": "[unused56]",
|
| 717 |
+
"lstrip": false,
|
| 718 |
+
"normalized": true,
|
| 719 |
+
"rstrip": false,
|
| 720 |
+
"single_word": false,
|
| 721 |
+
"special": false
|
| 722 |
+
},
|
| 723 |
+
"50342": {
|
| 724 |
+
"content": "[unused57]",
|
| 725 |
+
"lstrip": false,
|
| 726 |
+
"normalized": true,
|
| 727 |
+
"rstrip": false,
|
| 728 |
+
"single_word": false,
|
| 729 |
+
"special": false
|
| 730 |
+
},
|
| 731 |
+
"50343": {
|
| 732 |
+
"content": "[unused58]",
|
| 733 |
+
"lstrip": false,
|
| 734 |
+
"normalized": true,
|
| 735 |
+
"rstrip": false,
|
| 736 |
+
"single_word": false,
|
| 737 |
+
"special": false
|
| 738 |
+
},
|
| 739 |
+
"50344": {
|
| 740 |
+
"content": "[unused59]",
|
| 741 |
+
"lstrip": false,
|
| 742 |
+
"normalized": true,
|
| 743 |
+
"rstrip": false,
|
| 744 |
+
"single_word": false,
|
| 745 |
+
"special": false
|
| 746 |
+
},
|
| 747 |
+
"50345": {
|
| 748 |
+
"content": "[unused60]",
|
| 749 |
+
"lstrip": false,
|
| 750 |
+
"normalized": true,
|
| 751 |
+
"rstrip": false,
|
| 752 |
+
"single_word": false,
|
| 753 |
+
"special": false
|
| 754 |
+
},
|
| 755 |
+
"50346": {
|
| 756 |
+
"content": "[unused61]",
|
| 757 |
+
"lstrip": false,
|
| 758 |
+
"normalized": true,
|
| 759 |
+
"rstrip": false,
|
| 760 |
+
"single_word": false,
|
| 761 |
+
"special": false
|
| 762 |
+
},
|
| 763 |
+
"50347": {
|
| 764 |
+
"content": "[unused62]",
|
| 765 |
+
"lstrip": false,
|
| 766 |
+
"normalized": true,
|
| 767 |
+
"rstrip": false,
|
| 768 |
+
"single_word": false,
|
| 769 |
+
"special": false
|
| 770 |
+
},
|
| 771 |
+
"50348": {
|
| 772 |
+
"content": "[unused63]",
|
| 773 |
+
"lstrip": false,
|
| 774 |
+
"normalized": true,
|
| 775 |
+
"rstrip": false,
|
| 776 |
+
"single_word": false,
|
| 777 |
+
"special": false
|
| 778 |
+
},
|
| 779 |
+
"50349": {
|
| 780 |
+
"content": "[unused64]",
|
| 781 |
+
"lstrip": false,
|
| 782 |
+
"normalized": true,
|
| 783 |
+
"rstrip": false,
|
| 784 |
+
"single_word": false,
|
| 785 |
+
"special": false
|
| 786 |
+
},
|
| 787 |
+
"50350": {
|
| 788 |
+
"content": "[unused65]",
|
| 789 |
+
"lstrip": false,
|
| 790 |
+
"normalized": true,
|
| 791 |
+
"rstrip": false,
|
| 792 |
+
"single_word": false,
|
| 793 |
+
"special": false
|
| 794 |
+
},
|
| 795 |
+
"50351": {
|
| 796 |
+
"content": "[unused66]",
|
| 797 |
+
"lstrip": false,
|
| 798 |
+
"normalized": true,
|
| 799 |
+
"rstrip": false,
|
| 800 |
+
"single_word": false,
|
| 801 |
+
"special": false
|
| 802 |
+
},
|
| 803 |
+
"50352": {
|
| 804 |
+
"content": "[unused67]",
|
| 805 |
+
"lstrip": false,
|
| 806 |
+
"normalized": true,
|
| 807 |
+
"rstrip": false,
|
| 808 |
+
"single_word": false,
|
| 809 |
+
"special": false
|
| 810 |
+
},
|
| 811 |
+
"50353": {
|
| 812 |
+
"content": "[unused68]",
|
| 813 |
+
"lstrip": false,
|
| 814 |
+
"normalized": true,
|
| 815 |
+
"rstrip": false,
|
| 816 |
+
"single_word": false,
|
| 817 |
+
"special": false
|
| 818 |
+
},
|
| 819 |
+
"50354": {
|
| 820 |
+
"content": "[unused69]",
|
| 821 |
+
"lstrip": false,
|
| 822 |
+
"normalized": true,
|
| 823 |
+
"rstrip": false,
|
| 824 |
+
"single_word": false,
|
| 825 |
+
"special": false
|
| 826 |
+
},
|
| 827 |
+
"50355": {
|
| 828 |
+
"content": "[unused70]",
|
| 829 |
+
"lstrip": false,
|
| 830 |
+
"normalized": true,
|
| 831 |
+
"rstrip": false,
|
| 832 |
+
"single_word": false,
|
| 833 |
+
"special": false
|
| 834 |
+
},
|
| 835 |
+
"50356": {
|
| 836 |
+
"content": "[unused71]",
|
| 837 |
+
"lstrip": false,
|
| 838 |
+
"normalized": true,
|
| 839 |
+
"rstrip": false,
|
| 840 |
+
"single_word": false,
|
| 841 |
+
"special": false
|
| 842 |
+
},
|
| 843 |
+
"50357": {
|
| 844 |
+
"content": "[unused72]",
|
| 845 |
+
"lstrip": false,
|
| 846 |
+
"normalized": true,
|
| 847 |
+
"rstrip": false,
|
| 848 |
+
"single_word": false,
|
| 849 |
+
"special": false
|
| 850 |
+
},
|
| 851 |
+
"50358": {
|
| 852 |
+
"content": "[unused73]",
|
| 853 |
+
"lstrip": false,
|
| 854 |
+
"normalized": true,
|
| 855 |
+
"rstrip": false,
|
| 856 |
+
"single_word": false,
|
| 857 |
+
"special": false
|
| 858 |
+
},
|
| 859 |
+
"50359": {
|
| 860 |
+
"content": "[unused74]",
|
| 861 |
+
"lstrip": false,
|
| 862 |
+
"normalized": true,
|
| 863 |
+
"rstrip": false,
|
| 864 |
+
"single_word": false,
|
| 865 |
+
"special": false
|
| 866 |
+
},
|
| 867 |
+
"50360": {
|
| 868 |
+
"content": "[unused75]",
|
| 869 |
+
"lstrip": false,
|
| 870 |
+
"normalized": true,
|
| 871 |
+
"rstrip": false,
|
| 872 |
+
"single_word": false,
|
| 873 |
+
"special": false
|
| 874 |
+
},
|
| 875 |
+
"50361": {
|
| 876 |
+
"content": "[unused76]",
|
| 877 |
+
"lstrip": false,
|
| 878 |
+
"normalized": true,
|
| 879 |
+
"rstrip": false,
|
| 880 |
+
"single_word": false,
|
| 881 |
+
"special": false
|
| 882 |
+
},
|
| 883 |
+
"50362": {
|
| 884 |
+
"content": "[unused77]",
|
| 885 |
+
"lstrip": false,
|
| 886 |
+
"normalized": true,
|
| 887 |
+
"rstrip": false,
|
| 888 |
+
"single_word": false,
|
| 889 |
+
"special": false
|
| 890 |
+
},
|
| 891 |
+
"50363": {
|
| 892 |
+
"content": "[unused78]",
|
| 893 |
+
"lstrip": false,
|
| 894 |
+
"normalized": true,
|
| 895 |
+
"rstrip": false,
|
| 896 |
+
"single_word": false,
|
| 897 |
+
"special": false
|
| 898 |
+
},
|
| 899 |
+
"50364": {
|
| 900 |
+
"content": "[unused79]",
|
| 901 |
+
"lstrip": false,
|
| 902 |
+
"normalized": true,
|
| 903 |
+
"rstrip": false,
|
| 904 |
+
"single_word": false,
|
| 905 |
+
"special": false
|
| 906 |
+
},
|
| 907 |
+
"50365": {
|
| 908 |
+
"content": "[unused80]",
|
| 909 |
+
"lstrip": false,
|
| 910 |
+
"normalized": true,
|
| 911 |
+
"rstrip": false,
|
| 912 |
+
"single_word": false,
|
| 913 |
+
"special": false
|
| 914 |
+
},
|
| 915 |
+
"50366": {
|
| 916 |
+
"content": "[unused81]",
|
| 917 |
+
"lstrip": false,
|
| 918 |
+
"normalized": true,
|
| 919 |
+
"rstrip": false,
|
| 920 |
+
"single_word": false,
|
| 921 |
+
"special": false
|
| 922 |
+
},
|
| 923 |
+
"50367": {
|
| 924 |
+
"content": "[unused82]",
|
| 925 |
+
"lstrip": false,
|
| 926 |
+
"normalized": true,
|
| 927 |
+
"rstrip": false,
|
| 928 |
+
"single_word": false,
|
| 929 |
+
"special": false
|
| 930 |
+
}
|
| 931 |
+
},
|
| 932 |
+
"clean_up_tokenization_spaces": true,
|
| 933 |
+
"cls_token": "[CLS]",
|
| 934 |
+
"extra_special_tokens": {},
|
| 935 |
+
"mask_token": "[MASK]",
|
| 936 |
+
"model_input_names": [
|
| 937 |
+
"input_ids",
|
| 938 |
+
"attention_mask"
|
| 939 |
+
],
|
| 940 |
+
"model_max_length": 8192,
|
| 941 |
+
"pad_token": "[PAD]",
|
| 942 |
+
"sep_token": "[SEP]",
|
| 943 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 944 |
+
"unk_token": "[UNK]"
|
| 945 |
+
}
|