Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +526 -0
- config.json +25 -0
- config_sentence_transformers.json +12 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 384,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,526 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:4118
|
8 |
+
- loss:MultipleNegativesRankingLoss
|
9 |
+
base_model: Snowflake/snowflake-arctic-embed-s
|
10 |
+
widget:
|
11 |
+
- source_sentence: How should the client find a minimum set of map servers?
|
12 |
+
sentences:
|
13 |
+
- Master's thesis "Leveraging Application Layer Path-Awareness with SCION" compares
|
14 |
+
Multipath TCP (MPTCP) and SCION multipath capabilities, highlighting their fundamental
|
15 |
+
differences. MPTCP aggregates multiple network interfaces (e.g., Wi-Fi, cellular),
|
16 |
+
creating virtual subflows. SCION switches inter-AS paths for a single interface.
|
17 |
+
Combining both creates a "multihomed SCION connection" with path switching across
|
18 |
+
and within interfaces, increasing path selection complexity. This document introduces
|
19 |
+
Multihomed SCION connection.
|
20 |
+
- Documentation document describing observations and a predictive model for SCION
|
21 |
+
router performance. Presents data points from benchmark runs on various hardware.
|
22 |
+
Analyzes time spent in packet processing. Proposes a tentative predictive model
|
23 |
+
based on coremark, memmove benchmark results, packet length, and a performance
|
24 |
+
index. Discusses cache/TLB influences and model refinements.
|
25 |
+
- 'Book excerpt (F-PKI, Section 18.6.2) describing proof fetching via DNS in F-PKI.
|
26 |
+
Client queries DNS resolver, which forwards to map server (acting as name server).
|
27 |
+
Map server replies with TXT records containing the entry. Highlights DNS advantages:
|
28 |
+
decentralization, caching, privacy, timeliness. Mentions DANE comparison, noting
|
29 |
+
F-PKI does not require DNSSEC. Discusses DNS message size limitations and RFC
|
30 |
+
6891.'
|
31 |
+
- source_sentence: For what percentage of source-destination AS pairs, the reduction
|
32 |
+
in CEPB by using green beaconing policy is more than 50%?
|
33 |
+
sentences:
|
34 |
+
- Research paper excerpt describing SCION's control plane components (beacon, path,
|
35 |
+
and certificate servers) and their roles in path discovery and dissemination.
|
36 |
+
Explains up-segment and down-segment concepts, and details how path servers store,
|
37 |
+
cache, and serve these segments to enable end-to-end communication, including
|
38 |
+
segment combination strategies for intra and inter ISD communication. Specifies
|
39 |
+
AS, core and local path server interactions.
|
40 |
+
- Book excerpt (Green Networking with SCION, Section 16.2.2 cont.) presenting simulation
|
41 |
+
results of green inter-domain routing. Finds green beaconing reduces CEPB for
|
42 |
+
82% of AS pairs, exceeding 50% reduction for almost 50% of pairs, estimated 210,000
|
43 |
+
tons CO2 savings in 2021. Includes distribution figures of per-terabyte CO2 emission
|
44 |
+
comparing BGP and SCION.
|
45 |
+
- 'Research paper "Protocols to Code: Formal Verification of a Next-Generation Internet
|
46 |
+
Router" detailing the environment, network, and control plane models, and the
|
47 |
+
verified security properties. Explains attacker capabilities (packet injection,
|
48 |
+
eavesdropping, deriving messages), path history, formalization of path authorization,
|
49 |
+
valley freedom, loop freedom. Describes differences in packet structure in abstract/concrete
|
50 |
+
models, particularly use of segment ID (segID) and authenticators.'
|
51 |
+
- source_sentence: What is Path-aware networking (PAN)?
|
52 |
+
sentences:
|
53 |
+
- Research paper ("Debuglet...") section describing the motivation behind Debuglet.
|
54 |
+
Presents empirical evidence demonstrating that networks treat packets differently
|
55 |
+
based on their transport protocol (UDP, TCP, ICMP, Raw IP). Shows variations in
|
56 |
+
RTT and drop rates across different protocols using measurements between global
|
57 |
+
data centers. Argues for measurement tools that mimic the behavior of actual application
|
58 |
+
traffic.
|
59 |
+
- Research paper excerpt on novel interconnection agreements in SCION. Formalizes
|
60 |
+
interconnection agreements and their utility. Provides examples of classic peering
|
61 |
+
agreements and novel mutuality-based agreements (violating GRC) in SCION. Discusses
|
62 |
+
agreement conditions and the goal of Pareto-optimal and fair utility.
|
63 |
+
- Research paper section ("Consent Routing...") describing the CONPASS protocol
|
64 |
+
design. CONPASS negotiates path segment consent between a source and destination
|
65 |
+
in path-aware networks (PANs, citing SCION, Pathlet Routing, NIRA). Defines protocol
|
66 |
+
messages (REQUEST, RESPONSE) for exchanging path segments (literals or compositions).
|
67 |
+
Explains initiator and responder roles and algorithms, including segment filtering
|
68 |
+
and path enumeration. Outlines protocol extensions for features like error signaling,
|
69 |
+
responder delegation, segment metadata, and network capabilities.
|
70 |
+
- source_sentence: What is the purpose of the `unfold` and `fold` statements when
|
71 |
+
working with predicates?
|
72 |
+
sentences:
|
73 |
+
- Book excerpt illustrating the concepts of predicates it provides to abstract over
|
74 |
+
assertions and represent the memory footprint when verifying heap structures like
|
75 |
+
single linked list. 'list' pred, fold, unfold.
|
76 |
+
- Technical document for SCION Control Message Protocol (SCMP) error messages "External
|
77 |
+
Interface Down" (Type 5) and "Internal Connectivity Down" (Type 6). Describes
|
78 |
+
message format, including ISD, AS, and Interface ID fields. Specifies usage for
|
79 |
+
informing recipients of broken data-plane links, enabling routing around failures
|
80 |
+
both internal and external to AS.
|
81 |
+
- Tutorial document titled "Freestanding SCION Network" providing a guide for setting
|
82 |
+
up a five-node SCION demo environment. Details infrastructure prerequisites (Ubuntu
|
83 |
+
VMs), AS topology, software installation (Debian packages), certificate generation,
|
84 |
+
service configuration, and testing procedures. Provides example commands and configuration
|
85 |
+
snippets. Covers troubleshooting and expected outcomes.
|
86 |
+
- source_sentence: What is the difference in how leap seconds are applied in UTC and
|
87 |
+
SCION time?
|
88 |
+
sentences:
|
89 |
+
- Documentation document for the scion-pki certificate renew command, used to renew
|
90 |
+
AS certificates. Describes the renewal process, involving a Certificate Signing
|
91 |
+
Request (CSR) signed with the existing certificate chain and key. Details options
|
92 |
+
for specifying CA targets, reusing existing keys, setting renewal thresholds (--expires-in),
|
93 |
+
output file handling (--out, --out-key, --backup, --force), and TRC validation.
|
94 |
+
Explains how to configure a subject template in JSON format.
|
95 |
+
- Book excerpt explaining the networking aspects of SCION Time Synchronization.
|
96 |
+
Highlights the use of SCION's multipath communication capabilities for enhanced
|
97 |
+
fault tolerance and security and selection of maximally disjoint path subsets
|
98 |
+
by Time Servers. The system prioritizes multiple paths measurements toward nodes,
|
99 |
+
to increase resilience against attack and improving accuracy leveraging path metadata.
|
100 |
+
- Research paper ("Debuglet...") section describing the motivation behind Debuglet.
|
101 |
+
Presents empirical evidence demonstrating that networks treat packets differently
|
102 |
+
based on their transport protocol (UDP, TCP, ICMP, Raw IP). Shows variations in
|
103 |
+
RTT and drop rates across different protocols using measurements between global
|
104 |
+
data centers. Argues for measurement tools that mimic the behavior of actual application
|
105 |
+
traffic.
|
106 |
+
pipeline_tag: sentence-similarity
|
107 |
+
library_name: sentence-transformers
|
108 |
+
metrics:
|
109 |
+
- cosine_accuracy@1
|
110 |
+
- cosine_accuracy@3
|
111 |
+
- cosine_accuracy@5
|
112 |
+
- cosine_accuracy@10
|
113 |
+
- cosine_precision@1
|
114 |
+
- cosine_precision@3
|
115 |
+
- cosine_precision@5
|
116 |
+
- cosine_precision@10
|
117 |
+
- cosine_recall@1
|
118 |
+
- cosine_recall@3
|
119 |
+
- cosine_recall@5
|
120 |
+
- cosine_recall@10
|
121 |
+
- cosine_ndcg@10
|
122 |
+
- cosine_mrr@10
|
123 |
+
- cosine_map@100
|
124 |
+
model-index:
|
125 |
+
- name: SentenceTransformer based on Snowflake/snowflake-arctic-embed-s
|
126 |
+
results:
|
127 |
+
- task:
|
128 |
+
type: information-retrieval
|
129 |
+
name: Information Retrieval
|
130 |
+
dataset:
|
131 |
+
name: val ir eval
|
132 |
+
type: val-ir-eval
|
133 |
+
metrics:
|
134 |
+
- type: cosine_accuracy@1
|
135 |
+
value: 0.7023809523809523
|
136 |
+
name: Cosine Accuracy@1
|
137 |
+
- type: cosine_accuracy@3
|
138 |
+
value: 0.8095238095238095
|
139 |
+
name: Cosine Accuracy@3
|
140 |
+
- type: cosine_accuracy@5
|
141 |
+
value: 0.8333333333333334
|
142 |
+
name: Cosine Accuracy@5
|
143 |
+
- type: cosine_accuracy@10
|
144 |
+
value: 0.9166666666666666
|
145 |
+
name: Cosine Accuracy@10
|
146 |
+
- type: cosine_precision@1
|
147 |
+
value: 0.7023809523809523
|
148 |
+
name: Cosine Precision@1
|
149 |
+
- type: cosine_precision@3
|
150 |
+
value: 0.26984126984126977
|
151 |
+
name: Cosine Precision@3
|
152 |
+
- type: cosine_precision@5
|
153 |
+
value: 0.16666666666666663
|
154 |
+
name: Cosine Precision@5
|
155 |
+
- type: cosine_precision@10
|
156 |
+
value: 0.09166666666666663
|
157 |
+
name: Cosine Precision@10
|
158 |
+
- type: cosine_recall@1
|
159 |
+
value: 0.7023809523809523
|
160 |
+
name: Cosine Recall@1
|
161 |
+
- type: cosine_recall@3
|
162 |
+
value: 0.8095238095238095
|
163 |
+
name: Cosine Recall@3
|
164 |
+
- type: cosine_recall@5
|
165 |
+
value: 0.8333333333333334
|
166 |
+
name: Cosine Recall@5
|
167 |
+
- type: cosine_recall@10
|
168 |
+
value: 0.9166666666666666
|
169 |
+
name: Cosine Recall@10
|
170 |
+
- type: cosine_ndcg@10
|
171 |
+
value: 0.8013752650113419
|
172 |
+
name: Cosine NDCG@10
|
173 |
+
- type: cosine_mrr@10
|
174 |
+
value: 0.7654195011337871
|
175 |
+
name: Cosine MRR@10
|
176 |
+
- type: cosine_map@100
|
177 |
+
value: 0.7679383751326645
|
178 |
+
name: Cosine MAP@100
|
179 |
+
---
|
180 |
+
|
181 |
+
# SentenceTransformer based on Snowflake/snowflake-arctic-embed-s
|
182 |
+
|
183 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Snowflake/snowflake-arctic-embed-s](https://huggingface.co/Snowflake/snowflake-arctic-embed-s). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
184 |
+
|
185 |
+
## Model Details
|
186 |
+
|
187 |
+
### Model Description
|
188 |
+
- **Model Type:** Sentence Transformer
|
189 |
+
- **Base model:** [Snowflake/snowflake-arctic-embed-s](https://huggingface.co/Snowflake/snowflake-arctic-embed-s) <!-- at revision e596f507467533e48a2e17c007f0e1dacc837b33 -->
|
190 |
+
- **Maximum Sequence Length:** 512 tokens
|
191 |
+
- **Output Dimensionality:** 384 dimensions
|
192 |
+
- **Similarity Function:** Cosine Similarity
|
193 |
+
<!-- - **Training Dataset:** Unknown -->
|
194 |
+
<!-- - **Language:** Unknown -->
|
195 |
+
<!-- - **License:** Unknown -->
|
196 |
+
|
197 |
+
### Model Sources
|
198 |
+
|
199 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
200 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
201 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
202 |
+
|
203 |
+
### Full Model Architecture
|
204 |
+
|
205 |
+
```
|
206 |
+
SentenceTransformer(
|
207 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
|
208 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
209 |
+
(2): Normalize()
|
210 |
+
)
|
211 |
+
```
|
212 |
+
|
213 |
+
## Usage
|
214 |
+
|
215 |
+
### Direct Usage (Sentence Transformers)
|
216 |
+
|
217 |
+
First install the Sentence Transformers library:
|
218 |
+
|
219 |
+
```bash
|
220 |
+
pip install -U sentence-transformers
|
221 |
+
```
|
222 |
+
|
223 |
+
Then you can load this model and run inference.
|
224 |
+
```python
|
225 |
+
from sentence_transformers import SentenceTransformer
|
226 |
+
|
227 |
+
# Download from the 🤗 Hub
|
228 |
+
model = SentenceTransformer("tjohn327/scion-snowflake-arctic-embed-s-v2")
|
229 |
+
# Run inference
|
230 |
+
sentences = [
|
231 |
+
'What is the difference in how leap seconds are applied in UTC and SCION time?',
|
232 |
+
"Book excerpt explaining the networking aspects of SCION Time Synchronization. Highlights the use of SCION's multipath communication capabilities for enhanced fault tolerance and security and selection of maximally disjoint path subsets by Time Servers. The system prioritizes multiple paths measurements toward nodes, to increase resilience against attack and improving accuracy leveraging path metadata.",
|
233 |
+
'Research paper ("Debuglet...") section describing the motivation behind Debuglet. Presents empirical evidence demonstrating that networks treat packets differently based on their transport protocol (UDP, TCP, ICMP, Raw IP). Shows variations in RTT and drop rates across different protocols using measurements between global data centers. Argues for measurement tools that mimic the behavior of actual application traffic.',
|
234 |
+
]
|
235 |
+
embeddings = model.encode(sentences)
|
236 |
+
print(embeddings.shape)
|
237 |
+
# (3, 384)
|
238 |
+
|
239 |
+
# Get the similarity scores for the embeddings
|
240 |
+
similarities = model.similarity(embeddings, embeddings)
|
241 |
+
print(similarities.shape)
|
242 |
+
# torch.Size([3, 3])
|
243 |
+
```
|
244 |
+
|
245 |
+
<!--
|
246 |
+
### Direct Usage (Transformers)
|
247 |
+
|
248 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
249 |
+
|
250 |
+
</details>
|
251 |
+
-->
|
252 |
+
|
253 |
+
<!--
|
254 |
+
### Downstream Usage (Sentence Transformers)
|
255 |
+
|
256 |
+
You can finetune this model on your own dataset.
|
257 |
+
|
258 |
+
<details><summary>Click to expand</summary>
|
259 |
+
|
260 |
+
</details>
|
261 |
+
-->
|
262 |
+
|
263 |
+
<!--
|
264 |
+
### Out-of-Scope Use
|
265 |
+
|
266 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
267 |
+
-->
|
268 |
+
|
269 |
+
## Evaluation
|
270 |
+
|
271 |
+
### Metrics
|
272 |
+
|
273 |
+
#### Information Retrieval
|
274 |
+
|
275 |
+
* Dataset: `val-ir-eval`
|
276 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
277 |
+
|
278 |
+
| Metric | Value |
|
279 |
+
|:--------------------|:-----------|
|
280 |
+
| cosine_accuracy@1 | 0.7024 |
|
281 |
+
| cosine_accuracy@3 | 0.8095 |
|
282 |
+
| cosine_accuracy@5 | 0.8333 |
|
283 |
+
| cosine_accuracy@10 | 0.9167 |
|
284 |
+
| cosine_precision@1 | 0.7024 |
|
285 |
+
| cosine_precision@3 | 0.2698 |
|
286 |
+
| cosine_precision@5 | 0.1667 |
|
287 |
+
| cosine_precision@10 | 0.0917 |
|
288 |
+
| cosine_recall@1 | 0.7024 |
|
289 |
+
| cosine_recall@3 | 0.8095 |
|
290 |
+
| cosine_recall@5 | 0.8333 |
|
291 |
+
| cosine_recall@10 | 0.9167 |
|
292 |
+
| **cosine_ndcg@10** | **0.8014** |
|
293 |
+
| cosine_mrr@10 | 0.7654 |
|
294 |
+
| cosine_map@100 | 0.7679 |
|
295 |
+
|
296 |
+
<!--
|
297 |
+
## Bias, Risks and Limitations
|
298 |
+
|
299 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
300 |
+
-->
|
301 |
+
|
302 |
+
<!--
|
303 |
+
### Recommendations
|
304 |
+
|
305 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
306 |
+
-->
|
307 |
+
|
308 |
+
## Training Details
|
309 |
+
|
310 |
+
### Training Dataset
|
311 |
+
|
312 |
+
#### Unnamed Dataset
|
313 |
+
|
314 |
+
* Size: 4,118 training samples
|
315 |
+
* Columns: <code>sentence_0</code> and <code>sentence_1</code>
|
316 |
+
* Approximate statistics based on the first 1000 samples:
|
317 |
+
| | sentence_0 | sentence_1 |
|
318 |
+
|:--------|:----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
319 |
+
| type | string | string |
|
320 |
+
| details | <ul><li>min: 7 tokens</li><li>mean: 20.11 tokens</li><li>max: 64 tokens</li></ul> | <ul><li>min: 23 tokens</li><li>mean: 79.45 tokens</li><li>max: 166 tokens</li></ul> |
|
321 |
+
* Samples:
|
322 |
+
| sentence_0 | sentence_1 |
|
323 |
+
|:-----------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
324 |
+
| <code>What is the application of the End-to-End (E2E) Options header?</code> | <code>Book excerpt on Data-Plane Extensions explaining extension headers. Describes Hop-by-Hop (HBH) and End-to-End (E2E) extension headers, as in IPv6. Mentions that the extension handling was modified. Explains how SCION uses BFD and details the use of one-hop and empty paths for BFD.</code> |
|
325 |
+
| <code>How is network latency measured and compared between SCIONLab and the current Internet?</code> | <code>Research paper evaluating SCIONLab's data-plane performance, focusing on latency stretch compared to direct Internet connections. Measures RTT using SCMP-based echo application. Shows latency improvements, discusses path stretch, and shows examples of AWS path selection and network latency.</code> |
|
326 |
+
| <code>What tasks Certificate server manage, how do they relate with PCBS?</code> | <code>Research paper giving background on routing in SCION. Mentions up-, core-, and down-segments. Example topology is shown as well.</code> |
|
327 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
328 |
+
```json
|
329 |
+
{
|
330 |
+
"scale": 20.0,
|
331 |
+
"similarity_fct": "cos_sim"
|
332 |
+
}
|
333 |
+
```
|
334 |
+
|
335 |
+
### Training Hyperparameters
|
336 |
+
#### Non-Default Hyperparameters
|
337 |
+
|
338 |
+
- `eval_strategy`: steps
|
339 |
+
- `per_device_train_batch_size`: 60
|
340 |
+
- `per_device_eval_batch_size`: 60
|
341 |
+
- `num_train_epochs`: 2
|
342 |
+
- `multi_dataset_batch_sampler`: round_robin
|
343 |
+
|
344 |
+
#### All Hyperparameters
|
345 |
+
<details><summary>Click to expand</summary>
|
346 |
+
|
347 |
+
- `overwrite_output_dir`: False
|
348 |
+
- `do_predict`: False
|
349 |
+
- `eval_strategy`: steps
|
350 |
+
- `prediction_loss_only`: True
|
351 |
+
- `per_device_train_batch_size`: 60
|
352 |
+
- `per_device_eval_batch_size`: 60
|
353 |
+
- `per_gpu_train_batch_size`: None
|
354 |
+
- `per_gpu_eval_batch_size`: None
|
355 |
+
- `gradient_accumulation_steps`: 1
|
356 |
+
- `eval_accumulation_steps`: None
|
357 |
+
- `torch_empty_cache_steps`: None
|
358 |
+
- `learning_rate`: 5e-05
|
359 |
+
- `weight_decay`: 0.0
|
360 |
+
- `adam_beta1`: 0.9
|
361 |
+
- `adam_beta2`: 0.999
|
362 |
+
- `adam_epsilon`: 1e-08
|
363 |
+
- `max_grad_norm`: 1
|
364 |
+
- `num_train_epochs`: 2
|
365 |
+
- `max_steps`: -1
|
366 |
+
- `lr_scheduler_type`: linear
|
367 |
+
- `lr_scheduler_kwargs`: {}
|
368 |
+
- `warmup_ratio`: 0.0
|
369 |
+
- `warmup_steps`: 0
|
370 |
+
- `log_level`: passive
|
371 |
+
- `log_level_replica`: warning
|
372 |
+
- `log_on_each_node`: True
|
373 |
+
- `logging_nan_inf_filter`: True
|
374 |
+
- `save_safetensors`: True
|
375 |
+
- `save_on_each_node`: False
|
376 |
+
- `save_only_model`: False
|
377 |
+
- `restore_callback_states_from_checkpoint`: False
|
378 |
+
- `no_cuda`: False
|
379 |
+
- `use_cpu`: False
|
380 |
+
- `use_mps_device`: False
|
381 |
+
- `seed`: 42
|
382 |
+
- `data_seed`: None
|
383 |
+
- `jit_mode_eval`: False
|
384 |
+
- `use_ipex`: False
|
385 |
+
- `bf16`: False
|
386 |
+
- `fp16`: False
|
387 |
+
- `fp16_opt_level`: O1
|
388 |
+
- `half_precision_backend`: auto
|
389 |
+
- `bf16_full_eval`: False
|
390 |
+
- `fp16_full_eval`: False
|
391 |
+
- `tf32`: None
|
392 |
+
- `local_rank`: 0
|
393 |
+
- `ddp_backend`: None
|
394 |
+
- `tpu_num_cores`: None
|
395 |
+
- `tpu_metrics_debug`: False
|
396 |
+
- `debug`: []
|
397 |
+
- `dataloader_drop_last`: False
|
398 |
+
- `dataloader_num_workers`: 0
|
399 |
+
- `dataloader_prefetch_factor`: None
|
400 |
+
- `past_index`: -1
|
401 |
+
- `disable_tqdm`: False
|
402 |
+
- `remove_unused_columns`: True
|
403 |
+
- `label_names`: None
|
404 |
+
- `load_best_model_at_end`: False
|
405 |
+
- `ignore_data_skip`: False
|
406 |
+
- `fsdp`: []
|
407 |
+
- `fsdp_min_num_params`: 0
|
408 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
409 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
410 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
411 |
+
- `deepspeed`: None
|
412 |
+
- `label_smoothing_factor`: 0.0
|
413 |
+
- `optim`: adamw_torch
|
414 |
+
- `optim_args`: None
|
415 |
+
- `adafactor`: False
|
416 |
+
- `group_by_length`: False
|
417 |
+
- `length_column_name`: length
|
418 |
+
- `ddp_find_unused_parameters`: None
|
419 |
+
- `ddp_bucket_cap_mb`: None
|
420 |
+
- `ddp_broadcast_buffers`: False
|
421 |
+
- `dataloader_pin_memory`: True
|
422 |
+
- `dataloader_persistent_workers`: False
|
423 |
+
- `skip_memory_metrics`: True
|
424 |
+
- `use_legacy_prediction_loop`: False
|
425 |
+
- `push_to_hub`: False
|
426 |
+
- `resume_from_checkpoint`: None
|
427 |
+
- `hub_model_id`: None
|
428 |
+
- `hub_strategy`: every_save
|
429 |
+
- `hub_private_repo`: None
|
430 |
+
- `hub_always_push`: False
|
431 |
+
- `gradient_checkpointing`: False
|
432 |
+
- `gradient_checkpointing_kwargs`: None
|
433 |
+
- `include_inputs_for_metrics`: False
|
434 |
+
- `include_for_metrics`: []
|
435 |
+
- `eval_do_concat_batches`: True
|
436 |
+
- `fp16_backend`: auto
|
437 |
+
- `push_to_hub_model_id`: None
|
438 |
+
- `push_to_hub_organization`: None
|
439 |
+
- `mp_parameters`:
|
440 |
+
- `auto_find_batch_size`: False
|
441 |
+
- `full_determinism`: False
|
442 |
+
- `torchdynamo`: None
|
443 |
+
- `ray_scope`: last
|
444 |
+
- `ddp_timeout`: 1800
|
445 |
+
- `torch_compile`: False
|
446 |
+
- `torch_compile_backend`: None
|
447 |
+
- `torch_compile_mode`: None
|
448 |
+
- `dispatch_batches`: None
|
449 |
+
- `split_batches`: None
|
450 |
+
- `include_tokens_per_second`: False
|
451 |
+
- `include_num_input_tokens_seen`: False
|
452 |
+
- `neftune_noise_alpha`: None
|
453 |
+
- `optim_target_modules`: None
|
454 |
+
- `batch_eval_metrics`: False
|
455 |
+
- `eval_on_start`: False
|
456 |
+
- `use_liger_kernel`: False
|
457 |
+
- `eval_use_gather_object`: False
|
458 |
+
- `average_tokens_across_devices`: False
|
459 |
+
- `prompts`: None
|
460 |
+
- `batch_sampler`: batch_sampler
|
461 |
+
- `multi_dataset_batch_sampler`: round_robin
|
462 |
+
|
463 |
+
</details>
|
464 |
+
|
465 |
+
### Training Logs
|
466 |
+
| Epoch | Step | val-ir-eval_cosine_ndcg@10 |
|
467 |
+
|:-----:|:----:|:--------------------------:|
|
468 |
+
| 1.0 | 35 | 0.7802 |
|
469 |
+
| 2.0 | 70 | 0.8014 |
|
470 |
+
|
471 |
+
|
472 |
+
### Framework Versions
|
473 |
+
- Python: 3.12.3
|
474 |
+
- Sentence Transformers: 3.4.1
|
475 |
+
- Transformers: 4.49.0
|
476 |
+
- PyTorch: 2.6.0+cu124
|
477 |
+
- Accelerate: 1.4.0
|
478 |
+
- Datasets: 3.3.2
|
479 |
+
- Tokenizers: 0.21.0
|
480 |
+
|
481 |
+
## Citation
|
482 |
+
|
483 |
+
### BibTeX
|
484 |
+
|
485 |
+
#### Sentence Transformers
|
486 |
+
```bibtex
|
487 |
+
@inproceedings{reimers-2019-sentence-bert,
|
488 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
489 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
490 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
491 |
+
month = "11",
|
492 |
+
year = "2019",
|
493 |
+
publisher = "Association for Computational Linguistics",
|
494 |
+
url = "https://arxiv.org/abs/1908.10084",
|
495 |
+
}
|
496 |
+
```
|
497 |
+
|
498 |
+
#### MultipleNegativesRankingLoss
|
499 |
+
```bibtex
|
500 |
+
@misc{henderson2017efficient,
|
501 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
502 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
503 |
+
year={2017},
|
504 |
+
eprint={1705.00652},
|
505 |
+
archivePrefix={arXiv},
|
506 |
+
primaryClass={cs.CL}
|
507 |
+
}
|
508 |
+
```
|
509 |
+
|
510 |
+
<!--
|
511 |
+
## Glossary
|
512 |
+
|
513 |
+
*Clearly define terms in order to be accessible across audiences.*
|
514 |
+
-->
|
515 |
+
|
516 |
+
<!--
|
517 |
+
## Model Card Authors
|
518 |
+
|
519 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
520 |
+
-->
|
521 |
+
|
522 |
+
<!--
|
523 |
+
## Model Card Contact
|
524 |
+
|
525 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
526 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Snowflake/snowflake-arctic-embed-s",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 384,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 1536,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 12,
|
17 |
+
"num_hidden_layers": 12,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "float32",
|
21 |
+
"transformers_version": "4.49.0",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 30522
|
25 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.4.1",
|
4 |
+
"transformers": "4.49.0",
|
5 |
+
"pytorch": "2.6.0+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {
|
8 |
+
"query": "Represent this sentence for searching relevant passages: "
|
9 |
+
},
|
10 |
+
"default_prompt_name": null,
|
11 |
+
"similarity_fn_name": "cosine"
|
12 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:50a0dd0f8fef98225171fce2315999016c532cf9ec9a7642c2208911359cfb39
|
3 |
+
size 132870584
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"extra_special_tokens": {},
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"max_length": 512,
|
51 |
+
"model_max_length": 512,
|
52 |
+
"never_split": null,
|
53 |
+
"pad_to_multiple_of": null,
|
54 |
+
"pad_token": "[PAD]",
|
55 |
+
"pad_token_type_id": 0,
|
56 |
+
"padding_side": "right",
|
57 |
+
"sep_token": "[SEP]",
|
58 |
+
"stride": 0,
|
59 |
+
"strip_accents": null,
|
60 |
+
"tokenize_chinese_chars": true,
|
61 |
+
"tokenizer_class": "BertTokenizer",
|
62 |
+
"truncation_side": "right",
|
63 |
+
"truncation_strategy": "longest_first",
|
64 |
+
"unk_token": "[UNK]"
|
65 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|