adjust config
Browse files
- config.json +1 -1
- config_emb.json +1 -1
- configuration_aria.py +4 -2
config.json
CHANGED
@@ -2,8 +2,8 @@
|
|
2 |
"architectures": [
|
3 |
"AriaForCausalLM"
|
4 |
],
|
5 |
-
"bos_token_id": 0,
|
6 |
"eos_token_id": 1,
|
|
|
7 |
"hidden_size": 1536,
|
8 |
"intermediate_size": 6144,
|
9 |
"max_seq_len": 8192,
|
|
|
2 |
"architectures": [
|
3 |
"AriaForCausalLM"
|
4 |
],
|
|
|
5 |
"eos_token_id": 1,
|
6 |
+
"pad_token_id": 2,
|
7 |
"hidden_size": 1536,
|
8 |
"intermediate_size": 6144,
|
9 |
"max_seq_len": 8192,
|
config_emb.json
CHANGED
@@ -2,8 +2,8 @@
|
|
2 |
"architectures": [
|
3 |
"AriaForSequenceEmbedding"
|
4 |
],
|
5 |
-
"bos_token_id": 0,
|
6 |
"eos_token_id": 1,
|
|
|
7 |
"hidden_size": 1536,
|
8 |
"embedding_size": 512,
|
9 |
"intermediate_size": 6144,
|
|
|
2 |
"architectures": [
|
3 |
"AriaForSequenceEmbedding"
|
4 |
],
|
|
|
5 |
"eos_token_id": 1,
|
6 |
+
"pad_token_id": 2,
|
7 |
"hidden_size": 1536,
|
8 |
"embedding_size": 512,
|
9 |
"intermediate_size": 6144,
|
configuration_aria.py
CHANGED
@@ -15,8 +15,8 @@ class AriaConfig(PretrainedConfig):
|
|
15 |
intermediate_size: int = 6144,
|
16 |
max_seq_len: int = 8192,
|
17 |
use_cache: bool = True,
|
18 |
-
bos_token_id: int = 0,
|
19 |
eos_token_id: int = 1,
|
|
|
20 |
tie_word_embeddings: bool = False,
|
21 |
output_attentions: bool = False,
|
22 |
output_hidden_states: bool = False,
|
@@ -24,7 +24,9 @@ class AriaConfig(PretrainedConfig):
|
|
24 |
**kwargs,
|
25 |
):
|
26 |
super().__init__(
|
27 |
-
|
|
|
|
|
28 |
)
|
29 |
self.vocab_size = vocab_size
|
30 |
self.hidden_size = hidden_size
|
|
|
15 |
intermediate_size: int = 6144,
|
16 |
max_seq_len: int = 8192,
|
17 |
use_cache: bool = True,
|
|
|
18 |
eos_token_id: int = 1,
|
19 |
+
pad_token_id: int = 2,
|
20 |
tie_word_embeddings: bool = False,
|
21 |
output_attentions: bool = False,
|
22 |
output_hidden_states: bool = False,
|
|
|
24 |
**kwargs,
|
25 |
):
|
26 |
super().__init__(
|
27 |
+
pad_token_id=pad_token_id,
|
28 |
+
eos_token_id=eos_token_id,
|
29 |
+
**kwargs,
|
30 |
)
|
31 |
self.vocab_size = vocab_size
|
32 |
self.hidden_size = hidden_size
|