norm_eps -> rms_norm_eps
config.json CHANGED (+4 -4)
@@ -31,7 +31,7 @@
     "num_attention_heads": 12,
     "num_key_value_heads": null,
     "max_position_embeddings": 8192,
-    "norm_eps": 1e-05,
+    "rms_norm_eps": 1e-05,
     "dropout": 0.0,
     "rope_theta": 10000.0,
     "attn_impl": "xformers",
@@ -48,7 +48,7 @@
     "num_attention_heads": 16,
     "num_key_value_heads": null,
     "num_hidden_layers": 1,
-    "norm_eps": 1e-05,
+    "rms_norm_eps": 1e-05,
     "dropout": 0.0,
     "max_position_embeddings": 24576,
     "rope_theta": 500000.0,
@@ -68,7 +68,7 @@
     "num_attention_heads": 16,
     "num_key_value_heads": null,
     "num_hidden_layers": 9,
-    "norm_eps": 1e-05,
+    "rms_norm_eps": 1e-05,
     "dropout": 0.0,
     "max_position_embeddings": 24576,
     "rope_theta": 500000.0,
@@ -84,7 +84,7 @@
     "num_attention_heads": 16,
     "num_key_value_heads": null,
     "num_hidden_layers": 25,
-    "norm_eps": 1e-05,
+    "rms_norm_eps": 1e-05,
     "dropout": 0.0,
     "max_position_embeddings": 4096,
     "rope_theta": 500000.0,
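The rename likely aligns the config with the `rms_norm_eps` key used by RMSNorm-based model configs (e.g., Llama-style configs in transformers). Since copies of this repo saved before the commit may still carry the legacy `norm_eps` key, a small script can apply the same rename locally. Below is a minimal sketch, assuming the config is plain JSON with nested sub-config blocks like the ones in the diff above; the `migrate` helper is hypothetical and not part of this repo.

import json

def migrate(node):
    # Recursively rename the legacy "norm_eps" key to "rms_norm_eps"
    # in every dict, including nested sub-config blocks (hypothetical
    # helper; mirrors the manual edit made in this commit).
    if isinstance(node, dict):
        if "norm_eps" in node and "rms_norm_eps" not in node:
            node["rms_norm_eps"] = node.pop("norm_eps")
        for value in node.values():
            migrate(value)
    elif isinstance(node, list):
        for item in node:
            migrate(item)
    return node

with open("config.json") as f:
    config = json.load(f)

with open("config.json", "w") as f:
    json.dump(migrate(config), f, indent=2)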