cryptonaut committed on
Commit
d185fa2
·
verified ·
1 Parent(s): c78b118

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +31 -186
config.json CHANGED
@@ -1,190 +1,35 @@
1
  {
2
  "architectures": [
3
- "WednesdayModel",
4
- "Gemma3nForConditionalGeneration"
5
  ],
6
- "audio_config": {
7
- "conf_attention_chunk_size": 12,
8
- "conf_attention_context_left": 13,
9
- "conf_attention_context_right": 0,
10
- "conf_attention_logit_cap": 50.0,
11
- "conf_conv_kernel_size": 5,
12
- "conf_num_attention_heads": 8,
13
- "conf_num_hidden_layers": 12,
14
- "conf_positional_bias_size": 256,
15
- "conf_reduction_factor": 4,
16
- "conf_residual_weight": 0.5,
17
- "gradient_clipping": 10000000000.0,
18
- "hidden_size": 1536,
19
- "input_feat_size": 128,
20
- "model_type": "gemma3n_audio",
21
- "rms_norm_eps": 1e-06,
22
- "sscp_conv_channel_size": [
23
- 128,
24
- 32
25
- ],
26
- "sscp_conv_eps": 0.001,
27
- "sscp_conv_kernel_size": [
28
- [
29
- 3,
30
- 3
31
- ],
32
- [
33
- 3,
34
- 3
35
- ]
36
- ],
37
- "sscp_conv_stride_size": [
38
- [
39
- 2,
40
- 2
41
- ],
42
- [
43
- 2,
44
- 2
45
- ]
46
- ],
47
- "torch_dtype": "bfloat16",
48
- "vocab_offset": 262272,
49
- "vocab_size": 128
50
- },
51
- "audio_soft_tokens_per_image": 188,
52
- "audio_token_id": 262273,
53
- "boa_token_id": 256000,
54
- "boi_token_id": 255999,
55
- "eoa_token_id": 262272,
56
- "eoi_token_id": 262144,
57
- "image_token_id": 262145,
58
  "initializer_range": 0.02,
59
- "model_type": "gemma3n",
60
- "text_config": {
61
- "activation_sparsity_pattern": [
62
- 0.95,
63
- 0.95,
64
- 0.95,
65
- 0.95,
66
- 0.95,
67
- 0.95,
68
- 0.95,
69
- 0.95,
70
- 0.95,
71
- 0.95,
72
- 0.0,
73
- 0.0,
74
- 0.0,
75
- 0.0,
76
- 0.0,
77
- 0.0,
78
- 0.0,
79
- 0.0,
80
- 0.0,
81
- 0.0,
82
- 0.0,
83
- 0.0,
84
- 0.0,
85
- 0.0,
86
- 0.0,
87
- 0.0,
88
- 0.0,
89
- 0.0,
90
- 0.0,
91
- 0.0
92
- ],
93
- "altup_active_idx": 0,
94
- "altup_coef_clip": 120.0,
95
- "altup_correct_scale": true,
96
- "altup_lr_multiplier": 1.0,
97
- "altup_num_inputs": 4,
98
- "attention_bias": false,
99
- "attention_dropout": 0.0,
100
- "final_logit_softcapping": 30.0,
101
- "head_dim": 256,
102
- "hidden_activation": "gelu_pytorch_tanh",
103
- "hidden_size": 2048,
104
- "hidden_size_per_layer_input": 256,
105
- "initializer_range": 0.02,
106
- "intermediate_size": 8192,
107
- "laurel_rank": 64,
108
- "layer_types": [
109
- "sliding_attention",
110
- "sliding_attention",
111
- "sliding_attention",
112
- "sliding_attention",
113
- "full_attention",
114
- "sliding_attention",
115
- "sliding_attention",
116
- "sliding_attention",
117
- "sliding_attention",
118
- "full_attention",
119
- "sliding_attention",
120
- "sliding_attention",
121
- "sliding_attention",
122
- "sliding_attention",
123
- "full_attention",
124
- "sliding_attention",
125
- "sliding_attention",
126
- "sliding_attention",
127
- "sliding_attention",
128
- "full_attention",
129
- "sliding_attention",
130
- "sliding_attention",
131
- "sliding_attention",
132
- "sliding_attention",
133
- "full_attention",
134
- "sliding_attention",
135
- "sliding_attention",
136
- "sliding_attention",
137
- "sliding_attention",
138
- "full_attention"
139
- ],
140
- "bos_token_id": 1,
141
- "eos_token_id": 2,
142
- "hidden_act": "gelu_pytorch_tanh",
143
- "hidden_size": 768,
144
- "initializer_range": 0.02,
145
- "intermediate_size": 3072,
146
- "max_position_embeddings": 32768,
147
- "model_type": "gemma3n_text",
148
- "moe_2layer_gate": true,
149
- "moe_expert_interval": 1,
150
- "moe_feature_no_mul_topk": true,
151
- "moe_gate_norm_std": 1.0,
152
- "moe_use_logits_norm": true,
153
- "moe_use_mixtral_gating": true,
154
- "num_attention_heads": 12,
155
- "num_experts": [
156
- 8
157
- ],
158
- "num_hidden_layers": 4,
159
- "num_key_value_heads": 2,
160
- "pad_token_id": 0,
161
- "pretraining_tp": 1,
162
- "rms_norm_eps": 1e-06,
163
- "rope_scaling": null,
164
- "rope_theta": 1000000.0,
165
- "tie_word_embeddings": false,
166
- "torch_dtype": "float32",
167
- "transformers_version": "4.53.1",
168
- "use_cache": true,
169
- "vocab_size": 262400
170
- },
171
- "torch_dtype": "bfloat16",
172
- "transformers_version": "4.53.0.dev0",
173
- "vision_config": {
174
- "architecture": "mobilenetv5_300m_enc",
175
- "do_pooling": true,
176
- "hidden_size": 2048,
177
- "initializer_range": 0.02,
178
- "label_names": [
179
- "LABEL_0",
180
- "LABEL_1"
181
- ],
182
- "model_type": "gemma3n_vision",
183
- "num_classes": 2,
184
- "rms_norm_eps": 1e-06,
185
- "torch_dtype": "bfloat16",
186
- "vocab_offset": 262144,
187
- "vocab_size": 128
188
- },
189
- "vision_soft_tokens_per_image": 256
190
- }
 
1
  {
2
  "architectures": [
3
+ "WednesdayModel"
 
4
  ],
5
+ "bos_token_id": 1,
6
+ "eos_token_id": 2,
7
+ "hidden_act": "gelu_pytorch_tanh",
8
+ "hidden_size": 768,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  "initializer_range": 0.02,
10
+ "intermediate_size": 3072,
11
+ "max_position_embeddings": 32768,
12
+ "model_type": "MoE++",
13
+ "moe_2layer_gate": true,
14
+ "moe_expert_interval": 1,
15
+ "moe_feature_no_mul_topk": true,
16
+ "moe_gate_norm_std": 1.0,
17
+ "moe_use_logits_norm": true,
18
+ "moe_use_mixtral_gating": true,
19
+ "num_attention_heads": 12,
20
+ "num_experts": [
21
+ 8
22
+ ],
23
+ "num_hidden_layers": 4,
24
+ "num_key_value_heads": 2,
25
+ "pad_token_id": 0,
26
+ "pretraining_tp": 1,
27
+ "rms_norm_eps": 1e-06,
28
+ "rope_scaling": null,
29
+ "rope_theta": 1000000.0,
30
+ "tie_word_embeddings": false,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.53.1",
33
+ "use_cache": true,
34
+ "vocab_size": 262400
35
+ }