cryptonaut committed on
Commit
0152819
·
verified ·
1 Parent(s): e69f914

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +159 -1
config.json CHANGED
@@ -2,6 +2,144 @@
2
  "architectures": [
3
  "WednesdayModel"
4
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "bos_token_id": 1,
6
  "eos_token_id": 2,
7
  "hidden_act": "gelu_pytorch_tanh",
@@ -9,7 +147,7 @@
9
  "initializer_range": 0.02,
10
  "intermediate_size": 3072,
11
  "max_position_embeddings": 32768,
12
- "model_type": "MoE++",
13
  "moe_2layer_gate": true,
14
  "moe_expert_interval": 1,
15
  "moe_feature_no_mul_topk": true,
@@ -33,3 +171,23 @@
33
  "use_cache": true,
34
  "vocab_size": 262400
35
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "architectures": [
3
  "WednesdayModel"
4
  ],
5
+ {
6
+ "architectures": [
7
+ "Gemma3nForConditionalGeneration"
8
+ ],
9
+ "audio_config": {
10
+ "conf_attention_chunk_size": 12,
11
+ "conf_attention_context_left": 13,
12
+ "conf_attention_context_right": 0,
13
+ "conf_attention_logit_cap": 50.0,
14
+ "conf_conv_kernel_size": 5,
15
+ "conf_num_attention_heads": 8,
16
+ "conf_num_hidden_layers": 12,
17
+ "conf_positional_bias_size": 256,
18
+ "conf_reduction_factor": 4,
19
+ "conf_residual_weight": 0.5,
20
+ "gradient_clipping": 10000000000.0,
21
+ "hidden_size": 1536,
22
+ "input_feat_size": 128,
23
+ "model_type": "gemma3n_audio",
24
+ "rms_norm_eps": 1e-06,
25
+ "sscp_conv_channel_size": [
26
+ 128,
27
+ 32
28
+ ],
29
+ "sscp_conv_eps": 0.001,
30
+ "sscp_conv_kernel_size": [
31
+ [
32
+ 3,
33
+ 3
34
+ ],
35
+ [
36
+ 3,
37
+ 3
38
+ ]
39
+ ],
40
+ "sscp_conv_stride_size": [
41
+ [
42
+ 2,
43
+ 2
44
+ ],
45
+ [
46
+ 2,
47
+ 2
48
+ ]
49
+ ],
50
+ "torch_dtype": "bfloat16",
51
+ "vocab_offset": 262272,
52
+ "vocab_size": 128
53
+ },
54
+ "audio_soft_tokens_per_image": 188,
55
+ "audio_token_id": 262273,
56
+ "boa_token_id": 256000,
57
+ "boi_token_id": 255999,
58
+ "eoa_token_id": 262272,
59
+ "eoi_token_id": 262144,
60
+ "image_token_id": 262145,
61
+ "initializer_range": 0.02,
62
+ "model_type": "gemma3n",
63
+ "text_config": {
64
+ "activation_sparsity_pattern": [
65
+ 0.95,
66
+ 0.95,
67
+ 0.95,
68
+ 0.95,
69
+ 0.95,
70
+ 0.95,
71
+ 0.95,
72
+ 0.95,
73
+ 0.95,
74
+ 0.95,
75
+ 0.0,
76
+ 0.0,
77
+ 0.0,
78
+ 0.0,
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0,
84
+ 0.0,
85
+ 0.0,
86
+ 0.0,
87
+ 0.0,
88
+ 0.0,
89
+ 0.0,
90
+ 0.0,
91
+ 0.0,
92
+ 0.0,
93
+ 0.0,
94
+ 0.0
95
+ ],
96
+ "altup_active_idx": 0,
97
+ "altup_coef_clip": 120.0,
98
+ "altup_correct_scale": true,
99
+ "altup_lr_multiplier": 1.0,
100
+ "altup_num_inputs": 4,
101
+ "attention_bias": false,
102
+ "attention_dropout": 0.0,
103
+ "final_logit_softcapping": 30.0,
104
+ "head_dim": 256,
105
+ "hidden_activation": "gelu_pytorch_tanh",
106
+ "hidden_size": 2048,
107
+ "hidden_size_per_layer_input": 256,
108
+ "initializer_range": 0.02,
109
+ "intermediate_size": 8192,
110
+ "laurel_rank": 64,
111
+ "layer_types": [
112
+ "sliding_attention",
113
+ "sliding_attention",
114
+ "sliding_attention",
115
+ "sliding_attention",
116
+ "full_attention",
117
+ "sliding_attention",
118
+ "sliding_attention",
119
+ "sliding_attention",
120
+ "sliding_attention",
121
+ "full_attention",
122
+ "sliding_attention",
123
+ "sliding_attention",
124
+ "sliding_attention",
125
+ "sliding_attention",
126
+ "full_attention",
127
+ "sliding_attention",
128
+ "sliding_attention",
129
+ "sliding_attention",
130
+ "sliding_attention",
131
+ "full_attention",
132
+ "sliding_attention",
133
+ "sliding_attention",
134
+ "sliding_attention",
135
+ "sliding_attention",
136
+ "full_attention",
137
+ "sliding_attention",
138
+ "sliding_attention",
139
+ "sliding_attention",
140
+ "sliding_attention",
141
+ "full_attention"
142
+ ],
143
  "bos_token_id": 1,
144
  "eos_token_id": 2,
145
  "hidden_act": "gelu_pytorch_tanh",
 
147
  "initializer_range": 0.02,
148
  "intermediate_size": 3072,
149
  "max_position_embeddings": 32768,
150
+ "model_type": "gemma3n_text",
151
  "moe_2layer_gate": true,
152
  "moe_expert_interval": 1,
153
  "moe_feature_no_mul_topk": true,
 
171
  "use_cache": true,
172
  "vocab_size": 262400
173
  }
174
+ "torch_dtype": "bfloat16",
175
+ "transformers_version": "4.53.0.dev0",
176
+ "vision_config": {
177
+ "architecture": "mobilenetv5_300m_enc",
178
+ "do_pooling": true,
179
+ "hidden_size": 2048,
180
+ "initializer_range": 0.02,
181
+ "label_names": [
182
+ "LABEL_0",
183
+ "LABEL_1"
184
+ ],
185
+ "model_type": "gemma3n_vision",
186
+ "num_classes": 2,
187
+ "rms_norm_eps": 1e-06,
188
+ "torch_dtype": "bfloat16",
189
+ "vocab_offset": 262144,
190
+ "vocab_size": 128
191
+ },
192
+ "vision_soft_tokens_per_image": 256
193
+ }