Upload Gemma3ForConditionalGeneration
Browse files- config.json +25 -3
- generation_config.json +13 -0
- model-00001-of-00012.safetensors +3 -0
- model-00002-of-00012.safetensors +3 -0
- model-00003-of-00012.safetensors +3 -0
- model-00004-of-00012.safetensors +3 -0
- model-00005-of-00012.safetensors +3 -0
- model-00006-of-00012.safetensors +3 -0
- model-00007-of-00012.safetensors +3 -0
- model-00008-of-00012.safetensors +3 -0
- model-00009-of-00012.safetensors +3 -0
- model-00010-of-00012.safetensors +3 -0
- model-00011-of-00012.safetensors +3 -0
- model-00012-of-00012.safetensors +3 -0
- model.safetensors.index.json +0 -0
config.json
CHANGED
@@ -9,28 +9,50 @@
|
|
9 |
"mm_tokens_per_image": 256,
|
10 |
"model_type": "gemma3",
|
11 |
"text_config": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
"hidden_size": 3840,
|
|
|
13 |
"intermediate_size": 15360,
|
|
|
14 |
"model_type": "gemma3_text",
|
15 |
"num_attention_heads": 16,
|
16 |
"num_hidden_layers": 48,
|
17 |
"num_key_value_heads": 8,
|
|
|
|
|
|
|
18 |
"rope_scaling": {
|
19 |
"factor": 8.0,
|
20 |
"rope_type": "linear"
|
21 |
},
|
22 |
-
"
|
|
|
|
|
|
|
|
|
|
|
23 |
},
|
24 |
-
"torch_dtype": "
|
25 |
-
"transformers_version": "4.
|
26 |
"vision_config": {
|
|
|
|
|
27 |
"hidden_size": 1152,
|
28 |
"image_size": 896,
|
29 |
"intermediate_size": 4304,
|
|
|
30 |
"model_type": "siglip_vision_model",
|
31 |
"num_attention_heads": 16,
|
|
|
32 |
"num_hidden_layers": 27,
|
33 |
"patch_size": 14,
|
|
|
34 |
"vision_use_head": false
|
35 |
}
|
36 |
}
|
|
|
9 |
"mm_tokens_per_image": 256,
|
10 |
"model_type": "gemma3",
|
11 |
"text_config": {
|
12 |
+
"attention_bias": false,
|
13 |
+
"attention_dropout": 0.0,
|
14 |
+
"attn_logit_softcapping": null,
|
15 |
+
"cache_implementation": "hybrid",
|
16 |
+
"final_logit_softcapping": null,
|
17 |
+
"head_dim": 256,
|
18 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
19 |
"hidden_size": 3840,
|
20 |
+
"initializer_range": 0.02,
|
21 |
"intermediate_size": 15360,
|
22 |
+
"max_position_embeddings": 131072,
|
23 |
"model_type": "gemma3_text",
|
24 |
"num_attention_heads": 16,
|
25 |
"num_hidden_layers": 48,
|
26 |
"num_key_value_heads": 8,
|
27 |
+
"query_pre_attn_scalar": 256,
|
28 |
+
"rms_norm_eps": 1e-06,
|
29 |
+
"rope_local_base_freq": 10000.0,
|
30 |
"rope_scaling": {
|
31 |
"factor": 8.0,
|
32 |
"rope_type": "linear"
|
33 |
},
|
34 |
+
"rope_theta": 1000000.0,
|
35 |
+
"sliding_window": 1024,
|
36 |
+
"sliding_window_pattern": 6,
|
37 |
+
"torch_dtype": "float32",
|
38 |
+
"use_cache": true,
|
39 |
+
"vocab_size": 262208
|
40 |
},
|
41 |
+
"torch_dtype": "float32",
|
42 |
+
"transformers_version": "4.51.0.dev0",
|
43 |
"vision_config": {
|
44 |
+
"attention_dropout": 0.0,
|
45 |
+
"hidden_act": "gelu_pytorch_tanh",
|
46 |
"hidden_size": 1152,
|
47 |
"image_size": 896,
|
48 |
"intermediate_size": 4304,
|
49 |
+
"layer_norm_eps": 1e-06,
|
50 |
"model_type": "siglip_vision_model",
|
51 |
"num_attention_heads": 16,
|
52 |
+
"num_channels": 3,
|
53 |
"num_hidden_layers": 27,
|
54 |
"patch_size": 14,
|
55 |
+
"torch_dtype": "float32",
|
56 |
"vision_use_head": false
|
57 |
}
|
58 |
}
|
generation_config.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 2,
|
3 |
+
"cache_implementation": "hybrid",
|
4 |
+
"do_sample": true,
|
5 |
+
"eos_token_id": [
|
6 |
+
1,
|
7 |
+
106
|
8 |
+
],
|
9 |
+
"pad_token_id": 0,
|
10 |
+
"top_k": 64,
|
11 |
+
"top_p": 0.95,
|
12 |
+
"transformers_version": "4.51.0.dev0"
|
13 |
+
}
|
model-00001-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08ac4ddeeddcb48b549aef357aff564017473f0cd0738dfcef3f468cbac6a501
|
3 |
+
size 1685223128
|
model-00002-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8b1b855fd49d925477971067f9797d31ee1e206f073c3a1db086f800d54d4e60
|
3 |
+
size 4987027384
|
model-00003-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4791e85f0e6c60169546cd823d7dcbb43f6797923307d0fe82b1b04662c5f95
|
3 |
+
size 4844749824
|
model-00004-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:218ec7cc55691c2be1cc28ae07c2ac821e23a2db34fb816635319f5a858ac4bb
|
3 |
+
size 4954909736
|
model-00005-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9221b6efa97794ecd2f2df1391da104b304ed9067e98cf6eeea349d01c52a923
|
3 |
+
size 4907664584
|
model-00006-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:152951cc6b0c10a25a392c78cf8807d75a6041d2d0d3f26ab16c1527f8df7ac2
|
3 |
+
size 4954909792
|
model-00007-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:037d0adfcc0b5ec168be8f29c9baa9fb402c13c4f524f666391adb05616f43d6
|
3 |
+
size 4907664584
|
model-00008-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d394ebebca91eac0df4d127dd6773c85042cf211453022c6000ca4f9a200f5c3
|
3 |
+
size 4954909792
|
model-00009-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7399c6fa8f98030c90d875c5000891cd0faa2325c3d4ce293cc7ce3df65f15d6
|
3 |
+
size 4907664584
|
model-00010-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff7da59a922b2d69fb264cbaab632712d0d1c4eb56b6cd8093dd747c7b13f656
|
3 |
+
size 4954909792
|
model-00011-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a65f03eb7c0a953355b44887332e1b1ff1baddb9a786d6411152e189c74bc31c
|
3 |
+
size 2689808472
|
model-00012-of-00012.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a1e0bf97783f3ecc4003a927ef11bd0567faf85f78c1ee9923116d7600755a5
|
3 |
+
size 4027515024
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|