jerryzh168 committed
Commit 7b06bcd · verified · 1 Parent(s): 2798cee

Upload Qwen3ForCausalLM
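The changes below are consistent with re-quantizing and re-uploading a float8 Qwen3-32B-class checkpoint under a newer transformers/torchao stack. As context, here is a minimal sketch of how such a checkpoint is typically produced; the base model id, granularity, and target repo name are assumptions, not taken from this commit:

```python
# Hedged sketch of how a checkpoint like this is typically produced.
# The base model id, PerRow granularity, and repo name are assumptions;
# Float8DynamicActivationFloat8WeightConfig matches the "_type" serialized
# in config.json below.
import torch
from torchao.quantization import Float8DynamicActivationFloat8WeightConfig, PerRow
from transformers import AutoModelForCausalLM, TorchAoConfig

quant_config = TorchAoConfig(
    quant_type=Float8DynamicActivationFloat8WeightConfig(granularity=PerRow())
)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-32B",            # assumed base model (hidden_size 5120, 64 layers)
    torch_dtype=torch.bfloat16,  # matches "torch_dtype": "bfloat16" in config.json
    device_map="auto",
    quantization_config=quant_config,
)
model.push_to_hub("your-username/Qwen3-32B-float8")  # hypothetical repo name
```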
config.json CHANGED
@@ -11,6 +11,72 @@
   "hidden_size": 5120,
   "initializer_range": 0.02,
   "intermediate_size": 25600,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 40960,
   "max_window_layers": 64,
   "model_type": "qwen3",
@@ -28,6 +94,8 @@
   "_data": "float8_e4m3fn",
   "_type": "torch.dtype"
   },
+  "activation_value_lb": null,
+  "activation_value_ub": null,
   "granularity": [
   {
   "_data": {},
@@ -40,6 +108,10 @@
   "_version": 1
   }
   ],
+  "kernel_preference": {
+  "_data": "AUTO",
+  "_type": "KernelPreference"
+  },
   "mm_config": {
   "_data": {
   "emulate": false,
@@ -56,7 +128,7 @@
   }
   },
   "_type": "Float8DynamicActivationFloat8WeightConfig",
-  "_version": 1
+  "_version": 2
   }
   },
   "quant_type_kwargs": {},
@@ -68,7 +140,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.52.3",
+  "transformers_version": "4.55.4",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936
generation_config.json CHANGED
@@ -9,5 +9,5 @@
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
-  "transformers_version": "4.52.3"
+  "transformers_version": "4.55.4"
 }
pytorch_model-00001-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:eed0a42c41196d852a0ad630145156d1f25ebbd5503f308233b211b81604c03e
- size 4971155870
+ oid sha256:18754b49217b014b741c2830793b894b8ac95736a4dfc9ccf5a03faab262d739
+ size 4971145886
pytorch_model-00002-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3e647e9ddf448a623b78897e10808f3a1c971453ab1a719eae153fb628e5e51c
- size 4973482027
+ oid sha256:a8003bab8a8760880ac71c99afe3ecd5f7d234d702d6fa8f75d2a5171786641d
+ size 4973467115
pytorch_model-00003-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:787b871f002495b07948640c6c2268c57dda22c12e8084194d751522c99e53b5
- size 4879043437
+ oid sha256:8a81037f65ccdcc5ede179dd247a82661fa403cd17fa1331984d0a90f5246ab9
+ size 4879029357
pytorch_model-00004-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:34d6f19c4295c04255fcdebccd83b9a7768584d33db6426350dce44c9d2bebc4
- size 4879043437
+ oid sha256:409351b69db8e2454490df4133efa7e9cf78e8a6d4d25cf24ef79d485419e21c
+ size 4879029357
pytorch_model-00005-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e584b23cec40945a33df1465d9df128b01f08780ce06d75793c787ad2199bf94
- size 4879043437
+ oid sha256:fc8cbe79e9d873a29f754a2489100bb4809d5c6fa87f0dc74cd4a0e05cd1bab4
+ size 4879029357
pytorch_model-00006-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f728859761efca7d6e8ea7a0385ced26e22b2c3e3cd18f4aa4206e363e49a9ab
- size 4879043437
+ oid sha256:fdecc3898b0558e9948342a485f8da0fbb5bb1f1b74036c93bc65d80cc4d1abe
+ size 4879029357
pytorch_model-00007-of-00007.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:403de3a4d63a3fbc964476f7cdee8aa8349242eac732621b199330d1b41e0ecd
- size 4876727583
+ oid sha256:6bc1fca87631101fe5acebbf6cd5db3b256016083fcbbc29c59c42c60441dbec
+ size 4876718431
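All seven shards are Git LFS objects, so the diffs above only swap the pointer files: per the LFS pointer spec, `oid` is the SHA-256 of the shard's contents and `size` is its byte count. A quick integrity check for a downloaded shard against its updated pointer:

```python
import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file so multi-GB shards don't need to fit in memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            h.update(block)
    return h.hexdigest()

# oid from the updated pointer for shard 1 of 7:
expected = "18754b49217b014b741c2830793b894b8ac95736a4dfc9ccf5a03faab262d739"
assert sha256_of("pytorch_model-00001-of-00007.bin") == expected
```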
pytorch_model.bin.index.json CHANGED
@@ -1,5 +1,6 @@
  {
    "metadata": {
+     "total_parameters": 32762123264,
      "total_size": 34336974848
    },
    "weight_map": {