Upload Qwen3ForCausalLM

Browse files

Files changed (10) hide show

config.json +74 -2
generation_config.json +1 -1
pytorch_model-00001-of-00007.bin +2 -2
pytorch_model-00002-of-00007.bin +2 -2
pytorch_model-00003-of-00007.bin +2 -2
pytorch_model-00004-of-00007.bin +2 -2
pytorch_model-00005-of-00007.bin +2 -2
pytorch_model-00006-of-00007.bin +2 -2
pytorch_model-00007-of-00007.bin +2 -2
pytorch_model.bin.index.json +1 -0

config.json CHANGED Viewed

@@ -11,6 +11,72 @@
   "hidden_size": 5120,
   "initializer_range": 0.02,
   "intermediate_size": 25600,
   "max_position_embeddings": 40960,
   "max_window_layers": 64,
   "model_type": "qwen3",
@@ -28,6 +94,8 @@
             "_data": "float8_e4m3fn",
             "_type": "torch.dtype"
           },
           "granularity": [
             {
               "_data": {},
@@ -40,6 +108,10 @@
               "_version": 1
             }
           ],
           "mm_config": {
             "_data": {
               "emulate": false,
@@ -56,7 +128,7 @@
           }
         },
         "_type": "Float8DynamicActivationFloat8WeightConfig",
-        "_version": 1
       }
     },
     "quant_type_kwargs": {},
@@ -68,7 +140,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.52.3",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

   "hidden_size": 5120,
   "initializer_range": 0.02,
   "intermediate_size": 25600,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 40960,
   "max_window_layers": 64,
   "model_type": "qwen3",
             "_data": "float8_e4m3fn",
             "_type": "torch.dtype"
           },
+          "activation_value_lb": null,
+          "activation_value_ub": null,
           "granularity": [
             {
               "_data": {},
               "_version": 1
             }
           ],
+          "kernel_preference": {
+            "_data": "AUTO",
+            "_type": "KernelPreference"
+          },
           "mm_config": {
             "_data": {
               "emulate": false,
           }
         },
         "_type": "Float8DynamicActivationFloat8WeightConfig",
+        "_version": 2
       }
     },
     "quant_type_kwargs": {},
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.55.4",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

generation_config.json CHANGED Viewed

@@ -9,5 +9,5 @@
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
-  "transformers_version": "4.52.3"
 }

   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
+  "transformers_version": "4.55.4"
 }

pytorch_model-00001-of-00007.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eed0a42c41196d852a0ad630145156d1f25ebbd5503f308233b211b81604c03e
-size 4971155870

 version https://git-lfs.github.com/spec/v1
+oid sha256:18754b49217b014b741c2830793b894b8ac95736a4dfc9ccf5a03faab262d739
+size 4971145886

pytorch_model-00002-of-00007.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e647e9ddf448a623b78897e10808f3a1c971453ab1a719eae153fb628e5e51c
-size 4973482027

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8003bab8a8760880ac71c99afe3ecd5f7d234d702d6fa8f75d2a5171786641d
+size 4973467115

pytorch_model-00003-of-00007.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:787b871f002495b07948640c6c2268c57dda22c12e8084194d751522c99e53b5
-size 4879043437

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a81037f65ccdcc5ede179dd247a82661fa403cd17fa1331984d0a90f5246ab9
+size 4879029357

pytorch_model-00004-of-00007.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34d6f19c4295c04255fcdebccd83b9a7768584d33db6426350dce44c9d2bebc4
-size 4879043437

 version https://git-lfs.github.com/spec/v1
+oid sha256:409351b69db8e2454490df4133efa7e9cf78e8a6d4d25cf24ef79d485419e21c
+size 4879029357

pytorch_model-00005-of-00007.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e584b23cec40945a33df1465d9df128b01f08780ce06d75793c787ad2199bf94
-size 4879043437

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc8cbe79e9d873a29f754a2489100bb4809d5c6fa87f0dc74cd4a0e05cd1bab4
+size 4879029357

pytorch_model-00006-of-00007.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f728859761efca7d6e8ea7a0385ced26e22b2c3e3cd18f4aa4206e363e49a9ab
-size 4879043437

 version https://git-lfs.github.com/spec/v1
+oid sha256:fdecc3898b0558e9948342a485f8da0fbb5bb1f1b74036c93bc65d80cc4d1abe
+size 4879029357

pytorch_model-00007-of-00007.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:403de3a4d63a3fbc964476f7cdee8aa8349242eac732621b199330d1b41e0ecd
-size 4876727583

 version https://git-lfs.github.com/spec/v1
+oid sha256:6bc1fca87631101fe5acebbf6cd5db3b256016083fcbbc29c59c42c60441dbec
+size 4876718431

pytorch_model.bin.index.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
   "metadata": {
     "total_size": 34336974848
   },
   "weight_map": {

 {
   "metadata": {
+    "total_parameters": 32762123264,
     "total_size": 34336974848
   },
   "weight_map": {