Upload Qwen3ForCausalLM

Files changed (3) hide show

config.json CHANGED Viewed

@@ -8,9 +8,39 @@
   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
-  "hidden_size": 2048,
   "initializer_range": 0.02,
-  "intermediate_size": 6144,
   "max_position_embeddings": 40960,
   "max_window_layers": 28,
   "model_type": "qwen3",
@@ -23,7 +53,7 @@
   "sliding_window": null,
   "tie_word_embeddings": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.52.4",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

   "eos_token_id": 151645,
   "head_dim": 128,
   "hidden_act": "silu",
+  "hidden_size": 1024,
   "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 40960,
   "max_window_layers": 28,
   "model_type": "qwen3",
   "sliding_window": null,
   "tie_word_embeddings": true,
   "torch_dtype": "float32",
+  "transformers_version": "4.53.0",
   "use_cache": true,
   "use_sliding_window": false,
   "vocab_size": 151936

generation_config.json CHANGED Viewed

@@ -9,5 +9,5 @@
   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
-  "transformers_version": "4.52.4"
 }

   "temperature": 0.6,
   "top_k": 20,
   "top_p": 0.95,
+  "transformers_version": "4.53.0"
 }

model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0013beb29261fb78ef2f1932eece31c07e1376794000cb11119d923330bcef1a
+size 2384234968