Qwen
/

Qwen3-4B

Text Generation

text-generation-inference

Model card Files Files and versions

add AIBOM

#10

by fatima113 - opened 21 days ago

base: refs/heads/main

←

from: refs/pr/10

Discussion Files changed

Files changed (1) hide show

Qwen_Qwen3-4B.json +67 -0

Qwen_Qwen3-4B.json ADDED Viewed

	@@ -0,0 +1,67 @@

+{
+    "bomFormat": "CycloneDX",
+    "specVersion": "1.6",
+    "serialNumber": "urn:uuid:f24b7da0-02aa-40b9-97d8-8c76756a806e",
+    "version": 1,
+    "metadata": {
+        "timestamp": "2025-06-05T09:37:34.037173+00:00",
+        "component": {
+            "type": "machine-learning-model",
+            "bom-ref": "Qwen/Qwen3-4B-d504bcdf-8093-598c-937c-206e9c62b7df",
+            "name": "Qwen/Qwen3-4B",
+            "externalReferences": [
+                {
+                    "url": "https://huggingface.co/Qwen/Qwen3-4B",
+                    "type": "documentation"
+                }
+            ],
+            "modelCard": {
+                "modelParameters": {
+                    "task": "text-generation",
+                    "architectureFamily": "qwen3",
+                    "modelArchitecture": "Qwen3ForCausalLM"
+                },
+                "properties": [
+                    {
+                        "name": "library_name",
+                        "value": "transformers"
+                    },
+                    {
+                        "name": "base_model",
+                        "value": "Qwen/Qwen3-4B-Base"
+                    }
+                ]
+            },
+            "authors": [
+                {
+                    "name": "Qwen"
+                }
+            ],
+            "licenses": [
+                {
+                    "license": {
+                        "id": "Apache-2.0",
+                        "url": "https://spdx.org/licenses/Apache-2.0.html"
+                    }
+                }
+            ],
+            "description": "**Qwen3-4B** has the following features:\n- Type: Causal Language Models\n- Training Stage: Pretraining & Post-training\n- Number of Parameters: 4.0B\n- Number of Parameters (Non-Embedding): 3.6B\n- Number of Layers: 36\n- Number of Attention Heads (GQA): 32 for Q and 8 for KV\n- Context Length: 32,768 natively and [131,072 tokens with YaRN](#processing-long-texts).\n\nFor more details, including benchmark evaluation, hardware requirements, and inference performance, please refer to our [blog](https://qwenlm.github.io/blog/qwen3/), [GitHub](https://github.com/QwenLM/Qwen3), and [Documentation](https://qwen.readthedocs.io/en/latest/).\n\n> [!TIP]\n> If you encounter significant endless repetitions, please refer to the [Best Practices](#best-practices) section for optimal sampling parameters, and set the ``presence_penalty`` to 1.5.",
+            "tags": [
+                "transformers",
+                "safetensors",
+                "qwen3",
+                "text-generation",
+                "conversational",
+                "arxiv:2309.00071",
+                "arxiv:2505.09388",
+                "base_model:Qwen/Qwen3-4B-Base",
+                "base_model:finetune:Qwen/Qwen3-4B-Base",
+                "license:apache-2.0",
+                "autotrain_compatible",
+                "text-generation-inference",
+                "endpoints_compatible",
+                "region:us"
+            ]
+        }
+    }
+}