add AIBOM
#14 by sabato-nocera · opened

Xkev_Llama-3.2V-11B-cot.json ADDED (+123 -0)
@@ -0,0 +1,123 @@
{
  "bomFormat": "CycloneDX",
  "specVersion": "1.6",
  "serialNumber": "urn:uuid:4ec612d6-0e01-4883-8931-409c560fd885",
  "version": 1,
  "metadata": {
    "timestamp": "2025-07-21T08:19:51.846071+00:00",
    "component": {
      "type": "machine-learning-model",
      "bom-ref": "Xkev/Llama-3.2V-11B-cot-9618fe23-9615-538b-9533-c062d681ea6b",
      "licenses": [
        {
          "license": {
            "id": "Apache-2.0",
            "url": "https://spdx.org/licenses/Apache-2.0.html"
          }
        }
      ],
      "externalReferences": [
        {
          "url": "https://huggingface.co/Xkev/Llama-3.2V-11B-cot",
          "type": "documentation"
        }
      ],
      "modelCard": {
        "modelParameters": {
          "datasets": [
            {
              "ref": "Xkev/LLaVA-CoT-100k-c9c723f9-0fc7-5fcc-a916-d80870eaabe0"
            }
          ],
          "task": "image-text-to-text",
          "architectureFamily": "mllama",
          "modelArchitecture": "MllamaForConditionalGeneration"
        },
        "properties": [
          {
            "name": "library_name",
            "value": "transformers"
          },
          {
            "name": "base_model",
            "value": "meta-llama/Llama-3.2-11B-Vision-Instruct"
          }
        ]
      },
      "name": "Xkev/Llama-3.2V-11B-cot",
      "authors": [
        {
          "name": "Xkev"
        }
      ],
      "description": "<!-- Provide a longer summary of what this model is. -->- **License:** apache-2.0- **Finetuned from model:** meta-llama/Llama-3.2-11B-Vision-Instruct",
      "tags": [
        "transformers",
        "safetensors",
        "mllama",
        "image-to-text",
        "image-text-to-text",
        "conversational",
        "en",
        "dataset:Xkev/LLaVA-CoT-100k",
        "arxiv:2411.10440",
        "base_model:meta-llama/Llama-3.2-11B-Vision-Instruct",
        "base_model:finetune:meta-llama/Llama-3.2-11B-Vision-Instruct",
        "license:apache-2.0",
        "text-generation-inference",
        "endpoints_compatible",
        "region:us"
      ]
    }
  },
  "components": [
    {
      "type": "data",
      "bom-ref": "Xkev/LLaVA-CoT-100k-c9c723f9-0fc7-5fcc-a916-d80870eaabe0",
      "name": "Xkev/LLaVA-CoT-100k",
      "data": [
        {
          "type": "dataset",
          "bom-ref": "Xkev/LLaVA-CoT-100k-c9c723f9-0fc7-5fcc-a916-d80870eaabe0",
          "name": "Xkev/LLaVA-CoT-100k",
          "contents": {
            "url": "https://huggingface.co/datasets/Xkev/LLaVA-CoT-100k",
            "properties": [
              {
                "name": "task_categories",
                "value": "visual-question-answering"
              },
              {
                "name": "language",
                "value": "en"
              },
              {
                "name": "size_categories",
                "value": "10K<n<100K"
              },
              {
                "name": "pretty_name",
                "value": "LLaVA-CoT"
              },
              {
                "name": "license",
                "value": "apache-2.0"
              }
            ]
          },
          "description": "\n\t\n\t\t\n\t\tDataset Card for LLaVA-CoT\n\t\n\n\n\t\n\t\t\n\t\tDataset Sources\n\t\n\n\nRepository: [https://github.com/PKU-YuanGroup/LLaVA-CoT]\nPaper: [https://arxiv.org/abs/2411.10440]\n\n\n\t\n\t\t\n\t\tDataset Structure\n\t\n\nThe repository includes image.zip.part-{aa-ap}, which you need to merge manually. Use the following command to combine them:\ncat image.zip.part-* > image.zip\nunzip image.zip\n\nThe train.jsonl file contains the question-answering data in the following format:\n{\n\"id\": ID,\n\"image\": IMAGE_PATH\u2026 See the full description on the dataset page: https://huggingface.co/datasets/Xkev/LLaVA-CoT-100k.",
          "governance": {
            "owners": [
              {
                "organization": {
                  "name": "Xkev",
                  "url": "https://huggingface.co/Xkev"
                }
              }
            ]
          }
        }
      ]
    }
  ]
}
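For anyone who wants to consume this AIBOM programmatically, below is a minimal Python sketch (an illustration, not part of the PR) that loads the file added above and sanity-checks a few CycloneDX fields. The file name comes from this PR; the ref-resolution check is an assumption about how the model-card-to-dataset links via `bom-ref` are meant to be used.

```python
import json

# Minimal sketch: load the AIBOM added in this PR
# (file name as it appears in the diff above).
with open("Xkev_Llama-3.2V-11B-cot.json") as f:
    bom = json.load(f)

# Basic CycloneDX sanity checks.
assert bom["bomFormat"] == "CycloneDX"
assert bom["specVersion"] == "1.6"

model = bom["metadata"]["component"]
print("model:", model["name"])  # Xkev/Llama-3.2V-11B-cot
print("task:", model["modelCard"]["modelParameters"]["task"])  # image-text-to-text

# Assumption: every dataset ref in the model card should resolve to a
# component declared in this BOM (they are linked via bom-ref).
declared = {c["bom-ref"] for c in bom.get("components", [])}
for ds in model["modelCard"]["modelParameters"]["datasets"]:
    assert ds["ref"] in declared, f"dangling dataset ref: {ds['ref']}"
print("all dataset refs resolve")
```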