NVILA-Lite-8B-hf-0904 / configuration_nvila_lite.py
Ligeng-Zhu's picture
Upload files with `vila-upload`.
d4540b2 verified
from typing import Any
from transformers.configuration_utils import PretrainedConfig
from transformers.models.qwen2 import Qwen2Config
from transformers.models.siglip import SiglipVisionConfig
class NVILALiteConfig(PretrainedConfig):
    """Composite configuration holding a Qwen2 text config and a SigLIP vision config.

    Besides the two sub-configurations, the config stores the ids of the
    image and video placeholder tokens.
    """

    model_type = "nvila_lite"
    # Maps each sub-configuration attribute to the class used to build it.
    sub_configs = {
        "text_config": Qwen2Config,
        "vision_config": SiglipVisionConfig,
    }
    # NOTE(review): presumably marks this custom class for `AutoConfig`
    # resolution when the config is saved with remote code -- confirm against
    # transformers' `register_for_auto_class`.
    _auto_class = "AutoConfig"

    def __init__(
        self,
        *,
        text_config: dict[str, Any] | Qwen2Config | None = None,
        vision_config: dict[str, Any] | SiglipVisionConfig | None = None,
        image_token_id: int | None = None,
        video_token_id: int | None = None,
        **kwargs,
    ):
        """Build the composite configuration.

        Args:
            text_config: Keyword arguments for ``Qwen2Config`` or an
                already-constructed config object. ``None`` yields a default
                ``Qwen2Config()``.
            vision_config: Keyword arguments for ``SiglipVisionConfig`` or an
                already-constructed config object. ``None`` yields a default
                ``SiglipVisionConfig()``.
            image_token_id: Id of the image token; stored as ``-1`` when
                ``None``.
            video_token_id: Id of the video token; stored as ``-1`` when
                ``None``.
            **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
        """
        # Accept ready-made config objects as well as plain mappings; blindly
        # `**`-expanding a config instance would raise a TypeError.
        if text_config is None:
            text_config = Qwen2Config()
        elif not isinstance(text_config, PretrainedConfig):
            text_config = Qwen2Config(**text_config)
        self.text_config = text_config

        if vision_config is None:
            vision_config = SiglipVisionConfig()
        elif not isinstance(vision_config, PretrainedConfig):
            vision_config = SiglipVisionConfig(**vision_config)
        self.vision_config = vision_config

        # -1 is the stored placeholder when no token id is supplied.
        self.image_token_id = image_token_id if image_token_id is not None else -1
        self.video_token_id = video_token_id if video_token_id is not None else -1

        # The base initializer runs last, after the attributes above are set
        # (same order as the original implementation).
        super().__init__(**kwargs)