{
  "architectures": [
    "SiglipForImageClassification"
  ],
  "id2label": {
    "0": "\"rotate anticlockwise\"",
    "1": "\"increase\"",
    "2": "\"release\"",
    "3": "\"switch\"",
    "4": "\"look up\"",
    "5": "\"Terminate\"",
    "6": "\"decrease\"",
    "7": "\"move backward\"",
    "8": "\"point\"",
    "9": "\"rotate clockwise\"",
    "10": "\"grasp\"",
    "11": "\"pause\"",
    "12": "\"move forward\"",
    "13": "\"Confirm\"",
    "14": "\"look down\"",
    "15": "\"move left\"",
    "16": "\"move right\""
  },
  "initializer_factor": 1.0,
  "label2id": {
    "\"Confirm\"": 13,
    "\"Terminate\"": 5,
    "\"decrease\"": 6,
    "\"grasp\"": 10,
    "\"increase\"": 1,
    "\"look down\"": 14,
    "\"look up\"": 4,
    "\"move backward\"": 7,
    "\"move forward\"": 12,
    "\"move left\"": 15,
    "\"move right\"": 16,
    "\"pause\"": 11,
    "\"point\"": 8,
    "\"release\"": 2,
    "\"rotate anticlockwise\"": 0,
    "\"rotate clockwise\"": 9,
    "\"switch\"": 3
  },
  "model_type": "siglip",
  "problem_type": "single_label_classification",
  "text_config": {
    "attention_dropout": 0.0,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 768,
    "intermediate_size": 3072,
    "layer_norm_eps": 1e-06,
    "max_position_embeddings": 64,
    "model_type": "siglip_text_model",
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "projection_size": 768,
    "torch_dtype": "float32",
    "vocab_size": 256000
  },
  "torch_dtype": "float32",
  "transformers_version": "4.50.2",
  "vision_config": {
    "attention_dropout": 0.0,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 768,
    "image_size": 224,
    "intermediate_size": 3072,
    "layer_norm_eps": 1e-06,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 12,
    "num_channels": 3,
    "num_hidden_layers": 12,
    "patch_size": 16,
    "torch_dtype": "float32"
  }
}