{
  "architectures": [
    "SiglipForImageClassification"
  ],
  "id2label": {
    "0": "airplane",
    "1": "airport",
    "2": "baseball diamond",
    "3": "basketball court",
    "4": "beach",
    "5": "bridge",
    "6": "chaparral",
    "7": "church",
    "8": "circular farmland",
    "9": "cloud",
    "10": "commercial area",
    "11": "dense residential",
    "12": "desert",
    "13": "forest",
    "14": "freeway",
    "15": "golf course",
    "16": "ground track field",
    "17": "harbor",
    "18": "industrial area",
    "19": "intersection",
    "20": "island",
    "21": "lake",
    "22": "meadow",
    "23": "medium residential",
    "24": "mobile home park",
    "25": "mountain",
    "26": "overpass",
    "27": "palace",
    "28": "parking lot",
    "29": "railway",
    "30": "railway station",
    "31": "rectangular farmland",
    "32": "river",
    "33": "roundabout",
    "34": "runway",
    "35": "sea ice",
    "36": "ship",
    "37": "snowberg",
    "38": "sparse residential",
    "39": "stadium",
    "40": "storage tank",
    "41": "tennis court",
    "42": "terrace",
    "43": "thermal power station",
    "44": "wetland"
  },
  "initializer_factor": 1.0,
  "label2id": {
    "airplane": 0,
    "airport": 1,
    "baseball diamond": 2,
    "basketball court": 3,
    "beach": 4,
    "bridge": 5,
    "chaparral": 6,
    "church": 7,
    "circular farmland": 8,
    "cloud": 9,
    "commercial area": 10,
    "dense residential": 11,
    "desert": 12,
    "forest": 13,
    "freeway": 14,
    "golf course": 15,
    "ground track field": 16,
    "harbor": 17,
    "industrial area": 18,
    "intersection": 19,
    "island": 20,
    "lake": 21,
    "meadow": 22,
    "medium residential": 23,
    "mobile home park": 24,
    "mountain": 25,
    "overpass": 26,
    "palace": 27,
    "parking lot": 28,
    "railway": 29,
    "railway station": 30,
    "rectangular farmland": 31,
    "river": 32,
    "roundabout": 33,
    "runway": 34,
    "sea ice": 35,
    "ship": 36,
    "snowberg": 37,
    "sparse residential": 38,
    "stadium": 39,
    "storage tank": 40,
    "tennis court": 41,
    "terrace": 42,
    "thermal power station": 43,
    "wetland": 44
  },
  "model_type": "siglip",
  "problem_type": "single_label_classification",
  "text_config": {
    "attention_dropout": 0.0,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 768,
    "intermediate_size": 3072,
    "layer_norm_eps": 1e-06,
    "max_position_embeddings": 64,
    "model_type": "siglip_text_model",
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "projection_size": 768,
    "torch_dtype": "float32",
    "vocab_size": 256000
  },
  "torch_dtype": "float32",
  "transformers_version": "4.50.0",
  "vision_config": {
    "attention_dropout": 0.0,
    "hidden_act": "gelu_pytorch_tanh",
    "hidden_size": 768,
    "image_size": 224,
    "intermediate_size": 3072,
    "layer_norm_eps": 1e-06,
    "model_type": "siglip_vision_model",
    "num_attention_heads": 12,
    "num_channels": 3,
    "num_hidden_layers": 12,
    "patch_size": 16,
    "torch_dtype": "float32"
  }
}