change small model
- app.py +28 -4
- clip_vitb_imagenet_zeroweights.pt +3 -0
app.py
CHANGED
@@ -17,6 +17,8 @@ from sklearn import metrics
 import torch
 from torchvision import transforms
 
+from tqdm import tqdm
+
 from models.submodular_vit_efficient_plus import MultiModalSubModularExplanationEfficientPlus
 
 data_transform = transforms.Compose(
@@ -42,7 +44,7 @@ class CLIPModel_Super(torch.nn.Module):
         self.device = device
         self.model, _ = clip.load(type, device=self.device, download_root=download_root)
 
-        self.model = self.model.
+        self.model = self.model.type(torch.float32)
 
     def forward(self, vision_inputs):
         """
@@ -70,18 +72,40 @@ def transform_vision_data(image):
     image = data_transform(image)
     return image
 
-
+def zeroshot_classifier(model, classnames, templates, device):
+    with torch.no_grad():
+        zeroshot_weights = []
+        for classname in tqdm(classnames):
+            texts = [template.format(classname) for template in templates] #format with class
+            texts = clip.tokenize(texts).to(device) #tokenize
+
+            with torch.no_grad():
+                class_embeddings = model.model.encode_text(texts)
+
+            class_embeddings /= class_embeddings.norm(dim=-1, keepdim=True)
+            class_embedding = class_embeddings.mean(dim=0)
+            class_embedding /= class_embedding.norm()
+            zeroshot_weights.append(class_embedding)
+        zeroshot_weights = torch.stack(zeroshot_weights).cuda()
+    return zeroshot_weights*100
+
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+device = "cuda"
 # Instantiate model
-vis_model = CLIPModel_Super("ViT-
+vis_model = CLIPModel_Super("ViT-B/16", device=device, download_root="./ckpt")
 vis_model.eval()
 vis_model.to(device)
 print("load clip model")
 
-semantic_path = "./
+semantic_path = "./clip_vitb_imagenet_zeroweights.pt"
 if os.path.exists(semantic_path):
     semantic_feature = torch.load(semantic_path, map_location="cpu")
     semantic_feature = semantic_feature.to(device)
     semantic_feature = semantic_feature.type(torch.float32)
+else:
+    semantic_feature = zeroshot_classifier(vis_model, imagenet_classes, imagenet_templates, device)
+    torch.save(semantic_feature, semantic_path)
+
 
 explainer = MultiModalSubModularExplanationEfficientPlus(
     vis_model, semantic_feature, transform_vision_data, device=device,
clip_vitb_imagenet_zeroweights.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c552bb4a3eebecf3162e53861a8368417a2d0b5c3af5454041369c89160ac34e
+size 2048880
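The Git LFS pointer records the object's SHA-256 and byte size, so a local copy of the weights can be sanity-checked after cloning or downloading. A minimal check, assuming the file sits next to app.py as in the repository:

import hashlib
import os

# Verify a local copy of the LFS object against the pointer above.
# The path mirrors semantic_path in app.py; adjust if the file lives elsewhere.
path = "./clip_vitb_imagenet_zeroweights.pt"
expected_oid = "c552bb4a3eebecf3162e53861a8368417a2d0b5c3af5454041369c89160ac34e"
expected_size = 2048880

assert os.path.getsize(path) == expected_size, "unexpected file size"

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

assert sha256.hexdigest() == expected_oid, "SHA-256 does not match the LFS pointer"
print("clip_vitb_imagenet_zeroweights.pt matches the LFS pointer")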