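# Zero-shot facial-emotion demo: a LAION-trained CLIP model scores each webcam
# frame against a set of "a {emotion} face" prompts, and Gradio displays the
# resulting probabilities as a live label.
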
from transformers import AutoProcessor, AutoModel
import torch
import gradio as gr

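# Load the CLIP processor (image + text preprocessing) and the model weights.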
processor = AutoProcessor.from_pretrained("laion/CLIP-ViT-B-32-laion2B-s34B-b79K")

model = AutoModel.from_pretrained("laion/CLIP-ViT-B-32-laion2B-s34B-b79K")

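# One natural-language prompt per emotion class for zero-shot classification.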
labels = [f'a {emotion} face' for emotion in ['sad', 'happy', 'scared', 'angry', 'surprised', 'disgusted', 'neutral']]

# Pre-compute the prompt embeddings once; padding=True keeps the batch valid if
# the prompts tokenize to different lengths, and no_grad skips autograd tracking.
text_inputs = processor(text=labels, padding=True, return_tensors='pt')
with torch.no_grad():
    text_embeds = model.get_text_features(**text_inputs)
text_embeds /= text_embeds.norm(p=2, dim=-1, keepdim=True)

def classify(image):
    # With a streaming webcam input, Gradio may call this before a frame exists.
    if image is None:
        return None
    with torch.no_grad():
        image_inputs = processor(images=image, return_tensors='pt')
        image_embeds = model.get_image_features(**image_inputs)
        image_embeds /= image_embeds.norm(p=2, dim=-1, keepdim=True)
        # Scaled cosine similarity against each prompt, softmaxed into per-emotion probabilities.
        logits_per_image = torch.matmul(image_embeds, text_embeds.t()) * model.logit_scale.exp()
        probs = logits_per_image.softmax(dim=1).squeeze(0).tolist()
    return dict(zip(labels, probs))

demo = gr.Interface(
    fn=classify,
    # gradio 3.x webcam API; in gradio >= 4.0 this becomes sources=['webcam'].
    inputs=gr.Image(type='pil', source='webcam', streaming=True),
    outputs=gr.Label(),
    live=True,  # streaming webcam input requires a live interface
)

demo.launch()