Spaces:

drdoggo
/

Medical_Image_Understanding_with_VLMs

Runtime error

File size: 2,556 Bytes

9a78b42
 
d5b900f
 
 
 
 
 
 
a5f5ad5
 
 
 
 
 
fcca540
 
 
a5f5ad5
 
 
 
 
 
 
fcca540
a5f5ad5
 
 
 
080a3e4
a5f5ad5
 
 
 
d5b900f
a5f5ad5
 
 
 
 
 
3cbf763
a5f5ad5
 
 
 
 
 
 
 
 
 
b1af7d4
adde5a6
9a78b42
adde5a6
bc22792
7fc5847
d5b900f
9a78b42

import gradio as gr
import cv2
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM
from collections import defaultdict
tokenizer = AutoTokenizer.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")
model = AutoModelForMaskedLM.from_pretrained("microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext")

def mlm(image, text):
    print(text)
    questions_dict = {
        #'location': f'[CLS] Only [MASK] cells have a {cls_name}. [SEP]', #num of mask?
        # 'location': f'[CLS] The {cls_name} normally appears at or near the [MASK] of a cell. [SEP]',
        # 'color': f'[CLS] When a cell is histologically stained, the {cls_name} are in [MASK] color. [SEP]',
        # 'shape': f'[CLS] Mostly the shape of {cls_name} is [MASK]. [SEP]',
        'location': f'This {text} is at [MASK] place',
        'color': f'This {text} is in [MASK] color',
        'shape': f'This {text} is in [MASK] shape',
        #'def': f'{cls_name} is a  . [SEP]',
    }
    ans = list()
    res = defaultdict()
    device = 'cpu'
    for k, v in questions_dict.items():
        predicted_tokens = []
        print(v)
        tokenized_text = tokenizer.tokenize(v)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        # Create the segments tensors.
        segments_ids = [0] * len(tokenized_text)
        
        # Convert inputs to PyTorch tensors
    
        tokens_tensor = torch.tensor([indexed_tokens]).to(device)
        segments_tensors = torch.tensor([segments_ids]).to(device)

        masked_index = tokenized_text.index('[MASK]')
        with torch.no_grad():
            predictions = model(tokens_tensor, segments_tensors)
        
        _, predicted_index = torch.topk(predictions[0][0][masked_index], 1)#.item()
        predicted_index = predicted_index.detach().cpu().numpy()
        print(predicted_index)
        for idx in predicted_index:
            predicted_tokens.append(tokenizer.convert_ids_to_tokens([idx])[0])
        # for i in range(1):
        #     res[text][k].append(predicted_tokens)
        print(predicted_tokens)
        res[k] = predicted_tokens[0]
    color, shape, loc = res['color'], res['shape'], res['location']
    ans = f'{color} color, {shape} shape, {text} at {loc}'
    print(ans)
    return image, ans

def to_black(image, text):
    output = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    outputs = [output, text]
    return outputs

interface = gr.Interface(fn=mlm, inputs=["image", "text"], outputs=["image", "text"])
interface.launch()