---
license: apache-2.0
datasets:
- yiye2023/GUIChat
- yiye2023/GUIAct
pipeline_tag: visual-question-answering
---

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from PIL import Image, ImageDraw, ImageFont
import re


def draw_circle(draw, center, radius=10, width=2, outline_color=(0, 255, 0), is_fill=False, bg_color=(0, 255, 0), transparency=80):
    # Calculate the bounding box coordinates for the circle
    x1 = center[0] - radius
    y1 = center[1] - radius
    x2 = center[0] + radius
    y2 = center[1] + radius
    bbox = (x1, y1, x2, y2)

    # Draw the circle
    if is_fill:
        # Calculate the alpha value based on the transparency percentage
        alpha = int((1 - transparency / 100) * 255)
        # Set the fill color with the specified background color and transparency
        fill_color = tuple(bg_color) + (alpha,)
        draw.ellipse(bbox, width=width, outline=outline_color, fill=fill_color)
    else:
        draw.ellipse(bbox, width=width, outline=outline_color)


def draw_point(draw, center, radius1=3, radius2=6, color=(0, 255, 0)):
    # Mark a point with two concentric circles
    draw_circle(draw, center, radius=radius1, outline_color=color)
    draw_circle(draw, center, radius=radius2, outline_color=color)


def draw_rectangle(draw, box_coords, width=2, outline_color=(0, 255, 0), is_fill=False, bg_color=(0, 255, 0), transparency=80):
    if is_fill:
        # Calculate the alpha value based on the transparency percentage
        alpha = int((1 - transparency / 100) * 255)
        # Set the fill color with the specified background color and transparency
        fill_color = tuple(bg_color) + (alpha,)
        draw.rectangle(box_coords, width=width, outline=outline_color, fill=fill_color)
    else:
        draw.rectangle(box_coords, width=width, outline=outline_color)


def draw(path, out_path, response):
    img = Image.open(path).convert("RGB")
    drawer = ImageDraw.Draw(img)

    # Boxes are expected as <box>(x1,y1),(x2,y2)</box> with coordinates normalized to 0-1000
    box_coords = re.findall(r"<box>(.*?)</box>", response)
    for box in box_coords:
        try:
            x1, y1, x2, y2 = box.replace("(", "").replace(")", "").split(",")
            x1 = float(x1) * img.width / 1000
            y1 = float(y1) * img.height / 1000
            x2 = float(x2) * img.width / 1000
            y2 = float(y2) * img.height / 1000
            draw_rectangle(drawer, (x1, y1, x2, y2))
        except ValueError:
            print("There were some errors while parsing the bounding box.")

    # Points are expected as <point>(x,y)</point>, also normalized to 0-1000
    point_coords = re.findall(r"<point>(.*?)</point>", response)
    for point in point_coords:
        try:
            x1, y1 = point.replace("(", "").replace(")", "").split(",")
            x1, y1 = float(x1) * img.width / 1000, float(y1) * img.height / 1000
            draw_point(drawer, (x1, y1))
        except ValueError:
            print("There were some errors while parsing the point.")

    img.save(out_path)


def load_model_and_tokenizer(path, device):
    tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(path, device_map=device, trust_remote_code=True).eval()
    return model, tokenizer


def infer(model, tokenizer, image_path, text):
    # Build a multimodal query (image + text) and run one round of chat
    query = tokenizer.from_list_format([
        {'image': image_path},
        {'text': text},
    ])
    response, history = model.chat(tokenizer, query=query, history=None)
    return response


if __name__ == "__main__":
    device = "cuda:0"
    model_path = ""
    model, tokenizer = load_model_and_tokenizer(model_path, device)

    # Simple interactive loop: type "stop" at either prompt to exit
    while True:
        image_path = input("image path >>>>> ")
        if image_path == "stop":
            break
        query = input("Human:")
        if query == "stop":
            break
        response = infer(model, tokenizer, image_path, query)
        draw(image_path, "1.jpg", response)
```
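
The `draw` helper above assumes the model response marks locations with tagged spans whose coordinates are normalized to a 0-1000 range: `<box>(x1,y1),(x2,y2)</box>` for rectangles and `<point>(x,y)</point>` for single points. If your checkpoint emits a different tag or coordinate convention, adjust the regular expressions and the `/1000` scaling accordingly. As a minimal, model-free sketch (the file names and coordinates here are hypothetical), you can check the overlay logic like this:

```python
# Hypothetical response string and image path, just to exercise the parsing/drawing code
fake_response = "Click the button at <point>(512,640)</point> inside <box>(100,200),(300,400)</box>."
draw("screenshot.png", "1.jpg", fake_response)  # writes the annotated image to 1.jpg
```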