import os import numpy as np import cv2 import gradio as gr import webbrowser import threading from typing import Dict, Any from vision_agent.tools import ( load_image, save_image, overlay_bounding_boxes, ocr ) def analyze_pills(image_path: str) -> Dict[str, Any]: # Load the image image = load_image(image_path) # Perform OCR to detect text detections = ocr(image) detected_texts = [d['label'] for d in detections] # Initialize drug description drug_description = "No description found." if detected_texts: drug_description = f"Click the button below to search for: {''.join(detected_texts)}" # Prepare bounding boxes for visualization boxes_for_overlay = [ { 'label': f"{d['label']}", 'score': d['score'], 'bbox': d['bbox'] # Already normalized coordinates } for d in detections ] # Create and save annotated image annotated_image = overlay_bounding_boxes(image, boxes_for_overlay) save_image(annotated_image, "annotated_pills.jpg") return detected_texts, annotated_image, drug_description def open_split_window(url): """Opens the Drugs.com search in a new window.""" threading.Thread(target=lambda: webbrowser.open(url, new=1)).start() def search_drug_online(imprint_text: list): """Opens a web search for the given pill imprint in a window.""" if imprint_text: combined_query = ''.join(imprint_text) search_url = f"https://www.drugs.com/search.php?searchterm={combined_query}" open_split_window(search_url) return f"Searching online for: {combined_query}" return "No imprint text available." 
# Gradio Interface


def _analyze_for_ui(image_path):
    """Adapt analyze_pills() output for the Gradio components.

    analyze_pills() returns the detected texts as a list. A Textbox shows
    that as the list's repr (brackets and quotes), and the same repr is then
    passed on to search_drug_online() as the query. Joining the fragments
    here makes the textbox hold the plain imprint string, matching the
    "search for: ..." description text.
    """
    detected_texts, annotated_image, drug_description = analyze_pills(image_path)
    return ''.join(detected_texts), annotated_image, drug_description


with gr.Blocks() as app:
    gr.Markdown("## 🏥 **Pill Analysis Tool**")
    gr.Markdown("Upload an image of a pill to detect imprint text.")

    # NOTE(review): the original indentation was lost, so which components
    # belong inside this row is an assumption -- confirm the intended layout.
    with gr.Row():
        image_input = gr.Image(type="filepath", label="Upload Pill Image")
        output_text = gr.Textbox(label="Detected Imprint Text")
        output_description = gr.Textbox(label="Drug Identifiers")

    image_output = gr.Image(label="Annotated Image with Text Detection")
    analyze_button = gr.Button("Analyze Pill")
    search_button = gr.Button("Search Drug Info - New Browser Tab will Open")

    # Run OCR on the uploaded image and fill the three output components.
    analyze_button.click(
        fn=_analyze_for_ui,
        inputs=image_input,
        outputs=[output_text, image_output, output_description],
    )

    # Search for whatever imprint text is currently in the textbox.
    search_button.click(
        fn=search_drug_online,
        inputs=[output_text],
        outputs=output_description,
    )

app.launch()