Update app.py
Browse files
app.py
CHANGED
@@ -1,212 +1,103 @@
|
|
1 |
-
import os
|
2 |
-
import numpy as np
|
3 |
-
import tempfile
|
4 |
-
import requests
|
5 |
-
import cv2
|
6 |
import gradio as gr
|
7 |
-
|
8 |
-
from
|
9 |
-
import
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
headers={"Authorization": f"Bearer {COUNTGD_API_KEY}"},
|
55 |
-
files={"image": img_file},
|
56 |
-
data={"prompts": ["water bottle", "beverage can"]}
|
57 |
-
)
|
58 |
-
|
59 |
-
# Handle the response from the countgd model
|
60 |
-
if response.status_code == 200:
|
61 |
-
countgd_pred = response.json()['detections']
|
62 |
-
else:
|
63 |
-
return temp_path, f"Error calling countgd API: {response.text}"
|
64 |
-
|
65 |
-
# Filter & Hitung Kompetitor
|
66 |
-
competitor_class_count = {}
|
67 |
-
competitor_boxes = []
|
68 |
-
for obj in countgd_pred:
|
69 |
-
# Filter and process the detections
|
70 |
-
class_name = obj['label']
|
71 |
-
if class_name.lower() in ['water bottle', 'beverage can']: # Modify this as needed
|
72 |
-
competitor_class_count[class_name] = competitor_class_count.get(class_name, 0) + 1
|
73 |
-
competitor_boxes.append({
|
74 |
-
"class": class_name,
|
75 |
-
"box": obj['bbox'],
|
76 |
-
"confidence": obj['score']
|
77 |
-
})
|
78 |
-
|
79 |
-
total_competitor = sum(competitor_class_count.values())
|
80 |
-
|
81 |
-
# ========== [3] Format Output ==========
|
82 |
-
result_text = "Product Nestle\n\n"
|
83 |
-
for class_name, count in nestle_class_count.items():
|
84 |
-
result_text += f"{class_name}: {count}\n"
|
85 |
-
result_text += f"\nTotal Products Nestle: {total_nestle}\n\n"
|
86 |
-
|
87 |
-
# Unclassified Products (from countgd model)
|
88 |
-
if competitor_class_count:
|
89 |
-
result_text += f"Total Unclassified Products: {total_competitor}\n"
|
90 |
else:
|
91 |
-
|
92 |
-
|
93 |
-
# ========== [4] Visualisasi ==========
|
94 |
-
img = cv2.imread(temp_path)
|
95 |
-
|
96 |
-
# Nestlé (Hijau)
|
97 |
-
for pred in yolo_pred['predictions']:
|
98 |
-
x, y, w, h = pred['x'], pred['y'], pred['width'], pred['height']
|
99 |
-
cv2.rectangle(img, (int(x-w/2), int(y-h/2)), (int(x+w/2), int(y+h/2)), (0,255,0), 2)
|
100 |
-
cv2.putText(img, pred['class'], (int(x-w/2), int(y-h/2-10)),
|
101 |
-
cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0,255,0), 2)
|
102 |
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
|
107 |
-
cv2.putText(img, f"{comp['class']} {comp['confidence']:.2f}",
|
108 |
-
(int(x1), int(y1-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 255), 2)
|
109 |
-
|
110 |
-
output_path = "/tmp/combined_output.jpg"
|
111 |
-
cv2.imwrite(output_path, img)
|
112 |
|
113 |
-
|
|
|
|
|
|
|
114 |
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
|
120 |
-
#
|
121 |
-
|
122 |
-
def convert_video_to_mp4(input_path, output_path):
    """
    Re-encode a video to MP4 (H.264 video / AAC audio) using ffmpeg.

    Parameters:
        input_path (str): path of the source video file.
        output_path (str): path where the converted .mp4 is written.

    Returns:
        tuple: ``(output_path, None)`` on success, or
        ``(None, error_message)`` on failure, so callers can always
        unpack two values: ``path, err = convert_video_to_mp4(...)``.
    """
    try:
        # argv list (shell=False) avoids shell injection through the paths;
        # check=True raises CalledProcessError on a non-zero ffmpeg exit.
        subprocess.run(
            ['ffmpeg', '-i', input_path, '-vcodec', 'libx264', '-acodec', 'aac', output_path],
            check=True,
        )
        # BUG FIX: the original returned a bare string here, which broke the
        # caller's two-value unpack (`video_path, err = ...`) on success.
        return output_path, None
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError covers ffmpeg not being installed / on PATH.
        return None, f"Error converting video: {e}"
128 |
-
|
129 |
-
def detect_objects_in_video(video_path):
    """
    Run countgd detection on every frame of a video and render the boxes.

    Each frame is written to a temp JPEG, posted to the countgd API, and
    (when the API answers 200) annotated with red boxes before being
    appended to the output video.

    Parameters:
        video_path (str): path to the input video. Non-MP4 inputs are first
            re-encoded via convert_video_to_mp4.

    Returns:
        str: path of the annotated output video on success, or
        (None, error_message) on failure.
        NOTE(review): the success path returns a single value while the
        error paths return a 2-tuple — callers must handle both shapes;
        kept as-is for backward compatibility.
    """
    temp_output_path = "/tmp/output_video.mp4"
    temp_frames_dir = tempfile.mkdtemp()
    frame_count = 0

    try:
        # Convert video to MP4 if necessary.
        # NOTE(review): the converted file and the VideoWriter below share
        # temp_output_path, so the writer overwrites the file the reader is
        # consuming — likely corrupts output for non-MP4 inputs; confirm.
        if not video_path.endswith(".mp4"):
            video_path, err = convert_video_to_mp4(video_path, temp_output_path)
            if not video_path:
                return None, f"Video conversion error: {err}"

        # Open the input and mirror its geometry/fps on the output writer.
        video = cv2.VideoCapture(video_path)
        frame_rate = int(video.get(cv2.CAP_PROP_FPS))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_size = (frame_width, frame_height)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(temp_output_path, fourcc, frame_rate, frame_size)

        try:
            while True:
                ret, frame = video.read()
                if not ret:
                    break

                # Persist the frame so it can be uploaded to the countgd API.
                frame_path = os.path.join(temp_frames_dir, f"frame_{frame_count}.jpg")
                cv2.imwrite(frame_path, frame)

                # BUG FIX: the original passed open(frame_path, 'rb') inline
                # and never closed it, leaking one file handle per frame.
                with open(frame_path, 'rb') as img_file:
                    response = requests.post(
                        COUNTGD_MODEL_URL,
                        headers={"Authorization": f"Bearer {COUNTGD_API_KEY}"},
                        files={"image": img_file},
                        data={"prompts": ["water bottle", "beverage can"]}
                    )

                # Frames the API rejects are skipped entirely — they are also
                # dropped from the output video (original behavior kept).
                if response.status_code == 200:
                    countgd_pred = response.json()['detections']
                else:
                    continue

                # Draw detections (red boxes + label/score) on the frame.
                # assumes obj['bbox'] is absolute-pixel [x1, y1, x2, y2] —
                # TODO confirm against the countgd API response format.
                for obj in countgd_pred:
                    x1, y1, x2, y2 = obj['bbox']
                    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
                    cv2.putText(frame, f"{obj['label']} {obj['score']:.2f}",
                                (int(x1), int(y1-10)), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 255), 2)

                # Write processed frame to output video.
                output_video.write(frame)
                frame_count += 1
        finally:
            # BUG FIX: release codec/file handles even if a frame raises
            # mid-loop (the original leaked both on any exception).
            video.release()
            output_video.release()

        return temp_output_path

    except Exception as e:
        # Broad catch kept deliberately: this feeds a UI, so any failure is
        # reported as a message rather than crashing the app.
        return None, f"An error occurred: {e}"
193 |
-
|
194 |
-
# ========== Gradio Interface ==========
|
195 |
-
with gr.Blocks(theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate")) as iface:
|
196 |
-
gr.Markdown("""<div style="text-align: center;"><h1>NESTLE - STOCK COUNTING</h1></div>""")
|
197 |
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
211 |
|
212 |
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from typing import Dict

import gradio as gr
import numpy as np
from pillow_heif import register_heif_opener

import vision_agent as va
from vision_agent.tools import *

# Imported after the star import so PIL's Image is the binding in effect:
# the file calls Image.fromarray but never imported Image anywhere.
from PIL import Image

# Register HEIF opener (lets PIL decode .heic/.heif uploads)
register_heif_opener()
12 |
+
def analyze_mixed_boxes(image) -> tuple:
    """
    Analyze an image of mixed beverages (water bottles and beverage cans).

    1) Converts the uploaded PIL image to a numpy array.
    2) Detects items with the 'countgd_object_detection' tool using the
       prompt 'water bottle, beverage can'.
    3) Splits detections into a top and a bottom shelf by comparing each
       detection's vertical center to the image midpoint.
    4) Computes per-shelf and overall counts plus average confidences.
    5) Overlays bounding boxes and converts the result back to PIL.

    Parameters:
        image (PIL.Image): The uploaded image.

    Returns:
        tuple: (summary, annotated_image) where
            summary (Dict): keys total_items, total_water_bottles,
                total_beverage_cans, top_shelf, bottom_shelf, confidence —
                all JSON-serializable.
            annotated_image (PIL.Image): the image with detections drawn.

    BUG FIX: the original returned one dict that embedded the PIL image
    under "annotated_image"; the Gradio interface declares two outputs
    (gr.JSON, gr.Image), so the arity was wrong and the dict was not
    JSON-serializable. The two values are now returned separately.
    """
    # Convert the uploaded image to a numpy array.
    frame = np.array(image)
    height = frame.shape[0]

    # Detect both target classes in a single call.
    detections = countgd_object_detection("water bottle, beverage can", frame)

    # Split detections at the vertical midpoint of the image.
    # NOTE(review): the scaling by `height` assumes det["bbox"] is a
    # normalized [x1, y1, x2, y2] in 0..1 — confirm against the
    # countgd_object_detection return format.
    mid_height = height / 2
    top_shelf_dets = []
    bottom_shelf_dets = []
    for det in detections:
        cy = ((det["bbox"][1] + det["bbox"][3]) / 2) * height
        if cy < mid_height:
            top_shelf_dets.append(det)
        else:
            bottom_shelf_dets.append(det)

    water_bottles = [det for det in detections if det["label"] == "water bottle"]
    beverage_cans = [det for det in detections if det["label"] == "beverage can"]

    def _count(dets, label):
        # Number of detections in `dets` carrying the given label.
        return sum(1 for det in dets if det["label"] == label)

    def _avg_score(dets):
        # Mean confidence; 0 for an empty group (avoids ZeroDivisionError).
        return sum(det["score"] for det in dets) / len(dets) if dets else 0

    # Overlay bounding boxes and convert back to PIL for Gradio display.
    annotated = overlay_bounding_boxes(frame, detections)
    annotated_pil = Image.fromarray(annotated)

    summary = {
        "total_items": len(detections),
        "total_water_bottles": len(water_bottles),
        "total_beverage_cans": len(beverage_cans),
        "top_shelf": {
            "water_bottles": _count(top_shelf_dets, "water bottle"),
            "beverage_cans": _count(top_shelf_dets, "beverage can"),
        },
        "bottom_shelf": {
            "water_bottles": _count(bottom_shelf_dets, "water bottle"),
            "beverage_cans": _count(bottom_shelf_dets, "beverage can"),
        },
        "confidence": {
            "water_bottles": round(_avg_score(water_bottles), 2),
            "beverage_cans": round(_avg_score(beverage_cans), 2),
        },
    }

    # Two return values to match the interface's (gr.JSON, gr.Image) outputs.
    return summary, annotated_pil
94 |
+
# ---- Gradio front end -------------------------------------------------
# One image upload in; a JSON summary plus an annotated image out.
_inputs = gr.Image(type="pil")  # allows image upload
_outputs = [gr.JSON(), gr.Image(type="pil")]  # result + annotated image

iface = gr.Interface(
    analyze_mixed_boxes,
    _inputs,
    _outputs,
    title="Beverage Detection Analysis",
    description="Upload an image containing water bottles and beverage cans, and the tool will analyze the distribution on shelves and display an annotated image.",
)

iface.launch()
|