muhammadsalmanalfaridzi committed on
Commit
0f3d01e
·
verified ·
1 Parent(s): bd977a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -203
app.py CHANGED
@@ -1,212 +1,103 @@
1
- import os
2
- import numpy as np
3
- import tempfile
4
- import requests
5
- import cv2
6
  import gradio as gr
7
- from dotenv import load_dotenv
8
- from roboflow import Roboflow
9
- import subprocess
10
-
11
# ========== Configuration ==========
# Load environment variables from a local .env file into os.environ.
load_dotenv()

# Roboflow configuration — all values come from the environment.
rf_api_key = os.getenv("ROBOFLOW_API_KEY")
workspace = os.getenv("ROBOFLOW_WORKSPACE")
project_name = os.getenv("ROBOFLOW_PROJECT")
# NOTE(review): int(None) raises TypeError if ROBOFLOW_MODEL_VERSION is unset —
# the app fails fast at import time; confirm that is intended.
model_version = int(os.getenv("ROBOFLOW_MODEL_VERSION"))

# countgd model configuration (Landing AI hosted endpoint).
COUNTGD_API_KEY = os.getenv("COUNTGD_API_KEY")
COUNTGD_MODEL_URL = "https://api.landing.ai/v1/tools/countgd-object-detection"  # TODO: confirm endpoint

# Model initialization: resolve the Roboflow project and bind the hosted
# YOLO model version used by detect_combined below.
rf = Roboflow(api_key=rf_api_key)
project = rf.workspace(workspace).project(project_name)
yolo_model = project.version(model_version).model
28
-
29
- # ========== Fungsi Deteksi Kombinasi ==========
30
# ========== Combined Detection ==========
def detect_combined(image):
    """Detect Nestlé products (Roboflow YOLO) and unclassified competitor
    products (countgd API) in one image and annotate both.

    Parameters:
        image (PIL.Image): input image from the Gradio image widget.

    Returns:
        tuple[str, str]: (path of an image to display, human-readable count
        summary or an error message).
    """
    # Persist the upload so both the Roboflow SDK and the HTTP API can read it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        image.save(temp_file, format="JPEG")
        temp_path = temp_file.name

    try:
        # ---- [1] YOLO: per-class Nestlé product counts ----
        yolo_pred = yolo_model.predict(temp_path, confidence=50, overlap=80).json()

        nestle_class_count = {}
        for pred in yolo_pred['predictions']:
            class_name = pred['class']
            nestle_class_count[class_name] = nestle_class_count.get(class_name, 0) + 1
        total_nestle = sum(nestle_class_count.values())

        # ---- [2] countgd: competitor detection ----
        with open(temp_path, 'rb') as img_file:
            response = requests.post(
                COUNTGD_MODEL_URL,
                headers={"Authorization": f"Bearer {COUNTGD_API_KEY}"},
                files={"image": img_file},
                data={"prompts": ["water bottle", "beverage can"]},
            )
        if response.status_code != 200:
            # Keep temp_path on disk so Gradio can still display the upload.
            return temp_path, f"Error calling countgd API: {response.text}"
        countgd_pred = response.json()['detections']

        # Keep only the prompted competitor classes and count them per class.
        competitor_class_count = {}
        competitor_boxes = []
        for obj in countgd_pred:
            class_name = obj['label']
            if class_name.lower() in ('water bottle', 'beverage can'):
                competitor_class_count[class_name] = competitor_class_count.get(class_name, 0) + 1
                competitor_boxes.append({
                    "class": class_name,
                    "box": obj['bbox'],  # assumed pixel-space [x1, y1, x2, y2] — TODO confirm
                    "confidence": obj['score'],
                })
        total_competitor = sum(competitor_class_count.values())

        # ---- [3] Text summary ----
        result_text = "Product Nestle\n\n"
        for class_name, count in nestle_class_count.items():
            result_text += f"{class_name}: {count}\n"
        result_text += f"\nTotal Products Nestle: {total_nestle}\n\n"
        if competitor_class_count:
            result_text += f"Total Unclassified Products: {total_competitor}\n"
        else:
            result_text += "No Unclassified Products detected\n"

        # ---- [4] Visualization ----
        img = cv2.imread(temp_path)

        # Nestlé detections in green (Roboflow boxes are center x/y + w/h).
        for pred in yolo_pred['predictions']:
            x, y, w, h = pred['x'], pred['y'], pred['width'], pred['height']
            cv2.rectangle(img, (int(x - w / 2), int(y - h / 2)),
                          (int(x + w / 2), int(y + h / 2)), (0, 255, 0), 2)
            cv2.putText(img, pred['class'], (int(x - w / 2), int(y - h / 2 - 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 255, 0), 2)

        # Competitor detections in red.
        for comp in competitor_boxes:
            x1, y1, x2, y2 = comp['box']
            cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
            cv2.putText(img, f"{comp['class']} {comp['confidence']:.2f}",
                        (int(x1), int(y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 255), 2)

        output_path = "/tmp/combined_output.jpg"
        cv2.imwrite(output_path, img)

        # BUG FIX: the original removed temp_path in a `finally` block, which
        # also ran on the error returns above and handed Gradio a path to a
        # deleted file. Only delete it once the annotated copy is written.
        os.remove(temp_path)
        return output_path, result_text

    except Exception as e:
        # Keep temp_path on disk so the original upload remains displayable.
        return temp_path, f"Error: {str(e)}"
119
 
120
- # ========== Fungsi untuk Deteksi Video ==========
121
-
122
def convert_video_to_mp4(input_path, output_path):
    """Re-encode *input_path* to an H.264/AAC MP4 at *output_path* via ffmpeg.

    Parameters:
        input_path (str): source video file.
        output_path (str): destination .mp4 file.

    Returns:
        tuple[str | None, str | None]: (output_path, None) on success, or
        (None, error message) on failure.
    """
    try:
        subprocess.run(
            ['ffmpeg', '-i', input_path, '-vcodec', 'libx264', '-acodec', 'aac', output_path],
            check=True,
        )
        # BUG FIX: the original returned a bare string here while the error
        # branch returned a 2-tuple; the caller unpacks two values, so the
        # success path crashed with a ValueError. Always return a 2-tuple.
        return output_path, None
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError covers a missing ffmpeg binary on PATH.
        return None, f"Error converting video: {e}"
128
-
129
def detect_objects_in_video(video_path):
    """Run countgd detection on every frame of a video and write an annotated MP4.

    Parameters:
        video_path (str): path to the uploaded video file.

    Returns:
        str | None: path of the annotated MP4, or None on failure — the single
        Gradio Video output cannot display an error tuple.
    """
    temp_output_path = "/tmp/output_video.mp4"
    temp_frames_dir = tempfile.mkdtemp()
    frame_count = 0

    try:
        # Convert to MP4 first if needed. BUG FIX: the original converted into
        # temp_output_path and then also opened a VideoWriter on that same
        # path, reading and writing the same file; use a separate scratch file.
        if not video_path.endswith(".mp4"):
            converted_path = "/tmp/converted_input.mp4"
            converted = convert_video_to_mp4(video_path, converted_path)
            # Tolerate both helper contracts: a bare path on success (original
            # helper) or a (path, error) tuple (fixed helper).
            if isinstance(converted, tuple):
                video_path, err = converted
                if not video_path:
                    print(f"Video conversion error: {err}")
                    return None
            else:
                video_path = converted

        video = cv2.VideoCapture(video_path)
        frame_rate = int(video.get(cv2.CAP_PROP_FPS))
        frame_width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_size = (frame_width, frame_height)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        output_video = cv2.VideoWriter(temp_output_path, fourcc, frame_rate, frame_size)

        while True:
            ret, frame = video.read()
            if not ret:
                break

            # Save the frame so it can be posted as a JPEG to the API.
            frame_path = os.path.join(temp_frames_dir, f"frame_{frame_count}.jpg")
            cv2.imwrite(frame_path, frame)

            # BUG FIX: the original leaked one open file handle per frame.
            with open(frame_path, 'rb') as frame_file:
                response = requests.post(
                    COUNTGD_MODEL_URL,
                    headers={"Authorization": f"Bearer {COUNTGD_API_KEY}"},
                    files={"image": frame_file},
                    data={"prompts": ["water bottle", "beverage can"]},
                )
            # Frames otherwise accumulate on disk for the whole video.
            os.remove(frame_path)

            if response.status_code == 200:
                # Draw detections in red on the frame.
                for obj in response.json()['detections']:
                    x1, y1, x2, y2 = obj['bbox']  # assumed pixel-space corners — TODO confirm
                    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
                    cv2.putText(frame, f"{obj['label']} {obj['score']:.2f}",
                                (int(x1), int(y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX,
                                0.55, (0, 0, 255), 2)
            # On API failure the raw frame is still written, keeping timing intact.

            output_video.write(frame)
            frame_count += 1

        video.release()
        output_video.release()
        return temp_output_path

    except Exception as e:
        # BUG FIX: the original returned (None, message) here while the success
        # path returned one value; the single Video output expects one value.
        print(f"An error occurred: {e}")
        return None
    finally:
        # Best-effort cleanup of the scratch frame directory.
        try:
            os.rmdir(temp_frames_dir)
        except OSError:
            pass
193
-
194
# ========== Gradio Interface ==========
# Two-column UI: image detection (left) and video detection (right).
with gr.Blocks(theme=gr.themes.Base(primary_hue="teal", secondary_hue="teal", neutral_hue="slate")) as iface:
    gr.Markdown("""<div style="text-align: center;"><h1>NESTLE - STOCK COUNTING</h1></div>""")

    with gr.Row():
        with gr.Column():
            # Image pipeline: detect_combined returns (image path, count text),
            # matching the two outputs wired below.
            input_image = gr.Image(type="pil", label="Input Image")
            detect_image_button = gr.Button("Detect Image")
            output_image = gr.Image(label="Detect Object")
            output_text = gr.Textbox(label="Counting Object")
            detect_image_button.click(fn=detect_combined, inputs=input_image, outputs=[output_image, output_text])

        with gr.Column():
            # Video pipeline: per-frame countgd detection into an annotated MP4.
            input_video = gr.Video(label="Input Video")
            detect_video_button = gr.Button("Detect Video")
            output_video = gr.Video(label="Output Video")
            detect_video_button.click(fn=detect_objects_in_video, inputs=input_video, outputs=[output_video])

# Start the app (blocking call).
iface.launch()
 
 
 
 
 
 
1
from typing import Dict

import gradio as gr
import numpy as np
from PIL import Image
from pillow_heif import register_heif_opener
import vision_agent as va
from vision_agent.tools import *

# Register HEIF opener so PIL can decode .heic/.heif image uploads
register_heif_opener()
12
def analyze_mixed_boxes(image):
    """Analyze an image of mixed beverages (water bottles and beverage cans).

    1) Converts the uploaded PIL image to a numpy array.
    2) Runs 'countgd_object_detection' with the prompt
       'water bottle, beverage can'.
    3) Splits detections into top and bottom shelf by comparing each
       detection's vertical center against the image's vertical midpoint.
    4) Counts bottles and cans per shelf and overall, with average
       confidence scores.
    5) Overlays bounding boxes on the image for visualization.

    Parameters:
        image (PIL.Image): the uploaded image.

    Returns:
        tuple[dict, PIL.Image]: (summary dict, annotated image). Two values
        are returned because the Gradio interface declares two outputs
        (gr.JSON and gr.Image); the original returned a single dict with the
        PIL image embedded, which neither maps onto two output components nor
        serializes as JSON.
    """
    # Work in numpy from here on; the detection and overlay tools take arrays.
    image = np.array(image)
    height, width = image.shape[:2]

    # Detect both target classes in a single call.
    detections = countgd_object_detection("water bottle, beverage can", image)

    # Split by shelf. The bbox coordinates are assumed normalized to [0, 1]
    # (the center is rescaled by `height`) — TODO confirm against the tool docs.
    mid_height = height / 2
    top_shelf_dets = []
    bottom_shelf_dets = []
    for det in detections:
        cy = ((det["bbox"][1] + det["bbox"][3]) / 2) * height
        if cy < mid_height:
            top_shelf_dets.append(det)
        else:
            bottom_shelf_dets.append(det)

    # Per-label totals and average confidence (0 when a class is absent).
    water_bottles = [det for det in detections if det["label"] == "water bottle"]
    beverage_cans = [det for det in detections if det["label"] == "beverage can"]

    avg_bottle_conf = (sum(det["score"] for det in water_bottles) / len(water_bottles)
                       if water_bottles else 0)
    avg_can_conf = (sum(det["score"] for det in beverage_cans) / len(beverage_cans)
                    if beverage_cans else 0)

    top_water_bottles = [det for det in top_shelf_dets if det["label"] == "water bottle"]
    top_beverage_cans = [det for det in top_shelf_dets if det["label"] == "beverage can"]
    bottom_water_bottles = [det for det in bottom_shelf_dets if det["label"] == "water bottle"]
    bottom_beverage_cans = [det for det in bottom_shelf_dets if det["label"] == "beverage can"]

    # Overlay bounding boxes and convert back to PIL for the Gradio Image output.
    annotated_image = overlay_bounding_boxes(image, detections)
    annotated_image_pil = Image.fromarray(annotated_image)

    result = {
        "total_items": len(detections),
        "total_water_bottles": len(water_bottles),
        "total_beverage_cans": len(beverage_cans),
        "top_shelf": {
            "water_bottles": len(top_water_bottles),
            "beverage_cans": len(top_beverage_cans),
        },
        "bottom_shelf": {
            "water_bottles": len(bottom_water_bottles),
            "beverage_cans": len(bottom_beverage_cans),
        },
        "confidence": {
            "water_bottles": round(avg_bottle_conf, 2),
            "beverage_cans": round(avg_can_conf, 2),
        },
    }

    # BUG FIX: return (JSON-serializable summary, image) to match
    # outputs=[gr.JSON(), gr.Image(type="pil")] in the interface.
    return result, annotated_image_pil
93
+
94
# Gradio Interface
# NOTE(review): outputs declares two components (JSON + Image), so
# analyze_mixed_boxes must return (summary_dict, annotated_image) —
# confirm the function's return shape matches.
iface = gr.Interface(
    fn=analyze_mixed_boxes,
    inputs=gr.Image(type="pil"),  # allows image upload
    outputs=[gr.JSON(), gr.Image(type="pil")],  # display result and annotated image
    title="Beverage Detection Analysis",
    description="Upload an image containing water bottles and beverage cans, and the tool will analyze the distribution on shelves and display an annotated image.",
)

# Start the app (blocking call).
iface.launch()