dung-vpt-uney committed
Commit 0ad7cf7 · 1 Parent(s): 799282e

Deploy latest CoRGI Gradio demo

PROGRESS_LOG.md CHANGED
@@ -12,6 +12,7 @@
 - Updated `app.py` to fall back to `demo.queue()` when `concurrency_count` is unsupported, fixing the runtime error seen on Spaces.
 - Added ZeroGPU support: cached model/processor globals live on CUDA when available, a `@spaces.GPU`-decorated executor handles pipeline runs, and requirements now include the `spaces` SDK.
 - Introduced structured logging for the app (`app.py`) and pipeline execution to trace model loads, cache hits, and Gradio lifecycle events on Spaces.
+- Reworked the Gradio UI to show per-step panels with annotated evidence galleries, giving each CoRGI reasoning step its own window alongside the final synthesized answer.
 
 ## 2024-10-21
 - Updated default checkpoints to `Qwen/Qwen3-VL-8B-Thinking` and verified CLI/Gradio/test coverage.
corgi/__pycache__/gradio_app.cpython-313.pyc CHANGED
Binary files a/corgi/__pycache__/gradio_app.cpython-313.pyc and b/corgi/__pycache__/gradio_app.cpython-313.pyc differ
 
corgi/gradio_app.py CHANGED
@@ -2,18 +2,19 @@ from __future__ import annotations
 
 import logging
 from dataclasses import dataclass
-from typing import Callable, Optional
+from typing import Callable, Dict, List, Optional, Tuple
 
 try:
     import spaces  # type: ignore
 except ImportError:  # pragma: no cover - spaces library only on HF Spaces
     spaces = None  # type: ignore
 
-from PIL import Image
+from PIL import Image, ImageDraw
 
 from .cli import DEFAULT_MODEL_ID
 from .pipeline import CoRGIPipeline, PipelineResult
 from .qwen_client import Qwen3VLClient, QwenGenerationConfig
+from .types import GroundedEvidence
 
 
 @dataclass
@@ -26,6 +27,22 @@ _PIPELINE_CACHE: dict[str, CoRGIPipeline] = {}
 _GLOBAL_FACTORY: Callable[[Optional[str]], CoRGIPipeline] | None = None
 logger = logging.getLogger("corgi.gradio_app")
 
+MAX_UI_STEPS = 6
+GALLERY_MAX_DIM = 768
+EVIDENCE_COLORS: Tuple[Tuple[int, int, int], ...] = (
+    (244, 67, 54),   # red
+    (255, 193, 7),   # amber
+    (76, 175, 80),   # green
+    (33, 150, 243),  # blue
+    (156, 39, 176),  # purple
+    (255, 87, 34),   # deep orange
+)
+
+try:
+    _THUMBNAIL_RESAMPLE = Image.Resampling.LANCZOS  # type: ignore[attr-defined]
+except AttributeError:  # pragma: no cover - Pillow < 9.1
+    _THUMBNAIL_RESAMPLE = Image.LANCZOS  # type: ignore
+
 
 def _default_factory(model_id: Optional[str]) -> CoRGIPipeline:
     config = QwenGenerationConfig(model_id=model_id or DEFAULT_MODEL_ID)
@@ -59,13 +76,107 @@ def _execute_pipeline(
         max_regions,
     )
     return pipeline.run(
-        image=image.convert("RGB"),
+        image=image,
         question=question,
         max_steps=max_steps,
         max_regions=max_regions,
     )
 
 
+def _group_evidence_by_step(evidences: List[GroundedEvidence]) -> Dict[int, List[GroundedEvidence]]:
+    grouped: Dict[int, List[GroundedEvidence]] = {}
+    for ev in evidences:
+        grouped.setdefault(ev.step_index, []).append(ev)
+    return grouped
+
+
+def _format_evidence_caption(evidence: GroundedEvidence) -> str:
+    bbox_str = ", ".join(f"{coord:.2f}" for coord in evidence.bbox)
+    parts = [f"Step {evidence.step_index}"]
+    if evidence.description:
+        parts.append(evidence.description)
+    if evidence.confidence is not None:
+        parts.append(f"Confidence: {evidence.confidence:.2f}")
+    parts.append(f"BBox: ({bbox_str})")
+    return "\n".join(parts)
+
+
+def _annotate_evidence_image(
+    image: Image.Image,
+    evidence: GroundedEvidence,
+    color: Tuple[int, int, int],
+) -> Image.Image:
+    base = image.copy().convert("RGBA")
+    overlay = Image.new("RGBA", base.size, (0, 0, 0, 0))
+    draw = ImageDraw.Draw(overlay)
+    width, height = base.size
+
+    x1 = max(0, min(int(evidence.bbox[0] * width), width - 1))
+    y1 = max(0, min(int(evidence.bbox[1] * height), height - 1))
+    x2 = max(0, min(int(evidence.bbox[2] * width), width - 1))
+    y2 = max(0, min(int(evidence.bbox[3] * height), height - 1))
+    x1, x2 = sorted((x1, x2))
+    y1, y2 = sorted((y1, y2))
+
+    outline_width = max(2, int(min(width, height) * 0.005))
+    rgba_color = color + (255,)
+    fill_color = color + (64,)
+
+    draw.rectangle([x1, y1, x2, y2], fill=fill_color, outline=rgba_color, width=outline_width)
+    annotated = Image.alpha_composite(base, overlay).convert("RGB")
+    if max(annotated.size) > GALLERY_MAX_DIM:
+        annotated.thumbnail((GALLERY_MAX_DIM, GALLERY_MAX_DIM), _THUMBNAIL_RESAMPLE)
+    return annotated
+
+
+def _empty_ui_payload(message: str, max_slots: int = MAX_UI_STEPS) -> Tuple[str, List[str], List[List[Tuple[Image.Image, str]]]]:
+    return (
+        f"### Final Answer\n{message}",
+        ["_No step data available._" for _ in range(max_slots)],
+        [[] for _ in range(max_slots)],
+    )
+
+
+def _prepare_ui_payload(
+    image: Image.Image,
+    result: PipelineResult,
+    max_slots: int = MAX_UI_STEPS,
+) -> Tuple[str, List[str], List[List[Tuple[Image.Image, str]]]]:
+    answer_text = f"### Final Answer\n{result.answer or '(no answer returned)'}"
+    evidences_by_step = _group_evidence_by_step(result.evidence)
+
+    step_markdowns: List[str] = []
+    step_galleries: List[List[Tuple[Image.Image, str]]] = []
+
+    for slot_index in range(max_slots):
+        if slot_index < len(result.steps):
+            step = result.steps[slot_index]
+            evidences = evidences_by_step.get(step.index, [])
+            lines = [
+                f"**Step {step.index}:** {step.statement}",
+                f"- Needs vision: {'yes' if step.needs_vision else 'no'}",
+            ]
+            if step.reason:
+                lines.append(f"- Reason: {step.reason}")
+            if evidences:
+                lines.append(f"- Evidence items: {len(evidences)} (see gallery below)")
+            else:
+                lines.append("- No visual evidence returned for this step.")
+            step_markdowns.append("\n".join(lines))
+
+            gallery_entries: List[Tuple[Image.Image, str]] = []
+            for idx, evidence in enumerate(evidences):
+                color = EVIDENCE_COLORS[idx % len(EVIDENCE_COLORS)]
+                annotated = _annotate_evidence_image(image, evidence, color)
+                gallery_entries.append((annotated, _format_evidence_caption(evidence)))
+            step_galleries.append(gallery_entries)
+        else:
+            step_markdowns.append("_No step data returned._")
+            step_galleries.append([])
+
+    return answer_text, step_markdowns, step_galleries
+
+
 if spaces is not None:
 
     @spaces.GPU  # type: ignore[attr-defined]
@@ -137,23 +248,38 @@ def _run_pipeline(
     max_steps: int,
     max_regions: int,
     model_id: Optional[str],
-) -> tuple[PipelineState, str]:
+) -> tuple[PipelineState, Tuple[str, List[str], List[List[Tuple[Image.Image, str]]]]]:
+    target_model = (model_id or DEFAULT_MODEL_ID).strip() or DEFAULT_MODEL_ID
+    cached_pipeline = _PIPELINE_CACHE.get(target_model)
+    base_state = state or PipelineState(model_id=target_model, pipeline=cached_pipeline)
+
     if image is None:
-        return state or PipelineState(model_id=model_id or DEFAULT_MODEL_ID, pipeline=None), "Please provide an image before running the demo."
+        logger.info("Request skipped: no image provided.")
+        return base_state, _empty_ui_payload("Please provide an image before running the demo.")
     if not question.strip():
-        return state or PipelineState(model_id=model_id or DEFAULT_MODEL_ID, pipeline=None), "Please enter a question before running the demo."
-    target_model = (model_id or DEFAULT_MODEL_ID).strip() or DEFAULT_MODEL_ID
+        logger.info("Request skipped: question empty.")
+        return base_state, _empty_ui_payload("Please enter a question before running the demo.")
+
     logger.info("Received request for model_id=%s", target_model)
-    result = _execute_pipeline_gpu(
-        image=image.convert("RGB"),
-        question=question.strip(),
-        max_steps=int(max_steps),
-        max_regions=int(max_regions),
-        model_id=target_model,
-    )
+    rgb_image = image.convert("RGB")
+
+    try:
+        result = _execute_pipeline_gpu(
+            image=rgb_image,
+            question=question.strip(),
+            max_steps=int(max_steps),
+            max_regions=int(max_regions),
+            model_id=target_model,
+        )
+    except Exception as exc:  # pragma: no cover - defensive error handling
+        logger.exception("Pipeline execution failed: %s", exc)
+        return PipelineState(model_id=target_model, pipeline=_PIPELINE_CACHE.get(target_model)), _empty_ui_payload(
+            f"Pipeline error: {exc}"
+        )
+
     new_state = PipelineState(model_id=target_model, pipeline=_PIPELINE_CACHE.get(target_model))
-    markdown = format_result_markdown(result)
-    return new_state, markdown
+    payload = _prepare_ui_payload(rgb_image, result, MAX_UI_STEPS)
+    return new_state, payload
 
 
 def build_demo(
  def build_demo(
@@ -198,11 +324,24 @@ def build_demo(
198
  run_button = gr.Button("Run CoRGI")
199
 
200
  with gr.Column(scale=1, min_width=320):
201
- result_markdown = gr.Markdown(value="Upload an image and ask a question to begin.")
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
  def _on_submit(state_data, image, question, model_id, max_steps, max_regions):
204
  pipeline_state = state_data if isinstance(state_data, PipelineState) else None
205
- new_state, markdown = _run_pipeline(
206
  pipeline_state,
207
  image,
208
  question,
@@ -210,12 +349,20 @@ def build_demo(
                 int(max_regions),
                 model_id if model_id else None,
             )
-            return new_state, markdown
+            answer_text, step_texts, gallery_payloads = payload
+            outputs: List[object] = [new_state, answer_text]
+            outputs.extend(step_texts)
+            outputs.extend(gallery_payloads)
+            return outputs
+
+        output_components = [state, answer_markdown]
+        output_components.extend(step_markdown_components)
+        output_components.extend(step_gallery_components)
 
         run_button.click(
            fn=_on_submit,
            inputs=[state, image_input, question_input, model_id_input, max_steps_slider, max_regions_slider],
-           outputs=[state, result_markdown],
+           outputs=output_components,
        )
 
     return demo
 