from __future__ import annotations

from dataclasses import dataclass
from typing import List, Protocol

from PIL import Image

from .types import (
    GroundedEvidence,
    ReasoningStep,
    evidences_to_serializable,
    steps_to_serializable,
)


class SupportsQwenClient(Protocol):
"""Protocol describing the methods required from a Qwen3-VL client."""
def structured_reasoning(self, image: Image.Image, question: str, max_steps: int) -> List[ReasoningStep]:
...
def extract_step_evidence(
self,
image: Image.Image,
question: str,
step: ReasoningStep,
max_regions: int,
) -> List[GroundedEvidence]:
...
def synthesize_answer(
self,
image: Image.Image,
question: str,
steps: List[ReasoningStep],
evidences: List[GroundedEvidence],
) -> str:
...
@dataclass(frozen=True)
class PipelineResult:
    """Aggregated output of the CoRGI pipeline."""

    question: str
    steps: List[ReasoningStep]
    evidence: List[GroundedEvidence]
    answer: str

    def to_json(self) -> dict:
        return {
            "question": self.question,
            "steps": steps_to_serializable(self.steps),
            "evidence": evidences_to_serializable(self.evidence),
            "answer": self.answer,
        }


class CoRGIPipeline:
"""Orchestrates the CoRGI reasoning pipeline using a Qwen3-VL client."""
def __init__(self, vlm_client: SupportsQwenClient):
if vlm_client is None:
raise ValueError("A Qwen3-VL client instance must be provided.")
self._vlm = vlm_client
def run(
self,
image: Image.Image,
question: str,
max_steps: int = 4,
max_regions: int = 4,
) -> PipelineResult:
steps = self._vlm.structured_reasoning(image=image, question=question, max_steps=max_steps)
evidences: List[GroundedEvidence] = []
for step in steps:
if not step.needs_vision:
continue
step_evs = self._vlm.extract_step_evidence(
image=image,
question=question,
step=step,
max_regions=max_regions,
)
if not step_evs:
continue
evidences.extend(step_evs[:max_regions])
answer = self._vlm.synthesize_answer(image=image, question=question, steps=steps, evidences=evidences)
return PipelineResult(question=question, steps=steps, evidence=evidences, answer=answer)
__all__ = ["CoRGIPipeline", "PipelineResult"]
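

if __name__ == "__main__":  # pragma: no cover
    # Illustrative smoke-test sketch, not part of the public API: it shows how any object
    # satisfying ``SupportsQwenClient`` can drive ``CoRGIPipeline``. ``_NullClient`` below is a
    # hypothetical stand-in that returns no reasoning steps and a canned answer, so no real
    # Qwen3-VL model is needed. Assumes the module is executed with its package on the import
    # path (e.g. via ``python -m <package>.pipeline``) so the relative import of ``.types`` resolves.

    class _NullClient:
        def structured_reasoning(self, image: Image.Image, question: str, max_steps: int) -> List[ReasoningStep]:
            return []  # no steps -> the pipeline skips evidence extraction entirely

        def extract_step_evidence(
            self,
            image: Image.Image,
            question: str,
            step: ReasoningStep,
            max_regions: int,
        ) -> List[GroundedEvidence]:
            return []

        def synthesize_answer(
            self,
            image: Image.Image,
            question: str,
            steps: List[ReasoningStep],
            evidences: List[GroundedEvidence],
        ) -> str:
            return "stub answer"

    # Run the full pipeline on a blank in-memory image and print the serialized result.
    demo_image = Image.new("RGB", (64, 64), color="white")
    result = CoRGIPipeline(vlm_client=_NullClient()).run(image=demo_image, question="What is shown?")
    print(result.to_json())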