|
|
""" |
|
|
โก Speed-Optimized Multi-Agent RAG System for Complex Questions |
|
|
๋ณ๋ ฌ ์ฒ๋ฆฌ, ๋์ ํ์ดํ๋ผ์ธ์ผ๋ก ๋ณต์กํ ์ง๋ฌธ๋ ๋น ๋ฅด๊ฒ ์ฒ๋ฆฌ |
|
|
Enhanced with multi-language support and improved error handling |
|
|
(์บ์ฑ ๊ธฐ๋ฅ ์ ๊ฑฐ ๋ฒ์ ) |
|
|
""" |
|
|
|
|
|
import os |
|
|
import json |
|
|
import time |
|
|
import asyncio |
|
|
import hashlib |
|
|
import re |
|
|
import sys |
|
|
from typing import Optional, List, Dict, Any, Tuple, Generator, AsyncGenerator |
|
|
from datetime import datetime, timedelta |
|
|
from enum import Enum |
|
|
from collections import deque |
|
|
import threading |
|
|
import queue |
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed |
|
|
import aiohttp |
|
|
|
|
|
import requests |
|
|
import gradio as gr |
|
|
from pydantic import BaseModel, Field |
|
|
from dotenv import load_dotenv |
|
|
|
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AgentRole(Enum): |
|
|
"""์์ด์ ํธ ์ญํ ์ ์""" |
|
|
SUPERVISOR = "supervisor" |
|
|
CREATIVE = "creative" |
|
|
CRITIC = "critic" |
|
|
FINALIZER = "finalizer" |
|
|
|
|
|
|
|
|
class ExecutionMode(Enum): |
|
|
"""์คํ ๋ชจ๋ ์ ์""" |
|
|
PARALLEL = "parallel" |
|
|
SEQUENTIAL = "sequential" |
|
|
HYBRID = "hybrid" |
|
|
|
|
|
|
|
|
class Message(BaseModel): |
|
|
role: str |
|
|
content: str |
|
|
timestamp: Optional[datetime] = None |
|
|
|
|
|
|
|
|
class AgentResponse(BaseModel): |
|
|
role: AgentRole |
|
|
content: str |
|
|
processing_time: float |
|
|
metadata: Optional[Dict] = None |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LanguageDetector: |
|
|
"""์ธ์ด ๊ฐ์ง ๋ฐ ์ฒ๋ฆฌ ์ ํธ๋ฆฌํฐ""" |
|
|
|
|
|
@staticmethod |
|
|
def detect_language(text: str) -> str: |
|
|
"""๊ฐ๋จํ ์ธ์ด ๊ฐ์ง""" |
|
|
import re |
|
|
|
|
|
|
|
|
korean_pattern = re.compile('[๊ฐ-ํฃ]+') |
|
|
|
|
|
japanese_pattern = re.compile('[ใ-ใ]+|[ใก-ใดใผ]+') |
|
|
|
|
|
chinese_pattern = re.compile('[\u4e00-\u9fff]+') |
|
|
|
|
|
|
|
|
text_length = len(text) |
|
|
if text_length == 0: |
|
|
return 'en' |
|
|
|
|
|
korean_chars = len(korean_pattern.findall(text)) |
|
|
japanese_chars = len(japanese_pattern.findall(text)) |
|
|
chinese_chars = len(chinese_pattern.findall(text)) |
|
|
|
|
|
|
|
|
if korean_chars > 0 and (korean_chars / text_length > 0.1): |
|
|
return 'ko' |
|
|
|
|
|
elif japanese_chars > 0: |
|
|
return 'ja' |
|
|
|
|
|
elif chinese_chars > 0: |
|
|
return 'zh' |
|
|
else: |
|
|
return 'en' |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class AsyncBraveSearch: |
|
|
"""๋น๋๊ธฐ Brave ๊ฒ์ ํด๋ผ์ด์ธํธ with retry logic""" |
|
|
|
|
|
def __init__(self, api_key: Optional[str] = None): |
|
|
self.api_key = api_key or os.getenv("BRAVE_SEARCH_API_KEY") |
|
|
self.base_url = "https://api.search.brave.com/res/v1/web/search" |
|
|
self.max_retries = 3 |
|
|
|
|
|
async def search_async(self, query: str, count: int = 5, lang: str = 'ko') -> List[Dict]: |
|
|
"""๋น๋๊ธฐ ๊ฒ์ with retry""" |
|
|
if not self.api_key: |
|
|
return [] |
|
|
|
|
|
headers = { |
|
|
"Accept": "application/json", |
|
|
"X-Subscription-Token": self.api_key |
|
|
} |
|
|
|
|
|
|
|
|
lang_params = { |
|
|
'ko': {"search_lang": "ko", "country": "KR"}, |
|
|
'en': {"search_lang": "en", "country": "US"}, |
|
|
'ja': {"search_lang": "ja", "country": "JP"}, |
|
|
'zh': {"search_lang": "zh", "country": "CN"} |
|
|
} |
|
|
|
|
|
params = { |
|
|
"q": query, |
|
|
"count": count, |
|
|
"text_decorations": False, |
|
|
**lang_params.get(lang, lang_params['en']) |
|
|
} |
|
|
|
|
|
for attempt in range(self.max_retries): |
|
|
try: |
|
|
async with aiohttp.ClientSession() as session: |
|
|
async with session.get( |
|
|
self.base_url, |
|
|
headers=headers, |
|
|
params=params, |
|
|
timeout=aiohttp.ClientTimeout(total=5) |
|
|
) as response: |
|
|
if response.status == 200: |
|
|
data = await response.json() |
|
|
|
|
|
results = [] |
|
|
if "web" in data and "results" in data["web"]: |
|
|
for item in data["web"]["results"][:count]: |
|
|
results.append({ |
|
|
"title": item.get("title", ""), |
|
|
"url": item.get("url", ""), |
|
|
"description": item.get("description", ""), |
|
|
"age": item.get("age", "") |
|
|
}) |
|
|
|
|
|
return results |
|
|
elif response.status == 429: |
|
|
await asyncio.sleep(2 ** attempt) |
|
|
continue |
|
|
            except aiohttp.ClientError:
|
|
if attempt < self.max_retries - 1: |
|
|
await asyncio.sleep(2 ** attempt) |
|
|
continue |
|
|
except Exception: |
|
|
pass |
|
|
|
|
|
return [] |
|
|
|
|
|
async def batch_search(self, queries: List[str], lang: str = 'ko') -> List[List[Dict]]: |
|
|
"""์ฌ๋ฌ ๊ฒ์์ ๋ฐฐ์น๋ก ์ฒ๋ฆฌ""" |
|
|
tasks = [self.search_async(q, lang=lang) for q in queries] |
|
|
results = await asyncio.gather(*tasks, return_exceptions=True) |
|
|
|
|
|
return [r if not isinstance(r, Exception) else [] for r in results] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OptimizedFireworksClient: |
|
|
"""์ต์ ํ๋ LLM ํด๋ผ์ด์ธํธ with language support""" |
|
|
|
|
|
def __init__(self, api_key: Optional[str] = None): |
|
|
self.api_key = api_key or os.getenv("FIREWORKS_API_KEY") |
|
|
if not self.api_key: |
|
|
raise ValueError("FIREWORKS_API_KEY is required!") |
|
|
|
|
|
self.base_url = "https://api.fireworks.ai/inference/v1/chat/completions" |
|
|
self.headers = { |
|
|
"Accept": "application/json", |
|
|
"Content-Type": "application/json", |
|
|
"Authorization": f"Bearer {self.api_key}" |
|
|
} |
|
|
|
|
|
|
|
|
self.model = "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507" |
|
|
self.max_retries = 3 |
|
|
|
|
|
def compress_prompt(self, text: str, max_length: int = 2000) -> str: |
|
|
"""ํ๋กฌํํธ ์์ถ""" |
|
|
if len(text) <= max_length: |
|
|
return text |
|
|
|
|
|
|
|
|
sentences = text.split('.') |
|
|
compressed = [] |
|
|
current_length = 0 |
|
|
|
|
|
for sentence in sentences: |
|
|
if current_length + len(sentence) > max_length: |
|
|
break |
|
|
compressed.append(sentence) |
|
|
current_length += len(sentence) |
|
|
|
|
|
return '.'.join(compressed) |
|
|
|
|
|
async def chat_stream_async( |
|
|
self, |
|
|
messages: List[Dict], |
|
|
**kwargs |
|
|
) -> AsyncGenerator[str, None]: |
|
|
"""๋น๋๊ธฐ ์คํธ๋ฆฌ๋ฐ ๋ํ with retry""" |
|
|
|
|
|
payload = { |
|
|
"model": self.model, |
|
|
"messages": messages, |
|
|
"max_tokens": kwargs.get("max_tokens", 2000), |
|
|
"temperature": kwargs.get("temperature", 0.7), |
|
|
"top_p": kwargs.get("top_p", 1.0), |
|
|
"top_k": kwargs.get("top_k", 40), |
|
|
"stream": True |
|
|
} |
|
|
|
|
|
for attempt in range(self.max_retries): |
|
|
try: |
|
|
async with aiohttp.ClientSession() as session: |
|
|
async with session.post( |
|
|
self.base_url, |
|
|
headers={**self.headers, "Accept": "text/event-stream"}, |
|
|
json=payload, |
|
|
timeout=aiohttp.ClientTimeout(total=30) |
|
|
                    ) as response:
                        if response.status != 200:
                            # Non-200 bodies are not SSE; raise so the retry loop handles it
                            raise aiohttp.ClientError(f"HTTP {response.status}")
                        async for line in response.content:
                            line_str = line.decode('utf-8').strip()
|
|
if line_str.startswith("data: "): |
|
|
data_str = line_str[6:] |
|
|
if data_str == "[DONE]": |
|
|
break |
|
|
try: |
|
|
data = json.loads(data_str) |
|
|
if "choices" in data and len(data["choices"]) > 0: |
|
|
delta = data["choices"][0].get("delta", {}) |
|
|
if "content" in delta: |
|
|
yield delta["content"] |
|
|
except json.JSONDecodeError: |
|
|
continue |
|
|
return |
|
|
except aiohttp.ClientError as e: |
|
|
if attempt < self.max_retries - 1: |
|
|
await asyncio.sleep(2 ** attempt) |
|
|
continue |
|
|
else: |
|
|
yield f"Error after {self.max_retries} attempts: {str(e)}" |
|
|
except Exception as e: |
|
|
yield f"Unexpected error: {str(e)}" |
|
|
break |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class LightweightReasoningChain: |
|
|
"""๋น ๋ฅธ ์ถ๋ก ์ ์ํ ํ
ํ๋ฆฟ ๊ธฐ๋ฐ ์์คํ
""" |
|
|
|
|
|
def __init__(self): |
|
|
self.templates = { |
|
|
"ko": { |
|
|
"problem_solving": { |
|
|
"steps": ["๋ฌธ์ ๋ถํด", "ํต์ฌ ์์ธ", "ํด๊ฒฐ ๋ฐฉ์", "๊ตฌํ ์ ๋ต"], |
|
|
"prompt": "์ฒด๊ณ์ ์ผ๋ก ๋จ๊ณ๋ณ๋ก ๋ถ์ํ๊ณ ํด๊ฒฐ์ฑ
์ ์ ์ํ์ธ์." |
|
|
}, |
|
|
"creative_thinking": { |
|
|
"steps": ["๊ธฐ์กด ์ ๊ทผ", "์ฐฝ์์ ๋์", "ํ์ ํฌ์ธํธ", "์คํ ๋ฐฉ๋ฒ"], |
|
|
"prompt": "๊ธฐ์กด ๋ฐฉ์์ ๋์ด์ ์ฐฝ์์ ์ด๊ณ ํ์ ์ ์ธ ์ ๊ทผ์ ์ ์ํ์ธ์." |
|
|
}, |
|
|
"critical_analysis": { |
|
|
"steps": ["ํํฉ ํ๊ฐ", "๊ฐ์ /์ฝ์ ", "๊ธฐํ/์ํ", "๊ฐ์ ๋ฐฉํฅ"], |
|
|
"prompt": "๋นํ์ ๊ด์ ์์ ์ฒ ์ ํ ๋ถ์ํ๊ณ ๊ฐ์ ์ ์ ๋์ถํ์ธ์." |
|
|
} |
|
|
}, |
|
|
"en": { |
|
|
"problem_solving": { |
|
|
"steps": ["Problem Breakdown", "Key Factors", "Solutions", "Implementation Strategy"], |
|
|
"prompt": "Systematically analyze step by step and provide solutions." |
|
|
}, |
|
|
"creative_thinking": { |
|
|
"steps": ["Traditional Approach", "Creative Alternatives", "Innovation Points", "Execution Method"], |
|
|
"prompt": "Provide creative and innovative approaches beyond conventional methods." |
|
|
}, |
|
|
"critical_analysis": { |
|
|
"steps": ["Current Assessment", "Strengths/Weaknesses", "Opportunities/Threats", "Improvement Direction"], |
|
|
"prompt": "Thoroughly analyze from a critical perspective and derive improvements." |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
def get_reasoning_structure(self, query_type: str, lang: str = 'ko') -> Dict: |
|
|
"""์ฟผ๋ฆฌ ์ ํ์ ๋ง๋ ์ถ๋ก ๊ตฌ์กฐ ๋ฐํ""" |
|
|
lang_templates = self.templates.get(lang, self.templates['en']) |
|
|
return lang_templates.get(query_type, lang_templates["problem_solving"]) |
|
|
|
|
|
def get_reasoning_pattern(self, query: str, lang: str = 'ko') -> Optional[Dict]: |
|
|
"""์ฟผ๋ฆฌ์ ์ ํฉํ ์ถ๋ก ํจํด ๋ฐํ""" |
|
|
query_lower = query.lower() |
|
|
|
|
|
|
|
|
pattern_keywords = { |
|
|
            'ko': {
                'problem_solving': ['해결', '방법', '전략', '계획'],
                'creative_thinking': ['창의적', '혁신적', '새로운', '아이디어'],
                'critical_analysis': ['분석', '평가', '비교', '영향']
            },
|
|
'en': { |
|
|
'problem_solving': ['solve', 'solution', 'strategy', 'plan'], |
|
|
'creative_thinking': ['creative', 'innovative', 'novel', 'idea'], |
|
|
'critical_analysis': ['analyze', 'evaluate', 'compare', 'impact'] |
|
|
} |
|
|
} |
|
|
|
|
|
keywords = pattern_keywords.get(lang, pattern_keywords['en']) |
|
|
|
|
|
for pattern_type, words in keywords.items(): |
|
|
if any(word in query_lower for word in words): |
|
|
return self.get_reasoning_structure(pattern_type, lang) |
|
|
|
|
|
return self.get_reasoning_structure('problem_solving', lang) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class QualityChecker: |
|
|
"""ํ์ง ์ฒดํฌ ๋ฐ ์กฐ๊ธฐ ์ข
๋ฃ ๊ฒฐ์ """ |
|
|
|
|
|
def __init__(self, min_quality: float = 0.75): |
|
|
self.min_quality = min_quality |
|
|
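        # Per-metric weights; they sum to 1.0 (heuristic values).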
self.quality_metrics = { |
|
|
"length": 0.2, |
|
|
"structure": 0.3, |
|
|
"completeness": 0.3, |
|
|
"clarity": 0.2 |
|
|
} |
|
|
|
|
|
def evaluate_response(self, response: str, query: str, lang: str = 'ko') -> Tuple[float, bool]: |
|
|
"""์๋ต ํ์ง ํ๊ฐ (์ธ์ด๋ณ)""" |
|
|
scores = {} |
|
|
|
|
|
|
|
|
min_length = {'ko': 500, 'en': 400, 'ja': 400, 'zh': 300} |
|
|
target_length = min_length.get(lang, 400) |
|
|
|
|
|
|
|
|
scores["length"] = min(len(response) / target_length, 1.0) |
|
|
|
|
|
|
|
|
        structure_markers = {
            'ko': ["1.", "2.", "•", "-", "첫째", "둘째", "결론", "요약"],
            'en': ["1.", "2.", "•", "-", "First", "Second", "Conclusion", "Summary"],
            'ja': ["1.", "2.", "•", "-", "第一", "第二", "結論", "要約"],
            'zh': ["1.", "2.", "•", "-", "第一", "第二", "结论", "总结"]
        }
|
|
|
|
|
markers = structure_markers.get(lang, structure_markers['en']) |
|
|
scores["structure"] = sum(1 for m in markers if m in response) / len(markers) |
|
|
|
|
|
|
|
|
query_words = set(query.split()) |
|
|
response_words = set(response.split()) |
|
|
scores["completeness"] = len(query_words & response_words) / max(len(query_words), 1) |
|
|
|
|
|
|
|
|
        sentence_delimiters = {
            'ko': '.',
            'en': '.',
            'ja': '。',
            'zh': '。'
        }
|
|
delimiter = sentence_delimiters.get(lang, '.') |
|
|
sentences = response.split(delimiter) |
|
|
avg_sentence_length = sum(len(s.split()) for s in sentences) / max(len(sentences), 1) |
|
|
scores["clarity"] = min(avg_sentence_length / 20, 1.0) |
|
|
|
|
|
|
|
|
total_score = sum( |
|
|
scores[metric] * weight |
|
|
for metric, weight in self.quality_metrics.items() |
|
|
) |
|
|
|
|
|
should_continue = total_score < self.min_quality |
|
|
|
|
|
return total_score, should_continue |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OptimizedStreaming: |
|
|
"""์คํธ๋ฆฌ๋ฐ ๋ฒํผ ์ต์ ํ with adaptive buffering""" |
|
|
|
|
|
def __init__(self, chunk_size: int = 20, flush_interval: float = 0.05): |
|
|
self.chunk_size = chunk_size |
|
|
self.flush_interval = flush_interval |
|
|
self.buffer = "" |
|
|
self.last_flush = time.time() |
|
|
self.adaptive_size = chunk_size |
|
|
|
|
|
async def buffer_and_yield( |
|
|
self, |
|
|
stream: AsyncGenerator[str, None], |
|
|
adaptive: bool = True |
|
|
) -> AsyncGenerator[str, None]: |
|
|
"""๋ฒํผ๋ง๋ ์คํธ๋ฆฌ๋ฐ with adaptive sizing""" |
|
|
|
|
|
chunk_count = 0 |
|
|
async for chunk in stream: |
|
|
self.buffer += chunk |
|
|
current_time = time.time() |
|
|
chunk_count += 1 |
|
|
|
|
|
|
|
|
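            # Every 10 chunks, retune the flush threshold: grow it when chunks
            # arrive rapidly (<20 ms apart), shrink it when they arrive slowly.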
if adaptive and chunk_count % 10 == 0: |
|
|
time_diff = current_time - self.last_flush |
|
|
if time_diff < 0.02: |
|
|
self.adaptive_size = min(self.adaptive_size + 5, 100) |
|
|
elif time_diff > 0.1: |
|
|
self.adaptive_size = max(self.adaptive_size - 5, 10) |
|
|
|
|
|
if (len(self.buffer) >= self.adaptive_size or |
|
|
current_time - self.last_flush >= self.flush_interval): |
|
|
|
|
|
yield self.buffer |
|
|
self.buffer = "" |
|
|
self.last_flush = current_time |
|
|
|
|
|
|
|
|
if self.buffer: |
|
|
yield self.buffer |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ResponseCleaner: |
|
|
"""์๋ต ์ ๋ฆฌ ๋ฐ ํฌ๋งทํ
""" |
|
|
|
|
|
@staticmethod |
|
|
def clean_response(response: str) -> str: |
|
|
"""๋ถํ์ํ ๋งํฌ์
์ ๊ฑฐ ๊ฐํ""" |
|
|
|
|
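        # Strip markdown headers, bold/underline runs, horizontal rules, and excess blank lines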
|
response = re.sub(r'^#{1,6}\s+', '', response, flags=re.MULTILINE) |
|
|
|
|
|
|
|
|
response = re.sub(r'\*{2,}|_{2,}|-{3,}', '', response) |
|
|
|
|
|
|
|
|
response = re.sub(r'\n{3,}', '\n\n', response) |
|
|
|
|
|
|
|
|
        unwanted_patterns = [
            r'\| --- # 🌱 \*\*최종 통합 답변:',
            r'\*\*❌오류: ---',
            r'^\s*\*\*\[.*?\]\*\*\s*',
            r'^\s*###\s*',
            r'^\s*##\s*',
            r'^\s*#\s*'
        ]
|
|
|
|
|
for pattern in unwanted_patterns: |
|
|
response = re.sub(pattern, '', response, flags=re.MULTILINE) |
|
|
|
|
|
return response.strip() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SpeedOptimizedMultiAgentSystem: |
|
|
"""์๋ ์ต์ ํ๋ ๋ฉํฐ ์์ด์ ํธ ์์คํ
(์บ์ฑ ์์)""" |
|
|
|
|
|
def __init__(self): |
|
|
self.llm = OptimizedFireworksClient() |
|
|
self.search = AsyncBraveSearch() |
|
|
self.reasoning = LightweightReasoningChain() |
|
|
self.quality_checker = QualityChecker() |
|
|
self.streaming = OptimizedStreaming() |
|
|
self.language_detector = LanguageDetector() |
|
|
self.response_cleaner = ResponseCleaner() |
|
|
|
|
|
|
|
|
self.executor = ThreadPoolExecutor(max_workers=4) |
|
|
|
|
|
def _init_compact_prompts(self, lang: str = 'ko') -> Dict: |
|
|
"""์์ถ๋ ๊ณ ํจ์จ ํ๋กฌํํธ (์ธ์ด๋ณ)""" |
|
|
prompts = { |
|
|
'ko': { |
|
|
AgentRole.SUPERVISOR: """[๊ฐ๋
์-๊ตฌ์กฐ์ค๊ณ] |
|
|
์ฆ์๋ถ์: ํต์ฌ์๋+ํ์์ ๋ณด+๋ต๋ณ๊ตฌ์กฐ |
|
|
์ถ๋ ฅ: 5๊ฐ ํต์ฌํฌ์ธํธ(๊ฐ 1๋ฌธ์ฅ) |
|
|
์ถ๋ก ์ฒด๊ณ ๋ช
์""", |
|
|
|
|
|
AgentRole.CREATIVE: """[์ฐฝ์์ฑ์์ฑ์] |
|
|
์
๋ ฅ๊ตฌ์กฐ ๋ฐ๋ผ ์ฐฝ์์ ํ์ฅ |
|
|
์ค์ฉ์์+ํ์ ์ ๊ทผ+๊ตฌ์ฒด์กฐ์ธ |
|
|
๋ถํ์์ค๋ช
์ ๊ฑฐ""", |
|
|
|
|
|
AgentRole.CRITIC: """[๋นํ์-๊ฒ์ฆ] |
|
|
์ ์๊ฒํ : ์ ํ์ฑ/๋
ผ๋ฆฌ์ฑ/์ค์ฉ์ฑ |
|
|
๊ฐ์ ํฌ์ธํธ 3๊ฐ๋ง |
|
|
๊ฐ 2๋ฌธ์ฅ ์ด๋ด""", |
|
|
|
|
|
AgentRole.FINALIZER: """[์ต์ข
ํตํฉ] |
|
|
๋ชจ๋ ์๊ฒฌ ์ข
ํฉโ์ต์ ๋ต๋ณ |
|
|
๋ช
ํ๊ตฌ์กฐ+์ค์ฉ์ ๋ณด+์ฐฝ์๊ท ํ |
|
|
๋ฐ๋ก ํต์ฌ ๋ด์ฉ๋ถํฐ ์์. ๋ถํ์ํ ํค๋๋ ๋งํฌ์
์์ด. ๋งํฌ๋ค์ด ํค๋(#, ##, ###) ์ฌ์ฉ ๊ธ์ง.""" |
|
|
}, |
|
|
'en': { |
|
|
AgentRole.SUPERVISOR: """[Supervisor-Structure] |
|
|
Immediate analysis: core intent+required info+answer structure |
|
|
Output: 5 key points (1 sentence each) |
|
|
Clear reasoning framework""", |
|
|
|
|
|
AgentRole.CREATIVE: """[Creative Generator] |
|
|
Follow structure, expand creatively |
|
|
Practical examples+innovative approach+specific advice |
|
|
Remove unnecessary explanations""", |
|
|
|
|
|
AgentRole.CRITIC: """[Critic-Verification] |
|
|
Quick review: accuracy/logic/practicality |
|
|
Only 3 improvement points |
|
|
Max 2 sentences each""", |
|
|
|
|
|
AgentRole.FINALIZER: """[Final Integration] |
|
|
Synthesize all inputsโoptimal answer |
|
|
Clear structure+practical info+creative balance |
|
|
Start with core content directly. No unnecessary headers or markup. No markdown headers (#, ##, ###).""" |
|
|
}, |
|
|
'ja': { |
|
|
AgentRole.SUPERVISOR: """[็ฃ็ฃ่
-ๆง้ ่จญ่จ] |
|
|
ๅณๆๅๆ๏ผๆ ธๅฟๆๅณ+ๅฟ
่ฆๆ
ๅ ฑ+ๅ็ญๆง้ |
|
|
ๅบๅ๏ผ5ใคใฎๆ ธๅฟใใคใณใ๏ผๅ1ๆ๏ผ |
|
|
ๆจ่ซไฝ็ณปๆ็คบ""", |
|
|
|
|
|
AgentRole.CREATIVE: """[ๅต้ ๆง็ๆ่
] |
|
|
ๅ
ฅๅๆง้ ใซๅพใฃใฆๅต้ ็ๆกๅผต |
|
|
ๅฎ็จไพ+้ฉๆฐ็ใขใใญใผใ+ๅ
ทไฝ็ใขใใใคใน |
|
|
ไธ่ฆใช่ชฌๆๅ้ค""", |
|
|
|
|
|
AgentRole.CRITIC: """[ๆน่ฉ่
-ๆค่จผ] |
|
|
่ฟ
้ใฌใใฅใผ๏ผๆญฃ็ขบๆง/่ซ็ๆง/ๅฎ็จๆง |
|
|
ๆนๅใใคใณใ3ใคใฎใฟ |
|
|
ๅ2ๆไปฅๅ
""", |
|
|
|
|
|
AgentRole.FINALIZER: """[ๆ็ต็ตฑๅ] |
|
|
ๅ
จๆ่ฆ็ตฑๅโๆ้ฉๅ็ญ |
|
|
ๆ็ขบๆง้ +ๅฎ็จๆ
ๅ ฑ+ๅต้ ๆงใใฉใณใน |
|
|
ๆ ธๅฟๅ
ๅฎนใใ็ดๆฅ้ๅงใไธ่ฆใชใใใใผใใใผใฏใขใใใชใใใใผใฏใใฆใณใใใใผ๏ผ#ใ##ใ###๏ผไฝฟ็จ็ฆๆญขใ""" |
|
|
}, |
|
|
'zh': { |
|
|
AgentRole.SUPERVISOR: """[ไธป็ฎก-็ปๆ่ฎพ่ฎก] |
|
|
็ซๅณๅๆ๏ผๆ ธๅฟๆๅพ+ๆ้ไฟกๆฏ+็ญๆก็ปๆ |
|
|
่พๅบ๏ผ5ไธชๆ ธๅฟ่ฆ็น๏ผๆฏไธช1ๅฅ๏ผ |
|
|
ๆจ็ไฝ็ณปๆ็กฎ""", |
|
|
|
|
|
AgentRole.CREATIVE: """[ๅๆ็ๆๅจ] |
|
|
ๆ็ปๆๅ้ ๆงๆฉๅฑ |
|
|
ๅฎ็จ็คบไพ+ๅๆฐๆนๆณ+ๅ
ทไฝๅปบ่ฎฎ |
|
|
ๅ ้คไธๅฟ
่ฆ็่งฃ้""", |
|
|
|
|
|
AgentRole.CRITIC: """[่ฏ่ฎบๅฎถ-้ช่ฏ] |
|
|
ๅฟซ้ๅฎกๆฅ๏ผๅ็กฎๆง/้ป่พๆง/ๅฎ็จๆง |
|
|
ไป
3ไธชๆน่ฟ็น |
|
|
ๆฏไธชๆๅค2ๅฅ""", |
|
|
|
|
|
AgentRole.FINALIZER: """[ๆ็ปๆดๅ] |
|
|
็ปผๅๆๆๆ่งโๆไฝณ็ญๆก |
|
|
ๆธ
ๆฐ็ปๆ+ๅฎ็จไฟกๆฏ+ๅๆๅนณ่กก |
|
|
็ดๆฅไปๆ ธๅฟๅ
ๅฎนๅผๅงใๆ ้ไธๅฟ
่ฆ็ๆ ้ขๆๆ ่ฎฐใ็ฆๆญขไฝฟ็จMarkdownๆ ้ข๏ผ#ใ##ใ###๏ผใ""" |
|
|
} |
|
|
} |
|
|
|
|
|
return prompts.get(lang, prompts['en']) |
|
|
|
|
|
async def parallel_process_agents( |
|
|
self, |
|
|
query: str, |
|
|
search_results: List[Dict], |
|
|
show_progress: bool = True, |
|
|
        lang: Optional[str] = None
|
|
) -> AsyncGenerator[Tuple[str, str], None]: |
|
|
"""๋ณ๋ ฌ ์ฒ๋ฆฌ ํ์ดํ๋ผ์ธ (์บ์ฑ ์์)""" |
|
|
|
|
|
start_time = time.time() |
|
|
|
|
|
|
|
|
if lang is None: |
|
|
lang = self.language_detector.detect_language(query) |
|
|
|
|
|
|
|
|
self.compact_prompts = self._init_compact_prompts(lang) |
|
|
|
|
|
search_context = self._format_search_results(search_results) |
|
|
accumulated_response = "" |
|
|
agent_thoughts = "" |
|
|
|
|
|
|
|
|
reasoning_pattern = self.reasoning.get_reasoning_pattern(query, lang) |
|
|
|
|
|
try: |
|
|
|
|
|
if show_progress: |
|
|
                progress_msg = {
                    'ko': "🚀 병렬 처리 시작\n📋 감독자 분석 + 🔍 추가 검색 동시 진행...\n\n",
                    'en': "🚀 Starting parallel processing\n📋 Supervisor analysis + 🔍 Additional search in progress...\n\n",
                    'ja': "🚀 並列処理開始\n📋 監督者分析 + 🔍 追加検索同時進行中...\n\n",
                    'zh': "🚀 开始并行处理\n📋 主管分析 + 🔍 附加搜索同时进行...\n\n"
                }
|
|
agent_thoughts = progress_msg.get(lang, progress_msg['en']) |
|
|
yield accumulated_response, agent_thoughts |
|
|
|
|
|
|
|
|
            supervisor_prompt_templates = {
                'ko': f"""
질문: {query}
검색결과: {search_context}
추론패턴: {reasoning_pattern}
즉시 핵심구조 5개 제시""",
                'en': f"""
Question: {query}
Search results: {search_context}
Reasoning pattern: {reasoning_pattern}
Immediately provide 5 key structures""",
                'ja': f"""
質問: {query}
検索結果: {search_context}
推論パターン: {reasoning_pattern}
即座に5つの核心構造を提示""",
                'zh': f"""
问题: {query}
搜索结果: {search_context}
推理模式: {reasoning_pattern}
立即提供5个核心结构"""
            }
|
|
|
|
|
supervisor_prompt = supervisor_prompt_templates.get(lang, supervisor_prompt_templates['en']) |
|
|
|
|
|
supervisor_response = "" |
|
|
supervisor_task = self.llm.chat_stream_async( |
|
|
messages=[ |
|
|
{"role": "system", "content": self.compact_prompts[AgentRole.SUPERVISOR]}, |
|
|
{"role": "user", "content": supervisor_prompt} |
|
|
], |
|
|
temperature=0.3, |
|
|
max_tokens=500 |
|
|
) |
|
|
|
|
|
|
|
|
async for chunk in self.streaming.buffer_and_yield(supervisor_task): |
|
|
supervisor_response += chunk |
|
|
if show_progress and len(supervisor_response) < 300: |
|
|
                    supervisor_label = {
                        'ko': "📋 감독자 분석",
                        'en': "📋 Supervisor Analysis",
                        'ja': "📋 監督者分析",
                        'zh': "📋 主管分析"
                    }
|
|
agent_thoughts = f"{supervisor_label.get(lang, supervisor_label['en'])}\n{supervisor_response[:300]}...\n\n" |
|
|
yield accumulated_response, agent_thoughts |
|
|
|
|
|
|
|
|
if show_progress: |
|
|
                creative_msg = {
                    'ko': "🎨 창의성 생성자 + 🔍 비판자 준비...\n\n",
                    'en': "🎨 Creative Generator + 🔍 Critic preparing...\n\n",
                    'ja': "🎨 創造性生成者 + 🔍 批評者準備中...\n\n",
                    'zh': "🎨 创意生成器 + 🔍 评论家准备中...\n\n"
                }
|
|
agent_thoughts += creative_msg.get(lang, creative_msg['en']) |
|
|
yield accumulated_response, agent_thoughts |
|
|
|
|
|
|
|
|
            creative_prompt_templates = {
                'ko': f"""
질문: {query}
감독자구조: {supervisor_response}
검색결과: {search_context}
창의적+실용적 답변 즉시생성""",
                'en': f"""
Question: {query}
Supervisor structure: {supervisor_response}
Search results: {search_context}
Generate creative+practical answer immediately""",
                'ja': f"""
質問: {query}
監督者構造: {supervisor_response}
検索結果: {search_context}
創造的+実用的回答即座生成""",
                'zh': f"""
问题: {query}
主管结构: {supervisor_response}
搜索结果: {search_context}
立即生成创意+实用答案"""
            }
|
|
|
|
|
creative_prompt = creative_prompt_templates.get(lang, creative_prompt_templates['en']) |
|
|
|
|
|
creative_response = "" |
|
|
creative_partial = "" |
|
|
critic_started = False |
|
|
critic_response = "" |
|
|
|
|
|
creative_task = self.llm.chat_stream_async( |
|
|
messages=[ |
|
|
{"role": "system", "content": self.compact_prompts[AgentRole.CREATIVE]}, |
|
|
{"role": "user", "content": creative_prompt} |
|
|
], |
|
|
temperature=0.8, |
|
|
max_tokens=1500 |
|
|
) |
|
|
|
|
|
|
|
|
async for chunk in self.streaming.buffer_and_yield(creative_task): |
|
|
creative_response += chunk |
|
|
creative_partial += chunk |
|
|
|
|
|
|
|
|
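                # Pipeline: once ~500 chars of the creative draft exist, start the critic concurrently.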
if len(creative_partial) > 500 and not critic_started: |
|
|
critic_started = True |
|
|
|
|
|
|
|
|
                    critic_prompt_templates = {
                        'ko': f"""
원본질문: {query}
창의성답변(일부): {creative_partial}
신속검토→개선점 3개""",
                        'en': f"""
Original question: {query}
Creative answer (partial): {creative_partial}
Quick review→3 improvements""",
                        'ja': f"""
元の質問: {query}
創造的回答（一部）: {creative_partial}
迅速レビュー→改善点3つ""",
                        'zh': f"""
原始问题: {query}
创意答案（部分）: {creative_partial}
快速审查→3个改进点"""
                    }
|
|
|
|
|
critic_prompt = critic_prompt_templates.get(lang, critic_prompt_templates['en']) |
|
|
|
|
|
critic_task = asyncio.create_task( |
|
|
self._run_critic_async(critic_prompt) |
|
|
) |
|
|
|
|
|
if show_progress: |
|
|
display_creative = creative_response[:400] + "..." if len(creative_response) > 400 else creative_response |
|
|
                    creative_label = {
                        'ko': "🎨 창의성 생성자",
                        'en': "🎨 Creative Generator",
                        'ja': "🎨 創造性生成者",
                        'zh': "🎨 创意生成器"
                    }
|
|
agent_thoughts = f"{creative_label.get(lang, creative_label['en'])}\n{display_creative}\n\n" |
|
|
yield accumulated_response, agent_thoughts |
|
|
|
|
|
|
|
|
if critic_started: |
|
|
critic_response = await critic_task |
|
|
|
|
|
if show_progress: |
|
|
                    critic_label = {
                        'ko': "🔍 비판자 검토",
                        'en': "🔍 Critic Review",
                        'ja': "🔍 批評者レビュー",
                        'zh': "🔍 评论家审查"
                    }
|
|
agent_thoughts += f"{critic_label.get(lang, critic_label['en'])}\n{critic_response[:200]}...\n\n" |
|
|
yield accumulated_response, agent_thoughts |
|
|
|
|
|
|
|
|
quality_score, need_more = self.quality_checker.evaluate_response( |
|
|
creative_response, query, lang |
|
|
) |
|
|
|
|
|
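            # Early termination: a high-scoring creative draft is cleaned and returned without the finalizer pass.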
if not need_more and quality_score > 0.85: |
|
|
|
|
|
accumulated_response = self.response_cleaner.clean_response(creative_response) |
|
|
|
|
|
if show_progress: |
|
|
                    quality_msg = {
                        'ko': f"✅ 품질 충족 (점수: {quality_score:.2f})\n조기 완료!\n",
                        'en': f"✅ Quality met (score: {quality_score:.2f})\nEarly completion!\n",
                        'ja': f"✅ 品質満足 (スコア: {quality_score:.2f})\n早期完了!\n",
                        'zh': f"✅ 质量满足 (分数: {quality_score:.2f})\n提前完成!\n"
                    }
|
|
agent_thoughts += quality_msg.get(lang, quality_msg['en']) |
|
|
|
|
|
yield accumulated_response, agent_thoughts |
|
|
return |
|
|
|
|
|
|
|
|
if show_progress: |
|
|
                final_msg = {
                    'ko': "✅ 최종 통합 중...\n\n",
                    'en': "✅ Final integration in progress...\n\n",
                    'ja': "✅ 最終統合中...\n\n",
                    'zh': "✅ 最终整合中...\n\n"
                }
|
|
agent_thoughts += final_msg.get(lang, final_msg['en']) |
|
|
yield accumulated_response, agent_thoughts |
|
|
|
|
|
|
|
|
            final_prompt_templates = {
                'ko': f"""
질문: {query}
창의성답변: {creative_response}
비판피드백: {critic_response}
감독자구조: {supervisor_response}
최종통합→완벽답변. 마크다운 헤더(#, ##, ###) 사용 금지.""",
                'en': f"""
Question: {query}
Creative answer: {creative_response}
Critic feedback: {critic_response}
Supervisor structure: {supervisor_response}
Final integration→perfect answer. No markdown headers (#, ##, ###).""",
                'ja': f"""
質問: {query}
創造的回答: {creative_response}
批評フィードバック: {critic_response}
監督者構造: {supervisor_response}
最終統合→完璧な回答。マークダウンヘッダー（#、##、###）使用禁止。""",
                'zh': f"""
问题: {query}
创意答案: {creative_response}
评论反馈: {critic_response}
主管结构: {supervisor_response}
最终整合→完美答案。禁止使用Markdown标题（#、##、###）。"""
            }
|
|
|
|
|
final_prompt = final_prompt_templates.get(lang, final_prompt_templates['en']) |
|
|
|
|
|
final_task = self.llm.chat_stream_async( |
|
|
messages=[ |
|
|
{"role": "system", "content": self.compact_prompts[AgentRole.FINALIZER]}, |
|
|
{"role": "user", "content": final_prompt} |
|
|
], |
|
|
temperature=0.5, |
|
|
max_tokens=2500 |
|
|
) |
|
|
|
|
|
|
|
|
accumulated_response = "" |
|
|
|
|
|
async for chunk in final_task: |
|
|
accumulated_response += chunk |
|
|
|
|
|
cleaned_response = self.response_cleaner.clean_response(accumulated_response) |
|
|
yield cleaned_response, agent_thoughts |
|
|
|
|
|
|
|
|
accumulated_response = self.response_cleaner.clean_response(accumulated_response) |
|
|
|
|
|
|
|
|
processing_time = time.time() - start_time |
|
|
            time_msg = {
                'ko': f"\n\n---\n⚡ 처리 시간: {processing_time:.1f}초",
                'en': f"\n\n---\n⚡ Processing time: {processing_time:.1f} seconds",
                'ja': f"\n\n---\n⚡ 処理時間: {processing_time:.1f}秒",
                'zh': f"\n\n---\n⚡ 处理时间: {processing_time:.1f}秒"
            }
|
|
accumulated_response += time_msg.get(lang, time_msg['en']) |
|
|
|
|
|
yield accumulated_response, agent_thoughts |
|
|
|
|
|
except Exception as e: |
|
|
            error_msg = {
                'ko': f"❌ 오류 발생: {str(e)}",
                'en': f"❌ Error occurred: {str(e)}",
                'ja': f"❌ エラー発生: {str(e)}",
                'zh': f"❌ 发生错误: {str(e)}"
            }
|
|
yield error_msg.get(lang, error_msg['en']), agent_thoughts |
|
|
|
|
|
async def _run_critic_async(self, prompt: str) -> str: |
|
|
"""๋นํ์ ๋น๋๊ธฐ ์คํ with error handling""" |
|
|
try: |
|
|
response = "" |
|
|
async for chunk in self.llm.chat_stream_async( |
|
|
messages=[ |
|
|
{"role": "system", "content": self.compact_prompts[AgentRole.CRITIC]}, |
|
|
{"role": "user", "content": prompt} |
|
|
], |
|
|
temperature=0.2, |
|
|
max_tokens=500 |
|
|
): |
|
|
response += chunk |
|
|
return response |
|
|
except Exception as e: |
|
|
|
|
|
            # Crude fallback: assume Korean if the Korean word for "question" appears in the prompt
            lang = 'ko' if '질문' in prompt else 'en'
            error_msg = {
                'ko': "비판 처리 중 오류",
                'en': "Error during critic processing",
                'ja': "批評処理中のエラー",
                'zh': "评论处理中出错"
            }
|
|
return error_msg.get(lang, error_msg['en']) |
|
|
|
|
|
def _format_search_results(self, results: List[Dict]) -> str: |
|
|
"""๊ฒ์ ๊ฒฐ๊ณผ ์์ถ ํฌ๋งท""" |
|
|
if not results: |
|
|
return "No search results" |
|
|
|
|
|
formatted = [] |
|
|
for i, r in enumerate(results[:3], 1): |
|
|
title = r.get('title', '')[:50] |
|
|
desc = r.get('description', '')[:100] |
|
|
formatted.append(f"[{i}]{title}:{desc}") |
|
|
|
|
|
return " | ".join(formatted) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def create_optimized_gradio_interface(): |
|
|
"""์ต์ ํ๋ Gradio ์ธํฐํ์ด์ค (์บ์ฑ ์์)""" |
|
|
|
|
|
|
|
|
system = SpeedOptimizedMultiAgentSystem() |
|
|
|
|
|
def process_query_optimized( |
|
|
message: str, |
|
|
history: List[Dict], |
|
|
use_search: bool, |
|
|
show_agent_thoughts: bool, |
|
|
search_count: int, |
|
|
language_mode: str |
|
|
): |
|
|
"""์ต์ ํ๋ ์ฟผ๋ฆฌ ์ฒ๋ฆฌ - ์ค์๊ฐ ์คํธ๋ฆฌ๋ฐ ๋ฒ์ """ |
|
|
|
|
|
if not message: |
|
|
yield history, "", "" |
|
|
return |
|
|
|
|
|
|
|
|
if language_mode == "Auto": |
|
|
lang = None |
|
|
else: |
|
|
lang_map = {"Korean": "ko", "English": "en", "Japanese": "ja", "Chinese": "zh"} |
|
|
lang = lang_map.get(language_mode, None) |
|
|
|
|
|
|
|
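        # Gradio callbacks run synchronously; nest_asyncio (if installed) allows re-entrant event loops here.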
|
try: |
|
|
import nest_asyncio |
|
|
nest_asyncio.apply() |
|
|
except ImportError: |
|
|
pass |
|
|
|
|
|
try: |
|
|
|
|
|
search_results = [] |
|
|
search_display = "" |
|
|
|
|
|
|
|
|
detected_lang = lang or system.language_detector.detect_language(message) |
|
|
|
|
|
if use_search: |
|
|
|
|
|
                processing_msg = {
                    'ko': "⚡ 고속 처리 중...",
                    'en': "⚡ High-speed processing...",
                    'ja': "⚡ 高速処理中...",
                    'zh': "⚡ 高速处理中..."
                }
|
|
history_with_message = history + [ |
|
|
{"role": "user", "content": message}, |
|
|
{"role": "assistant", "content": processing_msg.get(detected_lang, processing_msg['en'])} |
|
|
] |
|
|
yield history_with_message, "", "" |
|
|
|
|
|
|
|
|
async def search_wrapper(): |
|
|
return await system.search.search_async(message, count=search_count, lang=detected_lang) |
|
|
|
|
|
                search_loop = asyncio.new_event_loop()
                asyncio.set_event_loop(search_loop)
                try:
                    search_results = search_loop.run_until_complete(search_wrapper())
                finally:
                    # Close the search loop before the streaming loop is created below
                    search_loop.close()
|
|
|
|
|
if search_results: |
|
|
                    ref_label = {
                        'ko': "📚 참고자료",
                        'en': "📚 References",
                        'ja': "📚 参考資料",
                        'zh': "📚 参考资料"
                    }
|
|
search_display = f"{ref_label.get(detected_lang, ref_label['en'])}\n\n" |
|
|
for i, result in enumerate(search_results[:3], 1): |
|
|
search_display += f"**{i}. [{result['title'][:50]}]({result['url']})**\n" |
|
|
search_display += f" {result['description'][:100]}...\n\n" |
|
|
|
|
|
|
|
|
current_history = history + [{"role": "user", "content": message}] |
|
|
|
|
|
|
|
|
async def stream_responses(): |
|
|
"""์ค์๊ฐ ์คํธ๋ฆฌ๋ฐ ์ ๋๋ ์ดํฐ""" |
|
|
async for response, thoughts in system.parallel_process_agents( |
|
|
query=message, |
|
|
search_results=search_results, |
|
|
show_progress=show_agent_thoughts, |
|
|
lang=detected_lang |
|
|
): |
|
|
yield response, thoughts |
|
|
|
|
|
|
|
|
loop = asyncio.new_event_loop() |
|
|
asyncio.set_event_loop(loop) |
|
|
|
|
|
|
|
|
gen = stream_responses() |
|
|
|
|
|
while True: |
|
|
try: |
|
|
|
|
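                    # Bridge the async generator into this sync callback by stepping it one item at a time.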
|
task = asyncio.ensure_future(gen.__anext__(), loop=loop) |
|
|
response, thoughts = loop.run_until_complete(task) |
|
|
|
|
|
|
|
|
updated_history = current_history + [ |
|
|
{"role": "assistant", "content": response} |
|
|
] |
|
|
yield updated_history, thoughts, search_display |
|
|
|
|
|
except StopAsyncIteration: |
|
|
break |
|
|
|
|
|
except Exception as e: |
|
|
error_history = history + [ |
|
|
{"role": "user", "content": message}, |
|
|
{"role": "assistant", "content": f"โ Error: {str(e)}"} |
|
|
] |
|
|
yield error_history, "", "" |
|
|
finally: |
|
|
|
|
|
try: |
|
|
loop.close() |
|
|
            except Exception:
                pass
|
|
|
|
|
|
|
|
with gr.Blocks( |
|
|
title="โก Speed-Optimized Multi-Agent System (No Cache)", |
|
|
theme=gr.themes.Soft(), |
|
|
css=""" |
|
|
.gradio-container { |
|
|
max-width: 1400px !important; |
|
|
margin: auto !important; |
|
|
} |
|
|
""" |
|
|
) as demo: |
|
|
gr.Markdown(""" |
|
|
# โก Enhanced Multi-Agent RAG System (์บ์ฑ ์ ๊ฑฐ ๋ฒ์ ) |
|
|
**Complex questions processed within 5-8 seconds | Multi-language support** |
|
|
|
|
|
**Optimization Features:** |
|
|
- ๐ Parallel Processing: Concurrent agent execution |
|
|
- โก Stream Buffering: Network optimization |
|
|
- ๐ฏ Early Termination: Complete immediately when quality is met |
|
|
- ๐ Multi-language: Auto-detect Korean/English/Japanese/Chinese |
|
|
- โ **Caching Disabled**: ์บ์ฑ ๊ธฐ๋ฅ ์ ๊ฑฐ๋จ |
|
|
""") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(scale=3): |
|
|
chatbot = gr.Chatbot( |
|
|
height=500, |
|
|
label="๐ฌ Chat", |
|
|
type="messages" |
|
|
) |
|
|
|
|
|
msg = gr.Textbox( |
|
|
label="Enter complex question", |
|
|
placeholder="Enter complex questions requiring analysis, strategy, or creative solutions...", |
|
|
lines=3 |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
submit = gr.Button("โก High-Speed Process", variant="primary") |
|
|
clear = gr.Button("๐ Reset") |
|
|
|
|
|
with gr.Accordion("๐ค Agent Processing", open=False): |
|
|
agent_thoughts = gr.Markdown() |
|
|
|
|
|
with gr.Accordion("๐ Search Sources", open=False): |
|
|
search_sources = gr.Markdown() |
|
|
|
|
|
with gr.Column(scale=1): |
|
|
gr.Markdown("**โ๏ธ Settings**") |
|
|
|
|
|
language_mode = gr.Radio( |
|
|
choices=["Auto", "Korean", "English", "Japanese", "Chinese"], |
|
|
value="Auto", |
|
|
label="๐ Language Mode" |
|
|
) |
|
|
|
|
|
use_search = gr.Checkbox( |
|
|
label="๐ Use Web Search", |
|
|
value=True |
|
|
) |
|
|
|
|
|
show_agent_thoughts = gr.Checkbox( |
|
|
label="๐ง Show Processing", |
|
|
value=True |
|
|
) |
|
|
|
|
|
search_count = gr.Slider( |
|
|
minimum=3, |
|
|
maximum=10, |
|
|
value=5, |
|
|
step=1, |
|
|
label="Search Results Count" |
|
|
) |
|
|
|
|
|
gr.Markdown(""" |
|
|
**โก Optimization Status** |
|
|
|
|
|
**Active Optimizations:** |
|
|
- โ
Parallel Processing |
|
|
- โ ~~Smart Caching~~ (์ ๊ฑฐ๋จ) |
|
|
- โ
Buffer Streaming |
|
|
- โ
Early Termination |
|
|
- โ
Compressed Prompts |
|
|
- โ
Multi-language Support |
|
|
- โ
Error Recovery |
|
|
|
|
|
**Expected Processing Time:** |
|
|
- Simple Query: 3-5 seconds |
|
|
- Complex Query: 5-8 seconds |
|
|
- Very Complex: 8-12 seconds |
|
|
""") |
|
|
|
|
|
|
|
|
gr.Examples( |
|
|
            examples=[
                "AI 기술이 향후 10년간 한국 경제에 미칠 영향을 다각도로 분석하고 대응 전략을 제시해줘",
                "스타트업이 대기업과 경쟁하기 위한 혁신적인 전략을 단계별로 수립해줘",
                "Analyze the multifaceted impact of quantum computing on current encryption systems and propose alternatives",
                "Design 5 innovative business models for climate change mitigation with practical implementation details",
                "メタバース時代の教育革新方案を実装可能なレベルで提案してください",
                "分析人工智能对未来十年全球经济的影响并提出应对策略"
            ],
|
|
inputs=msg |
|
|
) |
|
|
|
|
|
|
|
|
submit.click( |
|
|
process_query_optimized, |
|
|
inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode], |
|
|
outputs=[chatbot, agent_thoughts, search_sources] |
|
|
).then( |
|
|
lambda: "", |
|
|
None, |
|
|
msg |
|
|
) |
|
|
|
|
|
msg.submit( |
|
|
process_query_optimized, |
|
|
inputs=[msg, chatbot, use_search, show_agent_thoughts, search_count, language_mode], |
|
|
outputs=[chatbot, agent_thoughts, search_sources] |
|
|
).then( |
|
|
lambda: "", |
|
|
None, |
|
|
msg |
|
|
) |
|
|
|
|
|
clear.click( |
|
|
lambda: ([], "", ""), |
|
|
None, |
|
|
[chatbot, agent_thoughts, search_sources] |
|
|
) |
|
|
|
|
|
return demo |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
print(""" |
|
|
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
|
|
โ โก Speed-Optimized Multi-Agent System (No Cache) โก โ |
|
|
โ โ |
|
|
โ High-speed AI system processing complex questions โ |
|
|
โ โ |
|
|
โ Features: โ |
|
|
โ โข Multi-language support (KO/EN/JA/ZH) โ |
|
|
โ โข Improved error recovery โ |
|
|
โ โข NO CACHING (์บ์ฑ ๊ธฐ๋ฅ ์ ๊ฑฐ๋จ) โ |
|
|
โ โข Adaptive stream buffering โ |
|
|
โ โข Response cleaning & formatting โ |
|
|
โโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโโ |
|
|
""") |
|
|
|
|
|
|
|
|
if not os.getenv("FIREWORKS_API_KEY"): |
|
|
print("\nโ ๏ธ FIREWORKS_API_KEY is not set.") |
|
|
|
|
|
if not os.getenv("BRAVE_SEARCH_API_KEY"): |
|
|
print("\nโ ๏ธ BRAVE_SEARCH_API_KEY is not set.") |
|
|
|
|
|
|
|
|
demo = create_optimized_gradio_interface() |
|
|
|
|
|
is_hf_spaces = os.getenv("SPACE_ID") is not None |
|
|
|
|
|
if is_hf_spaces: |
|
|
print("\n๐ค Running in optimized mode on Hugging Face Spaces (No Cache)...") |
|
|
demo.launch(server_name="0.0.0.0", server_port=7860) |
|
|
else: |
|
|
print("\n๐ป Running in optimized mode on local environment (No Cache)...") |
|
|
demo.launch(server_name="0.0.0.0", server_port=7860, share=False) |