import streamlit as st
import asyncio
import nest_asyncio
import traceback
from typing import List, Dict, Any
import re  # for extracting citation IDs

# --- Configuration and Service Initialization ---
try:
    print("App: Loading config...")
    import config
    print("App: Loading utils...")
    from utils import clean_source_text
    print("App: Loading services...")
    from services.retriever import init_retriever, get_retriever_status
    from services.openai_service import init_openai_client, get_openai_status
    print("App: Loading RAG processor...")
    from rag_processor import execute_validate_generate_pipeline, PIPELINE_VALIDATE_GENERATE_GPT4O
    print("App: Imports successful.")
except ImportError as e:
    st.error(f"Fatal Error: Module import failed. {e}", icon="🚨")
    traceback.print_exc()
    st.stop()
except Exception as e:
    st.error(f"Fatal Error during initial setup: {e}", icon="🚨")
    traceback.print_exc()
    st.stop()

# Allow re-entrant event loops so run_until_complete() can be called from Streamlit's script thread.
nest_asyncio.apply()

# --- Initialize Required Services ---
print("App: Initializing services...")
try:
    retriever_ready_init, retriever_msg_init = init_retriever()
    openai_ready_init, openai_msg_init = init_openai_client()
    print("App: Service initialization calls complete.")
except Exception as init_err:
    st.error(f"Error during service initialization: {init_err}", icon="🔥")
    traceback.print_exc()

# --- Streamlit Page Configuration and Styling ---
st.set_page_config(page_title="Divrey Yoel AI Chat (GPT-4o Gen)", layout="wide")

# NOTE: the inline CSS and HTML wrappers below are minimal RTL placeholders;
# adapt the class names and styles to the project's own stylesheet.
st.markdown(
    """
    <style>
    .rtl { direction: rtl; text-align: right; }
    </style>
    """,
    unsafe_allow_html=True,
)
st.markdown("<h1 class='rtl'>דברות קודש - חיפוש ועיון</h1>", unsafe_allow_html=True)
st.markdown(
    "<p class='rtl'>מבוסס על ספרי דברי יואל מסאטמאר זצוק'ל - אחזור מידע חכם (RAG)</p>",
    unsafe_allow_html=True,
)
st.markdown(
    "<p class='rtl'>תהליך: אחזור → אימות (GPT-4o) → יצירה (GPT-4o)</p>",
    unsafe_allow_html=True,
)
", unsafe_allow_html=True) # --- UI Helper Functions --- def display_sidebar() -> Dict[str, Any]: st.sidebar.markdown("

מצב המערכת

", unsafe_allow_html=True) retriever_ready, _ = get_retriever_status() openai_ready, _ = get_openai_status() st.sidebar.markdown( f"

מאחזר (Pinecone): {'✅' if retriever_ready else '❌'}

", unsafe_allow_html=True ) if not retriever_ready: st.sidebar.error("מאחזר אינו זמין.", icon="🛑") st.stop() st.sidebar.markdown( f"

OpenAI ({config.OPENAI_VALIDATION_MODEL} / {config.OPENAI_GENERATION_MODEL}): {'✅' if openai_ready else '❌'}

", unsafe_allow_html=True ) if not openai_ready: st.sidebar.error("OpenAI אינו זמין.", icon="⚠️") st.sidebar.markdown("
", unsafe_allow_html=True) n_retrieve = st.sidebar.slider("מספר פסקאות לאחזור", 1, 300, config.DEFAULT_N_RETRIEVE) max_validate = min(n_retrieve, 100) n_validate = st.sidebar.slider( "פסקאות לאימות (GPT-4o)", 1, max_validate, min(config.DEFAULT_N_VALIDATE, max_validate), disabled=not openai_ready ) st.sidebar.info("התשובות מבוססות רק על המקורות שאומתו.", icon="ℹ️") # Prompt editors with st.sidebar.expander("Edit RAG Prompts", expanded=False): gen_prompt = st.text_area( "System prompt (generation)", value=config.OPENAI_SYSTEM_PROMPT, height=200 ) val_prompt = st.text_area( "Validation prompt (GPT-4o)", value=config.VALIDATION_PROMPT_TEMPLATE, height=200 ) config.OPENAI_SYSTEM_PROMPT = gen_prompt config.VALIDATION_PROMPT_TEMPLATE = val_prompt return {"n_retrieve": n_retrieve, "n_validate": n_validate, "services_ready": (retriever_ready and openai_ready)} def display_chat_message(message: Dict[str, Any]): role = message.get("role", "assistant") with st.chat_message(role): st.markdown(message.get('content', ''), unsafe_allow_html=True) if role == "assistant" and message.get("final_docs"): docs = message["final_docs"] # Expander for all passed docs exp_title = f"הצג {len(docs)} קטעי מקור שנשלחו למחולל" with st.expander(exp_title, expanded=False): st.markdown("
", unsafe_allow_html=True) for i, doc in enumerate(docs, start=1): source = doc.get('source_name', '') or 'מקור לא ידוע' text = clean_source_text(doc.get('hebrew_text', '')) st.markdown( f"
מקור {i}: {source}
", unsafe_allow_html=True ) st.markdown(f"
{text}
", unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) def display_status_updates(status_log: List[str]): if status_log: with st.expander("הצג פרטי עיבוד", expanded=False): for u in status_log: st.markdown( f"- {u}", unsafe_allow_html=True ) # --- Main Application Logic --- if "messages" not in st.session_state: st.session_state.messages = [] rag_params = display_sidebar() # Render history for msg in st.session_state.messages: display_chat_message(msg) if prompt := st.chat_input("שאל שאלה בענייני חסידות...", disabled=not rag_params["services_ready"]): st.session_state.messages.append({"role": "user", "content": prompt}) display_chat_message(st.session_state.messages[-1]) with st.chat_message("assistant"): msg_placeholder = st.empty() status_container = st.status("מעבד בקשה...", expanded=True) chunks: List[str] = [] try: def status_cb(m): status_container.update(label=f"{m}") def stream_cb(c): chunks.append(c) msg_placeholder.markdown( f"
{''.join(chunks)}▌
", unsafe_allow_html=True ) loop = asyncio.get_event_loop() final_rag = loop.run_until_complete( execute_validate_generate_pipeline( history=st.session_state.messages, params=rag_params, status_callback=status_cb, stream_callback=stream_cb ) ) if isinstance(final_rag, dict): raw = final_rag.get("final_response", "") err = final_rag.get("error") log = final_rag.get("status_log", []) docs = final_rag.get("generator_input_documents", []) pipeline = final_rag.get("pipeline_used", PIPELINE_VALIDATE_GENERATE_GPT4O) # wrap result final = raw if not (err and final.strip().startswith("{final or 'לא התקבלה תשובה מהמחולל.'}" msg_placeholder.markdown(final, unsafe_allow_html=True) # Identify cited IDs cited_ids = set(re.findall(r'מקור[\s\u00A0\u2000-\u200F\u2028-\u202F\u205F\u3000]*([0-9]+)', raw)) if cited_ids: enumerated_docs = list(enumerate(docs, start=1)) docs_to_show = [(idx, doc) for idx, doc in enumerated_docs if str(idx) in cited_ids] else: docs_to_show = list(enumerate(docs, start=1)) if docs_to_show: label = f"הצג {len(docs_to_show)} קטעי מקור שהוזכרו בתשובה" with st.expander(label, expanded=False): st.markdown("
", unsafe_allow_html=True) for idx, doc in docs_to_show: source = doc.get('source_name', '') or 'מקור לא ידוע' text = clean_source_text(doc.get('hebrew_text', '')) st.markdown( f"
מקור {idx}: {source}
", unsafe_allow_html=True ) st.markdown(f"
{text}
", unsafe_allow_html=True) st.markdown("
", unsafe_allow_html=True) # store message assistant_data = { "role": "assistant", "content": final, "final_docs": docs, "pipeline_used": pipeline, "status_log": log, "error": err } st.session_state.messages.append(assistant_data) display_status_updates(log) if err: status_container.update(label="שגיאה בעיבוד!", state="error", expanded=False) else: status_container.update(label="העיבוד הושלם!", state="complete", expanded=False) else: msg_placeholder.markdown( "
שגיאה בלתי צפויה בתקשורת.
", unsafe_allow_html=True ) st.session_state.messages.append({ "role": "assistant", "content": "שגיאה בלתי צפויה בתקשורת.", "final_docs": [], "pipeline_used": "Error", "status_log": ["Unexpected result"], "error": "Unexpected" }) status_container.update(label="שגיאה בלתי צפויה!", state="error", expanded=False) except Exception as e: traceback.print_exc() err_html = ( f"
שגיאה קריטית!
נסה לרענן." f"
פרטים
{traceback.format_exc()}
" ) msg_placeholder.error(err_html, icon="🔥") st.session_state.messages.append({ "role": "assistant", "content": err_html, "final_docs": [], "pipeline_used": "Critical Error", "status_log": [f"Critical: {type(e).__name__}"], "error": str(e) }) status_container.update(label=str(e), state="error", expanded=False)