Spaces:

ABE101
/

april28ragdivreyyoel

Running

File size: 4,406 Bytes

ae4184d

import os
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) into the process environment.
# This has to run before the os.environ lookups below so they can see those
# values.
load_dotenv()

# --- LangSmith Configuration ---
# Endpoint, tracing flag and project name fall back to the defaults given
# here when the variable is unset; the API key has no default (None if unset).
LANGSMITH_ENDPOINT = os.getenv("LANGSMITH_ENDPOINT", "https://api.smith.langchain.com")
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING", "true")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "DivreyYoel-RAG-GPT4-Gen")

# --- API Keys (Required) ---
# No fallback values: each is None when its environment variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# --- Model Configuration ---
# Note: EMBEDDING_MODEL is read from the OPENAI_EMBEDDING_MODEL variable.
EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-large")
OPENAI_VALIDATION_MODEL = os.getenv("OPENAI_VALIDATION_MODEL", "gpt-4o")
OPENAI_GENERATION_MODEL = os.getenv("OPENAI_GENERATION_MODEL", "o3")

# --- Pinecone Configuration ---
PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "chassidus-index")

# --- Default RAG Pipeline Parameters ---
# Fixed in code rather than read from the environment.
# NOTE(review): presumably the number of candidates to retrieve and the number
# passed on to validation -- confirm against the pipeline code.
DEFAULT_N_RETRIEVE = 300
DEFAULT_N_VALIDATE = 100

# --- System Prompts ---
OPENAI_SYSTEM_PROMPT = """You are an expert assistant specializing in Chassidic texts, particularly the works of the Satmar Rebbe, Rabbi Yoel Teitelbaum (Divrei Yoel).
Your task is to answer the user's question based *exclusively* on the provided source text snippets (paragraphs from relevant books). Do not use any prior knowledge or external information.

**Source Text Format:**
The relevant source texts will be provided below under the heading "Source Texts:". Each source is numbered and includes an ID.

**Response Requirements:**
1.  **Language:** Respond **exclusively in Hebrew**.
2.  **Basis:** Base your answer *strictly* on the information contained within the provided "Source Texts:". Do not infer, add external knowledge, or answer if the context does not contain relevant information.
3.  **Attribution (Optional but Recommended):** When possible, mention the source number (e.g., "כפי שמופיע במקור 3") where the information comes from. Do not invent information. Use quotes sparingly and only when essential, quoting the Hebrew text directly.
4.  **Completeness:** Synthesize information from *multiple* relevant sources if they contribute to the answer.
5.  **Handling Lack of Information:** If the provided sources do not contain information relevant to the question, state clearly in Hebrew that the provided texts do not contain the answer (e.g., "על פי המקורות שסופקו, אין מידע לענות על שאלה זו."). Do not attempt to answer based on outside knowledge.
6.  **Clarity and Conciseness:** Provide a clear, well-structured, and concise answer in Hebrew. Focus on directly answering the user's question.
7.  **Tone:** Maintain a formal and respectful tone appropriate for discussing religious texts.
8.  **No Greetings/Closings:** Do not include introductory greetings (e.g., "שלום") or concluding remarks (e.g., "בברכה", "מקווה שעזרתי"). Focus solely on the answer.
"""

# Per-paragraph relevance-check prompt. Placeholders to fill in:
#   {user_question}, {paragraph_index}, {hebrew_text}, {english_text}
# The braces of the JSON example are doubled ({{ }}) so they survive as literal
# braces -- i.e. the template is meant to be rendered with str.format (or an
# equivalent); confirm at the call site.
# The \" escapes are redundant inside a triple-quoted string and yield plain
# double-quote characters.
VALIDATION_PROMPT_TEMPLATE = """
User Question (Hebrew):
\"{user_question}\"

Text Paragraph (Paragraph {paragraph_index}):
Hebrew:
---
{hebrew_text}
---
English:
---
{english_text}
---

Instruction:
Analyze the Text Paragraph. Determine if it contains information that *directly* answers or significantly contributes to answering the User Question.
Respond ONLY with valid JSON: {{\"contains_relevant_info\": boolean, \"justification\": \"Brief Hebrew explanation\"}}.
Output only the JSON object.
"""

# --- Helper Functions ---
def check_env_vars():
    """Return the names of required API keys that are unset or empty.

    Checks the module-level LANGSMITH_API_KEY, OPENAI_API_KEY and
    PINECONE_API_KEY values as they are at call time.

    Returns:
        A new list of key names, in the order listed above, for every key
        whose value is falsy (None or an empty string). Empty when all three
        are present.
    """
    required_keys = (
        ("LANGSMITH_API_KEY", LANGSMITH_API_KEY),
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
    )
    return [key_name for key_name, key_value in required_keys if not key_value]

def configure_langsmith():
    """Write the LangSmith settings into os.environ and print a summary.

    LANGSMITH_ENDPOINT and LANGSMITH_TRACING are always written.
    LANGSMITH_API_KEY and LANGSMITH_PROJECT are written only when their
    module-level value is truthy; otherwise any existing environment entry is
    left untouched. The API key is never included in the printed summary.

    Returns:
        None.
    """
    # Always-present settings, written in the same order as before.
    os.environ.update(
        LANGSMITH_ENDPOINT=LANGSMITH_ENDPOINT,
        LANGSMITH_TRACING=LANGSMITH_TRACING,
    )

    # Optional settings: skipped when None or empty, because os.environ
    # only accepts string values.
    optional_settings = (
        ("LANGSMITH_API_KEY", LANGSMITH_API_KEY),
        ("LANGSMITH_PROJECT", LANGSMITH_PROJECT),
    )
    for env_name, env_value in optional_settings:
        if env_value:
            os.environ[env_name] = env_value

    project_label = LANGSMITH_PROJECT or 'Default'
    print(f"LangSmith configured: Endpoint={LANGSMITH_ENDPOINT}, Tracing={LANGSMITH_TRACING}, Project={project_label}")

# Import-time startup: report on the required API keys, then apply the
# LangSmith settings. Missing keys only produce a warning; nothing is raised.
missing = check_env_vars()
print(
    f"Warning: Missing essential API keys: {', '.join(missing)}"
    if missing
    else "All essential API keys found."
)

configure_langsmith()