Spaces:
Running
Running
import os

from dotenv import load_dotenv

# Load variables from a local .env file (when one exists) into the process
# environment before any of the os.environ lookups in this module run.
load_dotenv()
# --- LangSmith Configuration ---
# Each value is read from the environment; the second argument to
# os.environ.get is the fallback used when the variable is not set.
LANGSMITH_ENDPOINT = os.environ.get(
    "LANGSMITH_ENDPOINT", "https://api.smith.langchain.com"
)
# Kept as the string "true"/"false" (not a bool) because it is written back
# into os.environ, which only accepts strings.
LANGSMITH_TRACING = os.environ.get("LANGSMITH_TRACING", "true")
LANGSMITH_API_KEY = os.environ.get("LANGSMITH_API_KEY")  # None when unset
LANGSMITH_PROJECT = os.environ.get("LANGSMITH_PROJECT", "DivreyYoel-RAG-GPT4-Gen")

# --- API Keys (Required) ---
# No fallback: these are None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# --- Model Configuration ---
# NOTE: the embedding model is read from OPENAI_EMBEDDING_MODEL, which does
# not match the constant's own name.
EMBEDDING_MODEL = os.environ.get("OPENAI_EMBEDDING_MODEL", "text-embedding-3-large")
OPENAI_VALIDATION_MODEL = os.environ.get("OPENAI_VALIDATION_MODEL", "gpt-4o")
OPENAI_GENERATION_MODEL = os.environ.get("OPENAI_GENERATION_MODEL", "o3")

# --- Pinecone Configuration ---
PINECONE_INDEX_NAME = os.environ.get("PINECONE_INDEX_NAME", "chassidus-index")

# --- Default RAG Pipeline Parameters ---
# NOTE(review): presumably the number of paragraphs fetched from the index
# and the number then passed on to validation -- confirm against the
# pipeline code that consumes these values.
DEFAULT_N_RETRIEVE = 300
DEFAULT_N_VALIDATE = 100
# --- System Prompts --- | |
OPENAI_SYSTEM_PROMPT = """You are an expert assistant specializing in Chassidic texts, particularly the works of the Satmar Rebbe, Rabbi Yoel Teitelbaum (Divrei Yoel). | |
Your task is to answer the user's question based *exclusively* on the provided source text snippets (paragraphs from relevant books). Do not use any prior knowledge or external information. | |
**Source Text Format:** | |
The relevant source texts will be provided below under the heading "Source Texts:". Each source is numbered and includes an ID. | |
**Response Requirements:** | |
1. **Language:** Respond **exclusively in Hebrew**. | |
2. **Basis:** Base your answer *strictly* on the information contained within the provided "Source Texts:". Do not infer, add external knowledge, or answer if the context does not contain relevant information. | |
3. **Attribution (Optional but Recommended):** When possible, mention the source number (e.g., "ืืคื ืฉืืืคืืข ืืืงืืจ 3") where the information comes from. Do not invent information. Use quotes sparingly and only when essential, quoting the Hebrew text directly. | |
4. **Completeness:** Synthesize information from *multiple* relevant sources if they contribute to the answer. | |
5. **Handling Lack of Information:** If the provided sources do not contain information relevant to the question, state clearly in Hebrew that the provided texts do not contain the answer (e.g., "ืขื ืคื ืืืงืืจืืช ืฉืกืืคืงื, ืืื ืืืืข ืืขื ืืช ืขื ืฉืืื ืื."). Do not attempt to answer based on outside knowledge. | |
6. **Clarity and Conciseness:** Provide a clear, well-structured, and concise answer in Hebrew. Focus on directly answering the user's question. | |
7. **Tone:** Maintain a formal and respectful tone appropriate for discussing religious texts. | |
8. **No Greetings/Closings:** Do not include introductory greetings (e.g., "ืฉืืื") or concluding remarks (e.g., "ืืืจืื", "ืืงืืื ืฉืขืืจืชื"). Focus solely on the answer. | |
""" | |
# Per-paragraph relevance check sent to the validation model.
# Placeholders: {user_question}, {paragraph_index}, {hebrew_text},
# {english_text}. The doubled braces around the JSON example are literal
# braces once the template is rendered with str.format (presumably how callers
# fill it in -- confirm at the call site).
VALIDATION_PROMPT_TEMPLATE = """
User Question (Hebrew):
"{user_question}"
Text Paragraph (Paragraph {paragraph_index}):
Hebrew:
---
{hebrew_text}
---
English:
---
{english_text}
---
Instruction:
Analyze the Text Paragraph. Determine if it contains information that *directly* answers or significantly contributes to answering the User Question.
Respond ONLY with valid JSON: {{"contains_relevant_info": boolean, "justification": "Brief Hebrew explanation"}}.
Output only the JSON object.
"""
# --- Helper Functions ---
def check_env_vars():
    """Return the names of required API-key settings that are not set.

    A setting counts as missing when its module-level value is falsy, i.e.
    ``None`` (variable absent) or an empty string.

    Returns:
        A list of setting names in the fixed order LANGSMITH_API_KEY,
        OPENAI_API_KEY, PINECONE_API_KEY; empty when all three are present.
    """
    # Looked up at call time so the result reflects the current module state.
    required = {
        "LANGSMITH_API_KEY": LANGSMITH_API_KEY,
        "OPENAI_API_KEY": OPENAI_API_KEY,
        "PINECONE_API_KEY": PINECONE_API_KEY,
    }
    return [name for name, value in required.items() if not value]
def configure_langsmith():
    """Write the LangSmith settings into ``os.environ`` and print a summary.

    The endpoint and tracing flag are always written (they have defaults).
    The API key and project name are written only when they are truthy, so an
    unset key never ends up in the environment as an empty value.

    NOTE(review): presumably this exists so that tracing libraries which read
    these variables from the environment see the defaults chosen here --
    confirm against the code that imports this module.
    """
    os.environ["LANGSMITH_ENDPOINT"] = LANGSMITH_ENDPOINT
    os.environ["LANGSMITH_TRACING"] = LANGSMITH_TRACING
    if LANGSMITH_API_KEY:
        os.environ["LANGSMITH_API_KEY"] = LANGSMITH_API_KEY
    if LANGSMITH_PROJECT:
        os.environ["LANGSMITH_PROJECT"] = LANGSMITH_PROJECT
    # The API key is deliberately left out of the summary line.
    print(f"LangSmith configured: Endpoint={LANGSMITH_ENDPOINT}, Tracing={LANGSMITH_TRACING}, Project={LANGSMITH_PROJECT or 'Default'}")
# Import-time checks: report missing API keys (a printed warning only, no
# exception is raised), then write the LangSmith settings to the environment.
missing = check_env_vars()
if missing:
    print(f"Warning: Missing essential API keys: {', '.join(missing)}")
else:
    print("All essential API keys found.")
configure_langsmith()