File size: 4,406 Bytes
ae4184d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
from dotenv import load_dotenv

load_dotenv()

# --- LangSmith tracing settings ---
# Every value below is read from the process environment once, at import time.
# Where a second argument is given it is the fallback used when the variable
# is not set; without one the result is None.
LANGSMITH_ENDPOINT = os.getenv("LANGSMITH_ENDPOINT", "https://api.smith.langchain.com")
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING", "true")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "DivreyYoel-RAG-GPT4-Gen")

# --- Required API keys (None when the variable is unset) ---
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# --- Model names ---
# Note: the embedding model is read from OPENAI_EMBEDDING_MODEL even though the
# constant itself is called EMBEDDING_MODEL.
EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-large")
OPENAI_VALIDATION_MODEL = os.getenv("OPENAI_VALIDATION_MODEL", "gpt-4o")
OPENAI_GENERATION_MODEL = os.getenv("OPENAI_GENERATION_MODEL", "o3")

# --- Pinecone index ---
PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "chassidus-index")

# --- Default RAG pipeline sizes (fixed; not overridable via environment) ---
DEFAULT_N_RETRIEVE = 300
DEFAULT_N_VALIDATE = 100

# --- System Prompts ---
# System message for the generation model: it restricts answers to the
# supplied source snippets and requires the reply to be written in Hebrew.
# The quoted Hebrew example phrases must be real Hebrew text (UTF-8); if they
# are mis-decoded the model is shown unreadable examples.
OPENAI_SYSTEM_PROMPT = """You are an expert assistant specializing in Chassidic texts, particularly the works of the Satmar Rebbe, Rabbi Yoel Teitelbaum (Divrei Yoel).
Your task is to answer the user's question based *exclusively* on the provided source text snippets (paragraphs from relevant books). Do not use any prior knowledge or external information.

**Source Text Format:**
The relevant source texts will be provided below under the heading "Source Texts:". Each source is numbered and includes an ID.

**Response Requirements:**
1.  **Language:** Respond **exclusively in Hebrew**.
2.  **Basis:** Base your answer *strictly* on the information contained within the provided "Source Texts:". Do not infer, add external knowledge, or answer if the context does not contain relevant information.
3.  **Attribution (Optional but Recommended):** When possible, mention the source number (e.g., "כפי שמופיע במקור 3") where the information comes from. Do not invent information. Use quotes sparingly and only when essential, quoting the Hebrew text directly.
4.  **Completeness:** Synthesize information from *multiple* relevant sources if they contribute to the answer.
5.  **Handling Lack of Information:** If the provided sources do not contain information relevant to the question, state clearly in Hebrew that the provided texts do not contain the answer (e.g., "על פי המקורות שסופקו, אין מידע לענות על שאלה זו."). Do not attempt to answer based on outside knowledge.
6.  **Clarity and Conciseness:** Provide a clear, well-structured, and concise answer in Hebrew. Focus on directly answering the user's question.
7.  **Tone:** Maintain a formal and respectful tone appropriate for discussing religious texts.
8.  **No Greetings/Closings:** Do not include introductory greetings (e.g., "שלום") or concluding remarks (e.g., "בברכה", "מקווה שעזרתי"). Focus solely on the answer.
"""

# Per-paragraph relevance-check prompt, meant to be filled with str.format().
# Placeholders: {user_question}, {paragraph_index}, {hebrew_text},
# {english_text}. The doubled braces render as literal { } around the JSON
# schema the model is asked to return.
VALIDATION_PROMPT_TEMPLATE = """
User Question (Hebrew):
"{user_question}"

Text Paragraph (Paragraph {paragraph_index}):
Hebrew:
---
{hebrew_text}
---
English:
---
{english_text}
---

Instruction:
Analyze the Text Paragraph. Determine if it contains information that *directly* answers or significantly contributes to answering the User Question.
Respond ONLY with valid JSON: {{"contains_relevant_info": boolean, "justification": "Brief Hebrew explanation"}}.
Output only the JSON object.
"""

# --- Helper Functions ---
def check_env_vars():
    """Return the names of the required API-key settings that are unset or empty.

    Returns:
        A list of setting names in the fixed order LangSmith, OpenAI,
        Pinecone; the list is empty when every key holds a truthy value.
    """
    required = (
        ("LANGSMITH_API_KEY", LANGSMITH_API_KEY),
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
    )
    return [name for name, value in required if not value]

def configure_langsmith():
    """Write the module's LangSmith settings into ``os.environ`` and print a summary.

    The endpoint and tracing flag are always written. The API key and project
    name are written only when they hold a truthy value, so an unset key is
    never stored as an empty entry.
    """
    settings = {
        "LANGSMITH_ENDPOINT": LANGSMITH_ENDPOINT,
        "LANGSMITH_TRACING": LANGSMITH_TRACING,
    }
    # Optional values: skipped entirely when unset or empty.
    if LANGSMITH_API_KEY:
        settings["LANGSMITH_API_KEY"] = LANGSMITH_API_KEY
    if LANGSMITH_PROJECT:
        settings["LANGSMITH_PROJECT"] = LANGSMITH_PROJECT

    for variable, value in settings.items():
        os.environ[variable] = value

    print(f"LangSmith configured: Endpoint={LANGSMITH_ENDPOINT}, Tracing={LANGSMITH_TRACING}, Project={LANGSMITH_PROJECT or 'Default'}")

# Import-time self-check: report whether any required API key is absent, then
# apply the LangSmith settings to the process environment.
missing = check_env_vars()
print(
    f"Warning: Missing essential API keys: {', '.join(missing)}"
    if missing
    else "All essential API keys found."
)

configure_langsmith()