Spaces:
Running
Running
File size: 4,406 Bytes
ae4184d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import os
from dotenv import load_dotenv
# Load key/value pairs from a .env file (if one is found) into the process
# environment so the os.environ lookups that follow can see them.
load_dotenv()
# --- LangSmith tracing settings (each one overridable via the environment) ---
LANGSMITH_ENDPOINT = os.getenv("LANGSMITH_ENDPOINT", "https://api.smith.langchain.com")
LANGSMITH_TRACING = os.getenv("LANGSMITH_TRACING", "true")
LANGSMITH_API_KEY = os.getenv("LANGSMITH_API_KEY")  # no default: None when unset
LANGSMITH_PROJECT = os.getenv("LANGSMITH_PROJECT", "DivreyYoel-RAG-GPT4-Gen")

# --- Required service credentials (no defaults, so None when unset) ---
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

# --- Model names ---
# Note: the embedding model is read from OPENAI_EMBEDDING_MODEL, not EMBEDDING_MODEL.
EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-3-large")
OPENAI_VALIDATION_MODEL = os.getenv("OPENAI_VALIDATION_MODEL", "gpt-4o")
OPENAI_GENERATION_MODEL = os.getenv("OPENAI_GENERATION_MODEL", "o3")

# --- Pinecone index ---
PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "chassidus-index")

# --- RAG pipeline defaults (fixed values, not read from the environment) ---
DEFAULT_N_RETRIEVE = 300
DEFAULT_N_VALIDATE = 100
# --- System Prompts ---
# System prompt for the answer-generation model. It restricts the model to the
# supplied "Source Texts:" and requires a Hebrew-only reply. The quoted Hebrew
# phrases are examples shown to the model, so they must be stored as real
# Hebrew text (UTF-8), not as mis-decoded bytes.
OPENAI_SYSTEM_PROMPT = """You are an expert assistant specializing in Chassidic texts, particularly the works of the Satmar Rebbe, Rabbi Yoel Teitelbaum (Divrei Yoel).
Your task is to answer the user's question based *exclusively* on the provided source text snippets (paragraphs from relevant books). Do not use any prior knowledge or external information.
**Source Text Format:**
The relevant source texts will be provided below under the heading "Source Texts:". Each source is numbered and includes an ID.
**Response Requirements:**
1. **Language:** Respond **exclusively in Hebrew**.
2. **Basis:** Base your answer *strictly* on the information contained within the provided "Source Texts:". Do not infer, add external knowledge, or answer if the context does not contain relevant information.
3. **Attribution (Optional but Recommended):** When possible, mention the source number (e.g., "כפי שמופיע במקור 3") where the information comes from. Do not invent information. Use quotes sparingly and only when essential, quoting the Hebrew text directly.
4. **Completeness:** Synthesize information from *multiple* relevant sources if they contribute to the answer.
5. **Handling Lack of Information:** If the provided sources do not contain information relevant to the question, state clearly in Hebrew that the provided texts do not contain the answer (e.g., "על פי המקורות שסופקו, אין מידע לענות על שאלה זו."). Do not attempt to answer based on outside knowledge.
6. **Clarity and Conciseness:** Provide a clear, well-structured, and concise answer in Hebrew. Focus on directly answering the user's question.
7. **Tone:** Maintain a formal and respectful tone appropriate for discussing religious texts.
8. **No Greetings/Closings:** Do not include introductory greetings (e.g., "שלום") or concluding remarks (e.g., "בברכה", "מקווה שעזרתי"). Focus solely on the answer.
"""
# Per-paragraph relevance-check prompt. The named fields ({user_question},
# {paragraph_index}, {hebrew_text}, {english_text}) are str.format-style
# placeholders; the doubled braces render as the literal braces of the JSON
# reply schema once the template is formatted.
VALIDATION_PROMPT_TEMPLATE = (
    "\n"
    "User Question (Hebrew):\n"
    '"{user_question}"\n'
    "Text Paragraph (Paragraph {paragraph_index}):\n"
    "Hebrew:\n"
    "---\n"
    "{hebrew_text}\n"
    "---\n"
    "English:\n"
    "---\n"
    "{english_text}\n"
    "---\n"
    "Instruction:\n"
    "Analyze the Text Paragraph. Determine if it contains information that *directly* answers or significantly contributes to answering the User Question.\n"
    'Respond ONLY with valid JSON: {{"contains_relevant_info": boolean, "justification": "Brief Hebrew explanation"}}.\n'
    "Output only the JSON object.\n"
)
# --- Helper Functions ---
def check_env_vars():
    """Return the names of the required API keys that are unset or empty.

    Looks at the module-level LANGSMITH_API_KEY, OPENAI_API_KEY and
    PINECONE_API_KEY values and reports every falsy one by name, in that
    order. An empty list means all three are present.
    """
    required = (
        ("LANGSMITH_API_KEY", LANGSMITH_API_KEY),
        ("OPENAI_API_KEY", OPENAI_API_KEY),
        ("PINECONE_API_KEY", PINECONE_API_KEY),
    )
    return [key_name for key_name, key_value in required if not key_value]
def configure_langsmith():
    """Write the LangSmith settings into os.environ and print a summary.

    The endpoint and tracing flag are always written. The API key and the
    project name are written only when they hold a non-empty value.
    """
    # Always exported.
    for env_name, env_value in (
        ("LANGSMITH_ENDPOINT", LANGSMITH_ENDPOINT),
        ("LANGSMITH_TRACING", LANGSMITH_TRACING),
    ):
        os.environ[env_name] = env_value

    # Exported only when set, so a None/empty value is never assigned.
    for env_name, env_value in (
        ("LANGSMITH_API_KEY", LANGSMITH_API_KEY),
        ("LANGSMITH_PROJECT", LANGSMITH_PROJECT),
    ):
        if env_value:
            os.environ[env_name] = env_value

    project_label = LANGSMITH_PROJECT or 'Default'
    print(f"LangSmith configured: Endpoint={LANGSMITH_ENDPOINT}, Tracing={LANGSMITH_TRACING}, Project={project_label}")
# Import-time self-check: report which required API keys are absent, then
# export the LangSmith settings to the process environment.
# NOTE(review): the file's indentation was lost; these statements are treated
# as module-level code rather than part of configure_langsmith() -- confirm.
missing = check_env_vars()
print(
    f"Warning: Missing essential API keys: {', '.join(missing)}"
    if missing
    else "All essential API keys found."
)
configure_langsmith()
|