aghoraguru commited on
Commit
c0d6e80
·
1 Parent(s): 341cd12

Add Dockerfile and requirements for FastAPI application

Browse files
Files changed (4) hide show
  1. Dockerfile +29 -0
  2. chroma_db/chroma.sqlite3 +0 -0
  3. main.py +838 -0
  4. requirements.txt +28 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use a lightweight Python image
2
+ FROM python:3.9-slim
3
+
4
+ # Set environment variables to prevent interactive prompts
5
+ ENV PYTHONUNBUFFERED=1
6
+ ENV PIP_NO_CACHE_DIR=1
7
+
8
+ # Set the working directory
9
+ WORKDIR /app
10
+
11
+ # Copy only necessary files
12
+ COPY ./chroma_db /app/chroma_db
13
+ COPY main.py /app/main.py
14
+ COPY requirements.txt /app/requirements.txt
15
+
16
+ # Install system-level dependencies (e.g., for Chroma)
17
+ RUN apt-get update && apt-get install -y \
18
+ build-essential \
19
+ libsqlite3-dev \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ # Install Python dependencies
23
+ RUN pip install --upgrade pip && pip install -r requirements.txt
24
+
25
+ # Expose port 8000 for FastAPI
26
+ EXPOSE 8000
27
+
28
+ # Run the FastAPI app with uvicorn
29
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
chroma_db/chroma.sqlite3 ADDED
Binary file (168 kB). View file
 
main.py ADDED
@@ -0,0 +1,838 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+ import json
4
+ import uuid
5
+ from typing import Dict, List, Optional, Any, Union
6
+
7
+ from datetime import datetime
8
+
9
+ # FastAPI and related imports
10
+ from fastapi import (
11
+ FastAPI,
12
+ WebSocket,
13
+ WebSocketDisconnect,
14
+ HTTPException,
15
+ Body,
16
+ Query,
17
+ Depends
18
+ )
19
+ from fastapi.middleware.cors import CORSMiddleware
20
+ from pydantic import BaseModel, EmailStr
21
+ from dotenv import load_dotenv
22
+
23
+ # LangChain / RAG Pipeline Imports (placeholder imports—adjust for your project)
24
+ from langchain_core.documents import Document
25
+ from langchain_community.vectorstores import Chroma
26
+ from langchain_openai import OpenAIEmbeddings, ChatOpenAI
27
+ from langchain_core.tools import tool
28
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
29
+ from langchain_community.document_loaders import DirectoryLoader
30
+ from langgraph.graph import StateGraph, START, END
31
+ from typing_extensions import TypedDict
32
+ from bs4 import BeautifulSoup
33
+ import requests
34
+
35
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
36
+ from fastapi import Depends
37
+
38
+ # Supabase
39
+ from supabase import create_client, Client
40
+
41
+ ###############################################################################
42
+ # ENV & LOGGING SETUP
43
+ ###############################################################################
44
+ load_dotenv()
45
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
46
+ SUPABASE_URL = os.getenv("SUPABASE_URL")
47
+ SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY")
48
+ SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
49
+
50
+ logging.basicConfig(
51
+ level=logging.INFO,
52
+ format="%(asctime)s [%(levelname)s] %(name)s - %(message)s"
53
+ )
54
+
55
+ if not OPENAI_API_KEY:
56
+ raise ValueError("Missing OPENAI_API_KEY in environment!")
57
+ os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
58
+
59
+ ###############################################################################
60
+ # SUPABASE CREDENTIALS & CLIENT INITIALIZATION
61
+ ###############################################################################
62
+
63
+ SUPABASE_URL = os.getenv("SUPABASE_URL")
64
+ SUPABASE_ANON_KEY = os.getenv("SUPABASE_ANON_KEY")
65
+ SUPABASE_SERVICE_ROLE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY")
66
+
67
+ supabase_client: Client = create_client(SUPABASE_URL, SUPABASE_ANON_KEY)
68
+ supabase_admin: Client = create_client(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY)
69
+
70
+ ###############################################################################
71
+ # OPTIONAL: CREATE TABLES / SCHEMA
72
+ ###############################################################################
73
+ def create_db_schema() -> None:
74
+ """
75
+ You can run this function ONCE in a safe admin environment to create
76
+ the necessary tables in your Supabase Postgres database (if they do not exist).
77
+ """
78
+ schema_sql = """
79
+ -- Enable UUID generation if not enabled
80
+ CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
81
+
82
+ CREATE TABLE IF NOT EXISTS public.users (
83
+ id uuid DEFAULT uuid_generate_v4() PRIMARY KEY,
84
+ created_at timestamp with time zone DEFAULT now(),
85
+ email text UNIQUE NOT NULL,
86
+ password_hash text,
87
+ full_name text,
88
+ last_login timestamp with time zone,
89
+ role text DEFAULT 'user'
90
+ );
91
+
92
+ CREATE TABLE IF NOT EXISTS public.chats (
93
+ chat_id uuid DEFAULT uuid_generate_v4() PRIMARY KEY,
94
+ user_id uuid REFERENCES public.users (id) ON DELETE CASCADE,
95
+ created_at timestamp with time zone DEFAULT now(),
96
+ title text,
97
+ last_updated timestamp with time zone DEFAULT now()
98
+ );
99
+
100
+ CREATE TABLE IF NOT EXISTS public.chat_session (
101
+ session_id uuid DEFAULT uuid_generate_v4() PRIMARY KEY,
102
+ chat_id uuid REFERENCES public.chats (chat_id) ON DELETE CASCADE,
103
+ created_at timestamp with time zone DEFAULT now(),
104
+ updated_at timestamp with time zone DEFAULT now(),
105
+ content jsonb DEFAULT '{}'::jsonb
106
+ );
107
+
108
+ CREATE TABLE IF NOT EXISTS public.logs (
109
+ log_id uuid DEFAULT uuid_generate_v4() PRIMARY KEY,
110
+ session_id uuid REFERENCES public.chat_session (session_id) ON DELETE CASCADE,
111
+ timestamp timestamp with time zone DEFAULT now(),
112
+ event_type text,
113
+ details jsonb DEFAULT '{}'::jsonb
114
+ );
115
+
116
+ CREATE TABLE IF NOT EXISTS public.ai_thought_table (
117
+ id uuid DEFAULT uuid_generate_v4() PRIMARY KEY,
118
+ created_at timestamp with time zone DEFAULT now(),
119
+ session_id uuid REFERENCES public.chat_session (session_id) ON DELETE CASCADE,
120
+ thought_process text,
121
+ decision_making jsonb DEFAULT '{}'::jsonb
122
+ );
123
+ """
124
+
125
+ logging.info("Schema creation SQL:\n%s", schema_sql)
126
+ # You can run this SQL in Supabase's SQL Editor, or use an RPC if you have one:
127
+ # supabase_admin.rpc('execute_sql', {'q': schema_sql}).execute()
128
+ # Or manually run it in your project's SQL editor.
129
+ pass
130
+
131
+ ###############################################################################
132
+ # FASTAPI APP
133
+ ###############################################################################
134
+ app = FastAPI(
135
+ title="RAG-GENAI-Women",
136
+ version="1.0.0",
137
+ description=(
138
+ "A production-ready pipeline with session-based JSON storage, plus "
139
+ "auth endpoints for SignUp, Login, and more. "
140
+ "Supports multiple concurrent WebSocket connections (one per session)."
141
+ )
142
+ )
143
+
144
+ app.add_middleware(
145
+ CORSMiddleware,
146
+ allow_origins=["*"], # Restrict in production
147
+ allow_credentials=True,
148
+ allow_methods=["*"],
149
+ allow_headers=["*"],
150
+ )
151
+
152
+ # Add security scheme
153
+ security = HTTPBearer()
154
+ ###############################################################################
155
+ # LLM & VECTOR STORE SETUP
156
+ ###############################################################################
157
+ embeddings_model = OpenAIEmbeddings(model="text-embedding-3-large")
158
+ llm = ChatOpenAI(model="gpt-4o") # Example placeholder name
159
+ llm_decision_maker = ChatOpenAI(model="gpt-4o-mini")
160
+
161
+ vector_store = Chroma(
162
+ persist_directory="./chroma_db",
163
+ embedding_function=embeddings_model
164
+ )
165
+
166
+ def get_time_date() -> str:
167
+ return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
168
+
169
+ def get_country_from_ip() -> str:
170
+ # Stub; in production, do a real IP lookup
171
+ return "India"
172
+
173
+ ###############################################################################
174
+ # WEB SEARCH TOOL
175
+ ###############################################################################
176
+ @tool
177
+ def web_search_tool(query: str) -> Dict[str, Any]:
178
+ """
179
+ Perform a web search and return a single dictionary:
180
+ {"results": [...], "count": <int>}
181
+ """
182
+ from googlesearch import search
183
+
184
+ results = []
185
+ try:
186
+ for url in search(query, num_results=3):
187
+ try:
188
+ resp = requests.get(url, timeout=10)
189
+ soup = BeautifulSoup(resp.text, "html.parser")
190
+ snippet = soup.get_text()[:1000]
191
+ results.append({"url": url, "content": snippet})
192
+ except Exception as e:
193
+ logging.exception("Error fetching content from: %s", url)
194
+ results.append({"url": url, "content": f"Error: {str(e)}"})
195
+ except Exception as e:
196
+ logging.error("Error performing search: %s", e)
197
+
198
+ return {"results": results, "count": len(results)}
199
+
200
+ ###############################################################################
201
+ # RAG PIPELINE
202
+ ###############################################################################
203
+ class State(TypedDict):
204
+ question: str
205
+ retrieved_context: List[Document]
206
+ demographic_context: str
207
+ web_search_needed: int
208
+ web_search_results: List[dict]
209
+ final_answer: str
210
+ tone: str
211
+
212
+ def add_demographic_context(state: State):
213
+ country = get_country_from_ip()
214
+ timestamp = get_time_date()
215
+ demo = f"User from {country} at {timestamp}"
216
+ logging.info(f"[add_demographic_context] {demo}")
217
+ return {"demographic_context": demo}
218
+
219
+ def retrieve(state: State):
220
+ logging.info("[retrieve] Searching vector store...")
221
+ user_query = state["question"]
222
+ docs = vector_store.similarity_search(user_query)
223
+ combined_text = "\n\n".join(doc.page_content for doc in docs)
224
+
225
+ tone = state.get("tone", "detailed")
226
+ sys_msg = (
227
+ "You are an assistant extracting key points. Focus on relevant details. Think like a lawyer and search for relevant details."
228
+ if tone != "casual" else
229
+ "You are an assistant extracting key points in a conversational manner. Think like a lawyer and search for relevant details."
230
+ )
231
+
232
+ prompt = [
233
+ {"role": "system", "content": sys_msg},
234
+ {
235
+ "role": "user",
236
+ "content": (
237
+ f"Query:\n{user_query}\n\nDocs:\n{combined_text}\n"
238
+ "Extract relevant points."
239
+ )
240
+ }
241
+ ]
242
+ resp = llm.invoke(prompt)
243
+ extracted = resp.content.strip()
244
+
245
+ return {"retrieved_context": [Document(page_content=extracted, metadata={"source": "filtered"})]}
246
+
247
+ def decide_web_search(state: State):
248
+ logging.info("[decide_web_search]")
249
+ retrieved_text = "\n\n".join(doc.page_content for doc in state["retrieved_context"])
250
+
251
+ messages = [
252
+ {
253
+ "role": "system",
254
+ "content": (
255
+ "You are a decision-making assistant. "
256
+ "Respond strictly with '1' if a web search is required, or '0' if not."
257
+ )
258
+ },
259
+ {
260
+ "role": "user",
261
+ "content": f"Question:\n{state['question']}\n\nContext:\n{retrieved_text}"
262
+ },
263
+ ]
264
+
265
+ response = llm_decision_maker.invoke(messages)
266
+ decision = response.content.strip()
267
+
268
+ logging.info(f"[decide_web_search] LLM decision raw: {decision}")
269
+
270
+ try:
271
+ return {"web_search_needed": int(decision)}
272
+ except ValueError:
273
+ logging.error(f"Invalid decision response: {decision}")
274
+ raise ValueError(f"Unexpected LLM response for web search decision: {decision}")
275
+
276
+ def perform_web_search(state: State):
277
+ need_search = state.get("web_search_needed", 0)
278
+ if need_search == 1:
279
+ logging.info("[perform_web_search] Searching the web...")
280
+ query = f"{state['question']} ({state['demographic_context']})"
281
+ search_data = web_search_tool.invoke(query) # returns a dict
282
+ structured_results = search_data["results"]
283
+ summarized_results = []
284
+ for r in structured_results:
285
+ c = r["content"]
286
+ sum_prompt = [
287
+ {"role": "system", "content": "Summarize the content with short citation."},
288
+ {"role": "user", "content": f"{c}\nURL: {r['url']}"}
289
+ ]
290
+ sum_resp = llm.invoke(sum_prompt)
291
+ summarized_results.append({
292
+ "url": r["url"],
293
+ "summary": sum_resp.content.strip()
294
+ })
295
+ return {"web_search_results": summarized_results}
296
+ else:
297
+ logging.info("[perform_web_search] Skipping web search...")
298
+ return {"web_search_results": []}
299
+
300
+ def consolidate(state: State):
301
+ logging.info("[consolidate] Generating final answer...")
302
+ retrieved_text = "\n\n".join(doc.page_content for doc in state["retrieved_context"])
303
+ web_data = state.get("web_search_results", [])
304
+
305
+ sources_text = "\n".join(
306
+ f"URL: {r['url']}\nSummary: {r['summary']}" for r in web_data
307
+ )
308
+ tone = state.get("tone", "detailed")
309
+ sys_msg = (
310
+ "You are a precise assistant. Combine context and results into a final answer."
311
+ if tone != "casual" else
312
+ "You are a friendly assistant. Combine context and results in a final manner."
313
+ )
314
+
315
+ final_prompt = [
316
+ {"role": "system", "content": sys_msg},
317
+ {
318
+ "role": "user",
319
+ "content": (
320
+ f"Question:\n{state['question']}\n\n"
321
+ f"Retrieved:\n{retrieved_text}\n\n"
322
+ f"Web:\n{sources_text}\n\n"
323
+ "Give a comprehensive final answer."
324
+ )
325
+ }
326
+ ]
327
+ resp = llm.invoke(final_prompt)
328
+ raw_ans = resp.content.strip()
329
+
330
+ # Summarize for chat
331
+ summ_prompt = [
332
+ {
333
+ "role": "system",
334
+ "content": "Provide a concise version of the answer, preserving key details."
335
+ },
336
+ {
337
+ "role": "user",
338
+ "content": raw_ans
339
+ }
340
+ ]
341
+ s_resp = llm.invoke(summ_prompt)
342
+ chat_ans = s_resp.content.strip()
343
+
344
+ final = {
345
+ "crunched_summary": chat_ans,
346
+ "full_answer": raw_ans,
347
+ "sources": web_data if web_data else None,
348
+ "source_type": (
349
+ "Web + Retrieved" if web_data and retrieved_text
350
+ else "Web" if web_data
351
+ else "Retrieved"
352
+ )
353
+ }
354
+ return {"final_answer": final}
355
+
356
+ ###############################################################################
357
+ # PIPELINE GRAPH BUILD
358
+ ###############################################################################
359
+ graph_builder = StateGraph(State).add_sequence([
360
+ add_demographic_context,
361
+ retrieve,
362
+ decide_web_search,
363
+ perform_web_search,
364
+ consolidate
365
+ ])
366
+ graph_builder.add_edge(START, "add_demographic_context")
367
+ graph_builder.add_edge("add_demographic_context", "retrieve")
368
+ graph_builder.add_edge("retrieve", "decide_web_search")
369
+ graph_builder.add_edge("decide_web_search", "perform_web_search")
370
+ graph_builder.add_edge("perform_web_search", "consolidate")
371
+ graph_builder.add_edge("consolidate", END)
372
+
373
+ pipeline_graph = graph_builder.compile()
374
+
375
+ ###############################################################################
376
+ # SESSION-BASED JSON STORAGE
377
+ ###############################################################################
378
+ SESSIONS_DIR = "sessions_data"
379
+ os.makedirs(SESSIONS_DIR, exist_ok=True)
380
+
381
+ def generate_session_id() -> str:
382
+ return str(uuid.uuid4())
383
+
384
+ def get_session_file(session_id: str) -> str:
385
+ return os.path.join(SESSIONS_DIR, f"{session_id}.json")
386
+
387
+ def load_session_from_json(session_id: str) -> dict:
388
+ """Load or create session data from JSON."""
389
+ path = get_session_file(session_id)
390
+ if os.path.exists(path):
391
+ with open(path, "r", encoding="utf-8") as f:
392
+ return json.load(f)
393
+ else:
394
+ data = {
395
+ "session_id": session_id,
396
+ "started_at": get_time_date(),
397
+ "messages": []
398
+ }
399
+ with open(path, "w", encoding="utf-8") as f:
400
+ json.dump(data, f, indent=2)
401
+ return data
402
+
403
+ def save_session_to_json(session_data: dict):
404
+ session_id = session_data["session_id"]
405
+ path = get_session_file(session_id)
406
+ with open(path, "w", encoding="utf-8") as f:
407
+ json.dump(session_data, f, indent=2)
408
+
409
+ def append_message(session_id: str, role: str, content: str):
410
+ data = load_session_from_json(session_id)
411
+ data["messages"].append({
412
+ "role": role,
413
+ "content": content,
414
+ "timestamp": get_time_date()
415
+ })
416
+ save_session_to_json(data)
417
+
418
+ ###############################################################################
419
+ # AUTH & USER MODELS
420
+ ###############################################################################
421
+ class SignupRequest(BaseModel):
422
+ email: EmailStr
423
+ password: str
424
+ full_name: Optional[str] = None
425
+
426
+ class SignupResponse(BaseModel):
427
+ user_id: Optional[str]
428
+ message: str
429
+
430
+ class LoginRequest(BaseModel):
431
+ email: EmailStr
432
+ password: str
433
+
434
+ class LoginResponse(BaseModel):
435
+ access_token: Optional[str]
436
+ token_type: str = "bearer"
437
+ user_id: Optional[str]
438
+ message: str
439
+
440
+ class LogoutResponse(BaseModel):
441
+ message: str
442
+
443
+ class Identity(BaseModel):
444
+ provider: str
445
+ identity_id: str
446
+ created_at: Union[datetime, str]
447
+ last_sign_in_at: Union[datetime, str]
448
+
449
+ class UserProfile(BaseModel):
450
+ user_id: str
451
+ email: str
452
+ full_name: Optional[str]
453
+ role: str
454
+ created_at: datetime
455
+ updated_at: Optional[datetime]
456
+ last_sign_in_at: Optional[datetime]
457
+ email_verified: bool
458
+ phone_verified: bool
459
+ is_anonymous: bool
460
+ app_metadata: Dict[str, Union[str, List[str]]]
461
+ user_metadata: Dict[str, Union[str, bool]]
462
+ identities: List[Identity]
463
+
464
+
465
+ ###############################################################################
466
+ # HTTP MODELS
467
+ ###############################################################################
468
+ class AskRequest(BaseModel):
469
+ user_input: str
470
+ tone: Optional[str] = "detailed"
471
+
472
+ class AskResponse(BaseModel):
473
+ session_id: str
474
+ message: str
475
+
476
+ ###############################################################################
477
+ # HTTP AUTH ENDPOINTS
478
+ ###############################################################################
479
+ @app.post("/auth/signup", response_model=SignupResponse)
480
+ def signup(payload: SignupRequest):
481
+ """
482
+ Sign up a new user using Supabase Auth.
483
+ Optionally store extra info (e.g., full_name) in your custom 'users' table.
484
+ """
485
+ # 1) Use Supabase Auth to create the user
486
+ try:
487
+ result = supabase_client.auth.sign_up(
488
+ {
489
+ "email": payload.email,
490
+ "password": payload.password
491
+ }
492
+ )
493
+ except Exception as e:
494
+ logging.exception("[signup] Error from Supabase Auth sign_up")
495
+ return SignupResponse(user_id=None, message=f"Sign up failed: {str(e)}")
496
+
497
+ if result.user is None:
498
+ # Possibly means "Confirm email" is enabled, user needs to verify
499
+ return SignupResponse(
500
+ user_id=None,
501
+ message="User created, but email confirmation required."
502
+ )
503
+
504
+ # 2) The user is created in supabase.auth. We can optionally store extra data
505
+ user_id = result.user.id
506
+ full_name = payload.full_name if payload.full_name else ""
507
+ now = datetime.utcnow()
508
+
509
+ # Attempt to store in our custom 'users' table
510
+ try:
511
+ insert_res = supabase_admin.table("users").insert({
512
+ "id": user_id,
513
+ "email": payload.email,
514
+ "password_hash": "N/A (Using Supabase Auth)",
515
+ "full_name": full_name,
516
+ "created_at": now.isoformat(),
517
+ "last_login": None,
518
+ "role": "user"
519
+ }).execute()
520
+ logging.info("[signup] Inserted custom user record: %s", insert_res.data)
521
+ except Exception as e:
522
+ logging.exception("[signup] Error inserting into 'users' table")
523
+
524
+ return SignupResponse(user_id=user_id, message="Sign up successful.")
525
+
526
+ @app.post("/auth/login", response_model=LoginResponse)
527
+ def login(payload: LoginRequest):
528
+ """
529
+ Log in an existing user with Supabase Auth.
530
+ Return the access_token, which you can store on client side for usage,
531
+ or rely on same-site cookies if you have it configured.
532
+ """
533
+ try:
534
+ result = supabase_client.auth.sign_in_with_password(
535
+ {
536
+ "email": payload.email,
537
+ "password": payload.password
538
+ }
539
+ )
540
+ if result.user is None:
541
+ return LoginResponse(
542
+ access_token=None,
543
+ user_id=None,
544
+ message="Login failed: invalid credentials or user not confirmed."
545
+ )
546
+
547
+ user_id = result.user.id
548
+ access_token = result.session.access_token if result.session else None
549
+
550
+ # We can track "last_login" in our custom table:
551
+ now = datetime.utcnow()
552
+ try:
553
+ supabase_admin.table("users").update({
554
+ "last_login": now.isoformat()
555
+ }).eq("id", user_id).execute()
556
+ except Exception as e:
557
+ logging.exception("[login] Error updating last_login in 'users' table")
558
+
559
+ return LoginResponse(
560
+ access_token=access_token,
561
+ user_id=user_id,
562
+ message="Login success."
563
+ )
564
+ except Exception as e:
565
+ logging.exception("[login] Error from Supabase Auth sign_in_with_password")
566
+ return LoginResponse(
567
+ access_token=None,
568
+ user_id=None,
569
+ message=f"Login error: {str(e)}"
570
+ )
571
+
572
+ @app.post("/auth/logout", response_model=LogoutResponse)
573
+ def logout():
574
+ """
575
+ Invalidate the user's session if you are storing it on the server
576
+ or using persistent session management. For token-based approach,
577
+ you can have the client discard the token and possibly call
578
+ supabase_client.auth.sign_out() as well.
579
+ """
580
+ try:
581
+ # This will revoke the refresh token from Supabase's perspective
582
+ supabase_client.auth.sign_out()
583
+ return LogoutResponse(message="Logout successful.")
584
+ except Exception as e:
585
+ logging.exception("[logout] Error from Supabase Auth sign_out")
586
+ raise HTTPException(status_code=500, detail="Logout failed.")
587
+
588
+
589
+ @app.get("/auth/me", response_model=UserProfile)
590
+ def get_current_user(credentials: HTTPAuthorizationCredentials = Depends(security)):
591
+ """
592
+ Retrieve info about the currently logged-in user.
593
+ """
594
+ try:
595
+ # Extract access token from Authorization header
596
+ access_token = credentials.credentials
597
+
598
+ # Retrieve user details using the access token
599
+ user_response = supabase_client.auth.get_user(access_token)
600
+ if not user_response or not user_response.user:
601
+ raise HTTPException(status_code=401, detail="User not authenticated.")
602
+
603
+ user = user_response.user
604
+
605
+ # Optionally fetch additional data from your custom `users` table
606
+ res = supabase_client.table("users").select("*").eq("id", user.id).single().execute()
607
+ record = res.data
608
+
609
+ # Construct the UserProfile response
610
+ return UserProfile(
611
+ user_id=user.id,
612
+ email=user.email,
613
+ full_name=record.get("full_name") if record else None,
614
+ role=user.role,
615
+ created_at=user.created_at,
616
+ updated_at=user.updated_at,
617
+ last_sign_in_at=user.last_sign_in_at,
618
+ email_verified=user.user_metadata.get("email_verified", False),
619
+ phone_verified=user.user_metadata.get("phone_verified", False),
620
+ is_anonymous=user.is_anonymous,
621
+ app_metadata=user.app_metadata,
622
+ user_metadata=user.user_metadata,
623
+ identities=[
624
+ Identity(
625
+ provider=identity.provider,
626
+ identity_id=identity.identity_id,
627
+ created_at=str(identity.created_at) if isinstance(identity.created_at, datetime) else identity.created_at,
628
+ last_sign_in_at=str(identity.last_sign_in_at) if isinstance(identity.last_sign_in_at, datetime) else identity.last_sign_in_at,
629
+ )
630
+ for identity in user.identities
631
+ ] if user.identities else []
632
+ )
633
+ except Exception as e:
634
+ logging.exception("[get_current_user] Error retrieving user info")
635
+ raise HTTPException(status_code=500, detail=str(e))
636
+
637
+
638
+ @app.get("/auth/confirm")
639
+ def confirm_email(
640
+ access_token: str = Query(...),
641
+ refresh_token: str = Query(...),
642
+ expires_in: int = Query(...),
643
+ token_type: str = Query(...)
644
+ ):
645
+ """
646
+ Endpoint to handle confirmation links sent via email.
647
+ """
648
+ try:
649
+ # Use Supabase client to retrieve and confirm the user
650
+ result = supabase_client.auth.get_user(access_token)
651
+ if result.user:
652
+ return {"status": "success", "message": "Email confirmed successfully.", "user": result.user}
653
+ else:
654
+ raise HTTPException(status_code=400, detail="Invalid or expired confirmation link.")
655
+ except Exception as e:
656
+ logging.exception("[confirm_email] Error during confirmation")
657
+ raise HTTPException(status_code=500, detail=str(e))
658
+
659
+
660
+ ###############################################################################
661
+ # HTTP ENDPOINTS
662
+ ###############################################################################
663
+ @app.get("/health")
664
+ def health_check():
665
+ """Simple health check endpoint."""
666
+ return {"status": "ok", "message": "Service is healthy."}
667
+
668
+ @app.post("/ask", response_model=AskResponse)
669
+ def ask_endpoint(payload: AskRequest):
670
+ """
671
+ Optional endpoint to create a session or store the first user message
672
+ before switching to WebSockets.
673
+ """
674
+ session_id = generate_session_id()
675
+ user_input = payload.user_input
676
+ append_message(session_id, "user", user_input)
677
+
678
+ return AskResponse(
679
+ session_id=session_id,
680
+ message="Session created. Connect via WS to continue."
681
+ )
682
+
683
+ @app.post("/reset")
684
+ def reset_session(session_id: str = Body(..., embed=True)):
685
+ """
686
+ Deletes the session JSON file, effectively resetting the conversation.
687
+ """
688
+ path = get_session_file(session_id)
689
+ if os.path.exists(path):
690
+ os.remove(path)
691
+ return {"status": "ok", "message": f"Session {session_id} reset."}
692
+ else:
693
+ raise HTTPException(status_code=404, detail="Session not found.")
694
+
695
+ ###############################################################################
696
+ # WEBSOCKET CONCURRENCY
697
+ ###############################################################################
698
+ class ConnectionManager:
699
+ """
700
+ Manages EXACTLY ONE active WebSocket per session_id.
701
+ If a new WebSocket for the same session_id arrives,
702
+ it closes the old connection first.
703
+ """
704
+ def __init__(self):
705
+ self.active_connections: Dict[str, WebSocket] = {}
706
+
707
+ async def connect(self, session_id: str, websocket: WebSocket):
708
+ # If there's already an active socket for this session, close it
709
+ if session_id in self.active_connections:
710
+ old_ws = self.active_connections[session_id]
711
+ logging.info(f"[WS] Closing old connection for session {session_id} to allow new one.")
712
+ await old_ws.close(code=4000, reason="Replaced by a new connection")
713
+
714
+ logging.info(f"[WS] Accepting WebSocket for session: {session_id}")
715
+ await websocket.accept()
716
+
717
+ self.active_connections[session_id] = websocket
718
+ logging.info(f"[WS] Session {session_id} connected. "
719
+ f"Total active sessions: {len(self.active_connections)}")
720
+
721
+ def disconnect(self, session_id: str, websocket: WebSocket):
722
+ stored_ws = self.active_connections.get(session_id)
723
+ if stored_ws is websocket:
724
+ del self.active_connections[session_id]
725
+ logging.info(f"[WS] Session {session_id} disconnected. "
726
+ f"Remaining active sessions: {len(self.active_connections)}")
727
+
728
+ async def send_json(self, session_id: str, data: dict):
729
+ ws = self.active_connections.get(session_id)
730
+ if ws is not None:
731
+ await ws.send_json(data)
732
+
733
+ manager = ConnectionManager()
734
+
735
+ @app.websocket("/ws")
736
+ async def websocket_endpoint(
737
+ websocket: WebSocket,
738
+ session_id: Optional[str] = Query(None),
739
+ tone: str = Query("detailed")
740
+ ):
741
+ """
742
+ WebSocket endpoint.
743
+ - The user can pass `session_id` and `tone` as query parameters, e.g.:
744
+ ws://localhost:8000/ws?session_id=abc-123&tone=casual
745
+ - Or omit `session_id` to generate one automatically.
746
+ - Each message from client must be JSON with {"user_input": "..."}.
747
+ """
748
+ if not session_id:
749
+ session_id = generate_session_id()
750
+ logging.info(f"[WS] No session_id provided. Created new: {session_id}")
751
+
752
+ await manager.connect(session_id, websocket)
753
+
754
+ while True:
755
+ try:
756
+ data = await websocket.receive_json()
757
+ user_input = data.get("user_input", "")
758
+ append_message(session_id, "user", user_input)
759
+
760
+ session_data = load_session_from_json(session_id)
761
+ conversation_text = ""
762
+ for msg in session_data["messages"]:
763
+ role_name = msg["role"].capitalize()
764
+ conversation_text += f"{role_name}: {msg['content']}\n"
765
+
766
+ chain_state = {
767
+ "question": conversation_text,
768
+ "tone": tone
769
+ }
770
+
771
+ await manager.send_json(session_id, {
772
+ "type": "status",
773
+ "message": "Starting pipeline..."
774
+ })
775
+
776
+ try:
777
+ async for step_result in pipeline_graph.astream(chain_state, stream_mode="values"):
778
+ if "demographic_context" in step_result:
779
+ await manager.send_json(session_id, {
780
+ "type": "status",
781
+ "message": f"Demographic: {step_result['demographic_context']}"
782
+ })
783
+ if "retrieved_context" in step_result:
784
+ excerpt = step_result["retrieved_context"][0].page_content[:60]
785
+ await manager.send_json(session_id, {
786
+ "type": "status",
787
+ "message": f"Retrieved context: {excerpt}..."
788
+ })
789
+ if "web_search_needed" in step_result:
790
+ await manager.send_json(session_id, {
791
+ "type": "status",
792
+ "message": f"Web search needed = {step_result['web_search_needed']}"
793
+ })
794
+ if "web_search_results" in step_result:
795
+ count = len(step_result["web_search_results"])
796
+ await manager.send_json(session_id, {
797
+ "type": "status",
798
+ "message": f"Web search returned {count} results."
799
+ })
800
+ if "final_answer" in step_result:
801
+ final_ans = step_result["final_answer"]
802
+ short_answer = final_ans["crunched_summary"]
803
+ append_message(session_id, "assistant", short_answer)
804
+
805
+ await manager.send_json(session_id, {
806
+ "type": "final_answer",
807
+ "short_answer": short_answer,
808
+ "full_answer": final_ans["full_answer"],
809
+ "sources": final_ans["sources"],
810
+ "source_type": final_ans["source_type"]
811
+ })
812
+
813
+ except Exception as e:
814
+ logging.exception("[WS] Error during pipeline streaming.")
815
+ await manager.send_json(session_id, {
816
+ "type": "error",
817
+ "message": str(e)
818
+ })
819
+
820
+ except WebSocketDisconnect:
821
+ logging.info(f"[WS] Client disconnected for session {session_id}")
822
+ manager.disconnect(session_id, websocket)
823
+ break
824
+ except Exception as e:
825
+ logging.exception("[WS] Error reading JSON from WebSocket.")
826
+ await manager.send_json(session_id, {
827
+ "type": "error",
828
+ "message": str(e)
829
+ })
830
+ # Not disconnecting immediately—client may continue with valid input
831
+
832
+ ###############################################################################
833
+ # LOCAL DEV ENTRY POINT
834
+ ###############################################################################
835
+ # if __name__ == "__main__":
836
+ # import uvicorn
837
+ # # Uncomment if you want to log out or run the DDL
838
+ # uvicorn.run("main:app", host="0.0.0.0", port=8000, reload=True)
requirements.txt ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ bs4==0.0.2
3
+ chromadb==0.5.23
4
+ dill==0.3.9
5
+ effdet==0.4.1
6
+ email_validator==2.2.0
7
+ google-cloud-vision==3.9.0
8
+ googlesearch-python==1.2.5
9
+ grpcio-status==1.68.1
10
+ h2==4.1.0
11
+ httptools==0.6.4
12
+ ipykernel==6.29.5
13
+ langchain-community==0.3.13
14
+ langchain-openai==0.2.14
15
+ langchainhub==0.1.21
16
+ langgraph==0.2.60
17
+ motor==3.6.0
18
+ pi_heif==0.21.0
19
+ pikepdf==9.4.2
20
+ pip==24.2
21
+ setuptools==75.1.0
22
+ supabase==2.10.0
23
+ unstructured==0.16.11
24
+ unstructured-inference==0.8.1
25
+ unstructured.pytesseract==0.3.13
26
+ uvloop==0.21.0
27
+ watchfiles==1.0.3
28
+ wheel==0.44.0