mbudisic committed
Commit 94a6b26 · 1 Parent(s): f672221

feat: Add multi-provider API support with configurable model selectors


- Introduce ModelAPI enum for OpenAI, HuggingFace, and Ollama providers
- Add ChatAPISelector and EmbeddingsAPISelector for dynamic model instantiation
- Update related modules to support configurable AI model providers
- Enable runtime selection of chat and embedding models based on configuration
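A minimal sketch of the selection pattern this commit introduces, mirroring how the DatastoreManager constructor below uses it. EmbeddingsAPISelector (and, by analogy, ChatAPISelector) is defined in pstuts_rag/utils.py, which is not part of this diff, so it is assumed here to behave like a mapping from ModelAPI members to LangChain model classes:

```python
# Sketch only: EmbeddingsAPISelector is assumed to map ModelAPI members to embeddings classes.
from langchain_ollama.embeddings import OllamaEmbeddings

from pstuts_rag.configuration import Configuration, ModelAPI
from pstuts_rag.utils import EmbeddingsAPISelector

config = Configuration()  # EMBEDDING_API / LLM_API environment variables pick the providers

# Look up the class registered for the configured provider; this sketch falls back to Ollama.
embeddings_cls = EmbeddingsAPISelector.get(config.embedding_api, OllamaEmbeddings)
embeddings = embeddings_cls(model=config.embedding_model)
```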

.vscode/settings.json CHANGED
@@ -2,6 +2,8 @@
2
  "python.pythonPath": "/home/mbudisic/Documents/PsTuts-RAG/.venv/bin/python",
3
  "cSpell.words": [
4
  "chainlit",
 
 
5
  "pstuts",
6
  "qdrant"
7
  ],
 
2
  "python.pythonPath": "/home/mbudisic/Documents/PsTuts-RAG/.venv/bin/python",
3
  "cSpell.words": [
4
  "chainlit",
5
+ "huggingface",
6
+ "ollama",
7
  "pstuts",
8
  "qdrant"
9
  ],
app.py CHANGED
@@ -1,11 +1,13 @@
1
  from pstuts_rag.configuration import Configuration
 
 
2
  from pstuts_rag.state import PsTutsTeamState
3
  import requests
4
  import asyncio
5
  import json
6
  import os
7
  import getpass
8
- from typing import Any, Dict, List, Tuple
9
  import re
10
 
11
  import chainlit as cl
@@ -15,27 +17,20 @@ from langchain_core.language_models import BaseChatModel
15
  from langchain_core.runnables import Runnable
16
  from langchain_openai import ChatOpenAI
17
  from langchain_core.embeddings import Embeddings
18
- from langchain_qdrant import QdrantVectorStore
19
- from qdrant_client import QdrantClient
20
 
21
  from langchain_core.messages import HumanMessage, BaseMessage
22
  import langgraph.graph
23
 
24
- from pstuts_rag.graph import create_team_supervisor
25
- from pstuts_rag.graph import create_tavily_node
26
 
27
  import pstuts_rag.datastore
28
  import pstuts_rag.rag
29
 
30
- from pstuts_rag.graph import create_rag_node
31
-
32
- from pstuts_rag.datastore import load_json_files
33
- from pstuts_rag.prompts import SUPERVISOR_SYSTEM
34
 
35
  import nest_asyncio
36
  from uuid import uuid4
37
 
38
- from sentence_transformers import SentenceTransformer
39
  import logging
40
 
41
  logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -70,7 +65,7 @@ class ApplicationState:
70
  Maintains the state of the application and its components.
71
 
72
  Attributes:
73
- embeddings: OpenAI embeddings model for vector operations
74
  docs: List of loaded documents
75
  qdrant_client: Client for Qdrant vector database
76
  vector_store: Vector store for document retrieval
@@ -87,15 +82,15 @@ class ApplicationState:
87
 
88
  embeddings: Embeddings = None
89
  docs: List[Document] = []
90
- qdrant_client: QdrantClient = None
91
- vector_store: QdrantVectorStore = None
92
- datastore_manager: pstuts_rag.datastore.DatastoreManager
93
- rag: pstuts_rag.rag.RAGChainInstance
94
- llm: BaseChatModel
95
- rag_chain: Runnable
96
 
97
- ai_graph: Runnable
98
- ai_graph_sketch: langgraph.graph.StateGraph
99
 
100
  tasks: List[asyncio.Task] = []
101
 
@@ -126,83 +121,12 @@ ai_state = PsTutsTeamState(
126
  )
127
 
128
 
129
- async def fill_the_db(
130
- state: ApplicationState,
131
- ):
132
- """
133
- Populates the vector database with document data if it's empty.
134
-
135
- Args:
136
- state: Application state containing the datastore manager
137
-
138
- Returns:
139
- 0 if database already has documents, otherwise None
140
- """
141
- data: List[Dict[str, Any]] = await load_json_files(params.filename)
142
-
143
- _ = await state.rag.build_chain(data)
144
- await cl.Message(
145
- content=f"✅ The database has been loaded with {state.rag.pointsLoaded} elements!"
146
- ).send()
147
-
148
-
149
- async def build_the_graph(current_state: ApplicationState):
150
- """
151
- Builds the agent graph for routing user queries.
152
-
153
- Creates the necessary nodes (Adobe help, RAG search, supervisor), defines their
154
- connections, and compiles the graph into a runnable chain.
155
-
156
- Args:
157
- current_state: Current application state with required components
158
- """
159
- adobe_help_node, _, _ = create_tavily_node(
160
- llm=app_state.llm, name=ADOBEHELP
161
- )
162
-
163
- rag_node, _ = create_rag_node(
164
- rag_chain=current_state.rag.rag_chain,
165
- name=VIDEOARCHIVE,
166
- )
167
-
168
- supervisor_agent = create_team_supervisor(
169
- current_state.llm,
170
- SUPERVISOR_SYSTEM,
171
- [VIDEOARCHIVE, ADOBEHELP],
172
- )
173
-
174
- ai_graph = langgraph.graph.StateGraph(PsTutsTeamState)
175
-
176
- ai_graph.add_node(VIDEOARCHIVE, rag_node)
177
- ai_graph.add_node(ADOBEHELP, adobe_help_node)
178
- ai_graph.add_node("supervisor", supervisor_agent)
179
-
180
- edges = [
181
- [VIDEOARCHIVE, "supervisor"],
182
- [ADOBEHELP, "supervisor"],
183
- ]
184
-
185
- [ai_graph.add_edge(*p) for p in edges]
186
-
187
- ai_graph.add_conditional_edges(
188
- "supervisor",
189
- lambda x: x["next"],
190
- {
191
- VIDEOARCHIVE: VIDEOARCHIVE,
192
- ADOBEHELP: ADOBEHELP,
193
- "FINISH": langgraph.graph.END,
194
- },
195
- )
196
-
197
- ai_graph.set_entry_point("supervisor")
198
- app_state.ai_graph_sketch = ai_graph
199
- app_state.ai_graph = enter_chain | ai_graph.compile()
200
-
201
-
202
  async def initialize():
203
 
204
  await fill_the_db(app_state)
205
- await build_the_graph(app_state)
 
 
206
 
207
 
208
  def enter_chain(message: str):
@@ -233,8 +157,10 @@ async def on_chat_start():
233
  for database population and graph building.
234
  """
235
  app_state.llm = ChatOpenAI(model=params.tool_calling_model, temperature=0)
236
- app_state.qdrant_client = QdrantClient(":memory:")
237
- app_state.embeddings = SentenceTransformer(params.embedding_model)
 
 
238
 
239
  app_state.rag = pstuts_rag.rag.RAGChainInstance(
240
  name="deployed",
 
1
  from pstuts_rag.configuration import Configuration
2
+ from pstuts_rag.datastore import fill_the_db
3
+ from pstuts_rag.graph import build_the_graph
4
  from pstuts_rag.state import PsTutsTeamState
5
  import requests
6
  import asyncio
7
  import json
8
  import os
9
  import getpass
10
+ from typing import List, Tuple
11
  import re
12
 
13
  import chainlit as cl
 
17
  from langchain_core.runnables import Runnable
18
  from langchain_openai import ChatOpenAI
19
  from langchain_core.embeddings import Embeddings
20
+ from langchain_huggingface import HuggingFaceEmbeddings
21
+
22
 
23
  from langchain_core.messages import HumanMessage, BaseMessage
24
  import langgraph.graph
25
 
 
 
26
 
27
  import pstuts_rag.datastore
28
  import pstuts_rag.rag
29
 
 
 
 
 
30
 
31
  import nest_asyncio
32
  from uuid import uuid4
33
 
 
34
  import logging
35
 
36
  logging.getLogger("httpx").setLevel(logging.WARNING)
 
65
  Maintains the state of the application and its components.
66
 
67
  Attributes:
68
+ embeddings: Embeddings model for vector operations
69
  docs: List of loaded documents
70
  qdrant_client: Client for Qdrant vector database
71
  vector_store: Vector store for document retrieval
 
82
 
83
  embeddings: Embeddings = None
84
  docs: List[Document] = []
85
+ qdrant_client = None
86
+ vector_store = None
87
+ datastore_manager = None
88
+ rag = None
89
+ llm: BaseChatModel = None
90
+ rag_chain: Runnable = None
91
 
92
+ ai_graph: Runnable = None
93
+ ai_graph_sketch = None
94
 
95
  tasks: List[asyncio.Task] = []
96
 
 
121
  )
122
 
123
124
  async def initialize():
125
 
126
  await fill_the_db(app_state)
127
+ app_state.ai_graph, app_state.ai_graph_sketch = await build_the_graph(
128
+ app_state
129
+ )
130
 
131
 
132
  def enter_chain(message: str):
 
157
  for database population and graph building.
158
  """
159
  app_state.llm = ChatOpenAI(model=params.tool_calling_model, temperature=0)
160
+ # Use LangChain's built-in HuggingFaceEmbeddings wrapper
161
+ app_state.embeddings = HuggingFaceEmbeddings(
162
+ model_name=params.embedding_model
163
+ )
164
 
165
  app_state.rag = pstuts_rag.rag.RAGChainInstance(
166
  name="deployed",
notebooks/transcript_rag.ipynb CHANGED
@@ -2,267 +2,231 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 16,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import os\n",
10
- "from getpass import getpass\n",
11
- "from dotenv import load_dotenv\n",
12
- "\n"
13
- ]
14
- },
15
- {
16
- "cell_type": "code",
17
- "execution_count": 17,
18
- "metadata": {},
19
- "outputs": [],
20
- "source": [
21
- "import pstuts_rag"
22
- ]
23
- },
24
- {
25
- "cell_type": "code",
26
- "execution_count": 18,
27
  "metadata": {},
28
  "outputs": [
29
  {
30
  "name": "stdout",
31
  "output_type": "stream",
32
  "text": [
33
- "The autoreload extension is already loaded. To reload it, use:\n",
34
- " %reload_ext autoreload\n"
35
  ]
36
  }
37
  ],
38
  "source": [
39
- "%load_ext autoreload\n",
40
- "%autoreload 2\n"
 
 
 
 
 
 
 
41
  ]
42
  },
43
  {
44
  "cell_type": "code",
45
- "execution_count": 19,
46
  "metadata": {},
47
  "outputs": [],
48
  "source": [
49
- "from dataclasses import dataclass\n",
50
- "@dataclass\n",
51
- "class ApplicationParameters:\n",
52
- " filename = \"data/test.json\"\n",
53
- " embedding_model = \"text-embedding-3-small\"\n",
54
- " n_context_docs = 2\n",
55
- "\n",
56
- "params = ApplicationParameters()"
57
  ]
58
  },
59
  {
60
  "cell_type": "code",
61
- "execution_count": 20,
62
  "metadata": {},
63
  "outputs": [],
64
  "source": [
65
- "\n",
66
- "load_dotenv()\n",
67
- "\n",
68
- "def set_api_key_if_not_present(key_name, prompt_message=\"\"):\n",
69
- " if len(prompt_message) == 0:\n",
70
- " prompt_message=key_name\n",
71
- " if key_name not in os.environ or not os.environ[key_name]:\n",
72
- " os.environ[key_name] = getpass.getpass(prompt_message)\n",
73
- "\n",
74
- "set_api_key_if_not_present(\"OPENAI_API_KEY\")"
75
- ]
76
- },
77
- {
78
- "cell_type": "markdown",
79
- "metadata": {},
80
- "source": [
81
- "# Data Preparation\n",
82
- "\n",
83
- "First, we will read in the transcripts of the videos and convert them to Documents\n",
84
- "with appropriate metadata."
85
  ]
86
  },
87
  {
88
  "cell_type": "code",
89
- "execution_count": 21,
90
  "metadata": {},
91
  "outputs": [],
92
  "source": [
93
- "from ast import Dict\n",
94
- "import json\n",
95
- "\n",
96
- "from pstuts_rag.loader import load_json_files\n",
97
- "filename = [\"../data/test.json\",\"../data/dev.json\"]\n",
98
- "from typing import List, Dict, Any\n",
99
- "data:List[Dict[str,Any]] = await load_json_files(filename)\n"
100
  ]
101
  },
102
  {
103
  "cell_type": "code",
104
- "execution_count": 22,
105
  "metadata": {},
106
  "outputs": [
107
  {
108
  "data": {
109
  "text/plain": [
110
- "['Get organized with layer groups',\n",
111
- " 'Remove unwanted objects from photos',\n",
112
- " 'Include vector graphics',\n",
113
- " 'Remove unwanted content',\n",
114
- " 'Add a central element',\n",
115
- " 'Set the resolution',\n",
116
- " 'Understand layers',\n",
117
- " 'Adjust brightness and contrast',\n",
118
- " 'Remove a large object',\n",
119
- " 'Add text',\n",
120
- " 'Replace a background using a layer mask',\n",
121
- " 'Use layers for ultimate flexibility and control',\n",
122
- " 'Select part of an image',\n",
123
- " 'Get to know layers',\n",
124
- " 'Improve lighting and color',\n",
125
- " 'Add dreamlike elements to the composite',\n",
126
- " 'Expand the canvas',\n",
127
- " 'Resize a layer',\n",
128
- " 'Adjust hue and saturation',\n",
129
- " 'Learn selection basics',\n",
130
- " 'Choose a color',\n",
131
- " 'Add texture to an image']"
132
  ]
133
  },
134
- "execution_count": 22,
135
  "metadata": {},
136
  "output_type": "execute_result"
137
  }
138
  ],
139
  "source": [
140
- "[ d[\"title\"] for d in data ]"
141
- ]
142
- },
143
- {
144
- "cell_type": "markdown",
145
- "metadata": {},
146
- "source": [
147
- "## R - retrieval"
148
- ]
149
- },
150
- {
151
- "cell_type": "markdown",
152
- "metadata": {},
153
- "source": [
154
- "Let's hit it with a semantic chunker."
155
  ]
156
  },
157
  {
158
  "cell_type": "code",
159
- "execution_count": 23,
160
- "metadata": {},
161
- "outputs": [],
162
- "source": [
163
- "from pstuts_rag.datastore import DatastoreManager\n",
164
- "from qdrant_client import QdrantClient\n",
165
- "\n",
166
- "client = QdrantClient(\":memory:\")\n",
167
- "\n",
168
- "retriever_factory = DatastoreManager(qdrant_client=client,name=\"local_test\")\n",
169
- "if retriever_factory.count_docs() == 0:\n",
170
- " await retriever_factory.populate_database(raw_docs=data)"
171
- ]
172
- },
173
- {
174
- "cell_type": "markdown",
175
- "metadata": {},
176
- "source": [
177
- "## A - Augmentation\n",
178
- "\n",
179
- "We need to populate a prompt for LLM.\n"
180
- ]
181
- },
182
- {
183
- "cell_type": "markdown",
184
  "metadata": {},
 
 
 
 
 
 
 
 
 
185
  "source": [
186
- "## Generation\n",
187
- "\n",
188
- "We will use a 4.1-nano to generate answers."
189
  ]
190
  },
191
  {
192
  "cell_type": "code",
193
- "execution_count": 24,
194
  "metadata": {},
195
  "outputs": [
196
  {
197
- "name": "stdout",
198
  "output_type": "stream",
199
  "text": [
200
- "<built-in function repr>\n"
 
 
 
 
 
 
 
 
 
201
  ]
202
  }
203
  ],
204
  "source": [
205
- "from pstuts_rag.rag import RAGChainFactory\n",
206
- "\n",
207
- "rag_factory = RAGChainFactory(retriever=retriever_factory.get_retriever())"
208
  ]
209
  },
210
  {
211
  "cell_type": "code",
212
- "execution_count": 25,
213
  "metadata": {},
214
- "outputs": [],
 
 
 
 
 
 
 
 
 
215
  "source": [
216
- "from langchain_openai import ChatOpenAI\n",
217
- "\n",
218
- "llm = ChatOpenAI(model=\"gpt-4.1-mini\",temperature=0)"
219
  ]
220
  },
221
  {
222
  "cell_type": "code",
223
- "execution_count": 26,
224
  "metadata": {},
225
- "outputs": [],
 
226
  "source": [
227
- "get_videos = rag_factory.get_rag_chain(llm)\n",
228
- " \n"
229
  ]
230
  },
231
  {
232
  "cell_type": "code",
233
- "execution_count": 27,
234
  "metadata": {},
235
  "outputs": [],
236
  "source": [
237
- "val = await get_videos.ainvoke({\"question\":\"What are layers\"})"
238
  ]
239
  },
240
  {
241
  "cell_type": "code",
242
- "execution_count": 28,
243
  "metadata": {},
244
  "outputs": [
245
  {
246
- "data": {
247
- "text/plain": [
248
- "{'refusal': None,\n",
249
- " 'context': [Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 21, '_collection_name': 'local_test'}, page_content=\"Layers are the building blocks of any image in Photoshop CC. So, it's important to understand, what layers are and why to use them - which we'll cover in this video. If you're following along, open this layered image from the downloadable practice files for this tutorial. You might think of layers like separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content. To get a sense of how layers are constructed, let's take a look at this Layers panel. I've closed my other panels, so that we can focus on the Layers panel. But you can skip that. By the way: If your Layers panel isn't showing, go up to the Window menu and choose Layers from there. The Layers panel is where you go to select and work with layers. In this image there are 4 layers, each with separate content. If you click the Eye icon to the left of a layer, you can toggle the visibility of that layer off and on. So, I'm going to turn off the visibility of the tailor layer. And keep your eye on the image, so you can see what's on that layer.\"),\n",
250
- " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[85.75, 88.659999], [89.42, 100.11], [101.469999, 108.64], [109.09, 117.459999], [117.75, 129.45], [129.97, 133.37], [133.73, 143.98], [144.76, 152.97]], 'start': 85.75, 'stop': 152.97, '_id': 23, '_collection_name': 'local_test'}, page_content=\"Now let's take a look at just one layer, the tailor layer. A quick way to turn off all the layers except the tailor layer, is to hold down the Option key on the Mac, or the ALT key on the PC, and click on the Eye icon to the left of the tailor layer. In the Document window, you can see that this layer contains just the one small photo surrounded by a gray and white checkerboard pattern. That pattern represents transparent pixels, which allow us to see down through the corresponding part of this layer to the content of the layers below. So, let's turn that content back on by going back to the Layers panel, again holding the Option key on the Mac or the ALT key on the PC and clicking on the Eye icon to the left of the tailor layer. And all the other layers and their Eye icons come back into view. So again: You might think of layers like a stack of pints of glass, each with its own artwork and in some cases transparent areas that let you see down through to the layers below. The biggest benefit of having items on separate layers like this, is that you'll be able to edit pieces of an image independently without affecting the rest of the image.\")],\n",
251
- " 'question': 'What are layers'}"
252
- ]
253
- },
254
- "execution_count": 28,
255
- "metadata": {},
256
- "output_type": "execute_result"
257
  }
258
  ],
259
  "source": [
260
- "val.additional_kwargs"
261
  ]
262
  },
263
  {
264
  "cell_type": "code",
265
- "execution_count": 29,
266
  "metadata": {},
267
  "outputs": [
268
  {
@@ -271,7 +235,30 @@
271
  "text": [
272
  "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
273
  "\n",
274
- "Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other. Each layer contains separate pieces of content. Some parts of a layer can be transparent, allowing you to see through to the layers below. This setup lets you edit parts of an image independently without affecting the rest of the image. You manage and view layers in the Layers panel, where you can toggle their visibility on and off using the Eye icon. (See explanation around 0:28 to 1:00 and 1:25 to 2:32) 🎨🖼️\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  "**REFERENCES**\n",
276
  "[\n",
277
  " {\n",
@@ -281,37 +268,45 @@
281
  " \"stop\": 62.14\n",
282
  " },\n",
283
  " {\n",
 
 
 
 
 
 
284
  " \"title\": \"Understand layers\",\n",
285
  " \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\n",
286
- " \"start\": 85.75,\n",
287
- " \"stop\": 152.97\n",
288
  " }\n",
289
  "]\n"
290
  ]
291
  }
292
  ],
293
  "source": [
294
- "val.pretty_print()"
295
  ]
296
  },
297
  {
298
  "cell_type": "code",
299
- "execution_count": 30,
300
  "metadata": {},
301
  "outputs": [
302
  {
303
  "data": {
304
  "text/plain": [
305
- "'Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other. Each layer contains separate pieces of content. Some parts of a layer can be transparent, allowing you to see through to the layers below. This setup lets you edit parts of an image independently without affecting the rest of the image. You manage and view layers in the Layers panel, where you can toggle their visibility on and off using the Eye icon. (See explanation around 0:28 to 1:00 and 1:25 to 2:32) 🎨🖼️\\n**REFERENCES**\\n[\\n {\\n \"title\": \"Understand layers\",\\n \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\\n \"start\": 0.47,\\n \"stop\": 62.14\\n },\\n {\\n \"title\": \"Understand layers\",\\n \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\\n \"start\": 85.75,\\n \"stop\": 152.97\\n }\\n]'"
 
 
306
  ]
307
  },
308
- "execution_count": 30,
309
  "metadata": {},
310
  "output_type": "execute_result"
311
  }
312
  ],
313
  "source": [
314
- "val.content"
315
  ]
316
  },
317
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  "metadata": {},
7
  "outputs": [
8
  {
9
  "name": "stdout",
10
  "output_type": "stream",
11
  "text": [
12
+ "/home/mbudisic/Documents/PsTuts-RAG\n"
 
13
  ]
14
  }
15
  ],
16
  "source": [
17
+ "%cd /home/mbudisic/Documents/PsTuts-RAG\n",
18
+ "import os\n",
19
+ "from getpass import getpass\n",
20
+ "from dotenv import load_dotenv\n",
21
+ "from pstuts_rag.configuration import Configuration\n",
22
+ "import asyncio\n",
23
+ "\n",
24
+ "import nest_asyncio\n",
25
+ "nest_asyncio.apply()\n"
26
  ]
27
  },
28
  {
29
  "cell_type": "code",
30
+ "execution_count": 2,
31
  "metadata": {},
32
  "outputs": [],
33
  "source": [
34
+ "import pstuts_rag"
 
 
 
 
 
 
 
35
  ]
36
  },
37
  {
38
  "cell_type": "code",
39
+ "execution_count": 3,
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
43
+ "import logging\n",
44
+ "logging.basicConfig(\n",
45
+ " level=Configuration().eva_log_level,\n",
46
+ " format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s'\n",
47
+ ")"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  ]
49
  },
50
  {
51
  "cell_type": "code",
52
+ "execution_count": 4,
53
  "metadata": {},
54
  "outputs": [],
55
  "source": [
56
+ "%load_ext autoreload\n",
57
+ "%autoreload 2\n"
 
 
 
 
 
58
  ]
59
  },
60
  {
61
  "cell_type": "code",
62
+ "execution_count": 5,
63
  "metadata": {},
64
  "outputs": [
65
  {
66
  "data": {
67
  "text/plain": [
68
+ "True"
 
69
  ]
70
  },
71
+ "execution_count": 5,
72
  "metadata": {},
73
  "output_type": "execute_result"
74
  }
75
  ],
76
  "source": [
77
+ "load_dotenv()\n"
 
78
  ]
79
  },
80
  {
81
  "cell_type": "code",
82
+ "execution_count": 6,
 
83
  "metadata": {},
84
+ "outputs": [
85
+ {
86
+ "name": "stderr",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "2025-05-30 20:08:35,183 - INFO - <module> - Loaded .env file\n"
90
+ ]
91
+ }
92
+ ],
93
  "source": [
94
+ "from pstuts_rag.rag_for_transcripts import *"
 
 
95
  ]
96
  },
97
  {
98
  "cell_type": "code",
99
+ "execution_count": 7,
100
  "metadata": {},
101
  "outputs": [
102
  {
103
+ "name": "stderr",
104
  "output_type": "stream",
105
  "text": [
106
+ "2025-05-30 20:08:36,978 - INFO - print - Configuration parameters:\n",
107
+ "2025-05-30 20:08:36,980 - INFO - print - eva_workflow_name: EVA_workflow\n",
108
+ "2025-05-30 20:08:36,980 - INFO - print - eva_log_level: INFO\n",
109
+ "2025-05-30 20:08:36,981 - INFO - print - transcript_glob: ./data/dev.json:./data/test.json\n",
110
+ "2025-05-30 20:08:36,982 - INFO - print - embedding_model: mxbai-embed-large\n",
111
+ "2025-05-30 20:08:36,983 - INFO - print - embedding_api: ModelAPI.OLLAMA\n",
112
+ "2025-05-30 20:08:36,984 - INFO - print - llm_api: ModelAPI.OLLAMA\n",
113
+ "2025-05-30 20:08:36,985 - INFO - print - max_research_loops: 2\n",
114
+ "2025-05-30 20:08:36,986 - INFO - print - llm_tool_model: deepseek-r1:8b\n",
115
+ "2025-05-30 20:08:36,987 - INFO - print - n_context_docs: 3\n"
116
  ]
117
  }
118
  ],
119
  "source": [
120
+ "Configuration().print(logging.info)"
 
 
121
  ]
122
  },
123
  {
124
  "cell_type": "code",
125
+ "execution_count": 8,
126
  "metadata": {},
127
+ "outputs": [
128
+ {
129
+ "name": "stderr",
130
+ "output_type": "stream",
131
+ "text": [
132
+ "2025-05-30 20:08:37,093 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
133
+ "2025-05-30 20:08:37,118 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n"
134
+ ]
135
+ }
136
+ ],
137
  "source": [
138
+ "datastore:DatastoreManager = startup(callback_on_loading_complete=lambda _: logging.warning(\"Loading complete.\")) "
 
 
139
  ]
140
  },
141
  {
142
  "cell_type": "code",
143
+ "execution_count": 9,
144
  "metadata": {},
145
+ "outputs": [
146
+ {
147
+ "name": "stderr",
148
+ "output_type": "stream",
149
+ "text": [
150
+ "2025-05-30 20:08:38,120 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
151
+ "2025-05-30 20:08:39,173 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
152
+ "2025-05-30 20:08:39,862 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
153
+ "2025-05-30 20:08:40,765 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
154
+ "2025-05-30 20:08:41,275 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
155
+ "2025-05-30 20:08:41,539 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
156
+ "2025-05-30 20:08:42,447 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
157
+ "2025-05-30 20:08:43,415 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
158
+ "2025-05-30 20:08:44,236 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
159
+ "2025-05-30 20:08:45,746 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
160
+ "2025-05-30 20:08:45,770 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
161
+ "2025-05-30 20:08:46,832 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
162
+ "2025-05-30 20:08:47,754 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
163
+ "2025-05-30 20:08:48,859 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
164
+ "2025-05-30 20:08:49,732 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
165
+ "2025-05-30 20:08:50,740 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
166
+ "2025-05-30 20:08:51,604 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
167
+ "2025-05-30 20:08:52,113 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
168
+ "2025-05-30 20:08:53,060 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
169
+ "2025-05-30 20:08:53,895 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
170
+ "2025-05-30 20:08:54,734 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
171
+ "2025-05-30 20:08:55,707 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
172
+ "2025-05-30 20:08:56,114 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
173
+ "2025-05-30 20:08:56,447 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
174
+ "2025-05-30 20:08:56,765 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
175
+ "2025-05-30 20:08:56,878 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
176
+ "2025-05-30 20:08:57,200 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
177
+ "2025-05-30 20:08:57,438 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
178
+ "2025-05-30 20:08:57,750 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
179
+ "2025-05-30 20:08:58,116 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
180
+ "2025-05-30 20:08:58,713 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
181
+ "2025-05-30 20:08:59,059 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
182
+ "2025-05-30 20:08:59,110 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n"
183
+ ]
184
+ },
185
+ {
186
+ "data": {
187
+ "text/plain": [
188
+ "True"
189
+ ]
190
+ },
191
+ "execution_count": 9,
192
+ "metadata": {},
193
+ "output_type": "execute_result"
194
+ }
195
+ ],
196
  "source": [
197
+ "await datastore.wait_for_loading()"
 
198
  ]
199
  },
200
  {
201
  "cell_type": "code",
202
+ "execution_count": 10,
203
  "metadata": {},
204
  "outputs": [],
205
  "source": [
206
+ "chain = retrieve_videos(datastore)"
207
  ]
208
  },
209
  {
210
  "cell_type": "code",
211
+ "execution_count": 11,
212
  "metadata": {},
213
  "outputs": [
214
  {
215
+ "name": "stderr",
216
+ "output_type": "stream",
217
+ "text": [
218
+ "2025-05-30 20:08:59,268 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
219
+ "2025-05-30 20:09:11,924 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/chat \"HTTP/1.1 200 OK\"\n"
220
+ ]
 
 
 
 
 
221
  }
222
  ],
223
  "source": [
224
+ "response = chain.invoke({\"question\":\"What is a layer?\"})"
225
  ]
226
  },
227
  {
228
  "cell_type": "code",
229
+ "execution_count": 12,
230
  "metadata": {},
231
  "outputs": [
232
  {
 
235
  "text": [
236
  "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
237
  "\n",
238
+ "<think>\n",
239
+ "Okay, I need to figure out what a layer is based on the provided transcript. Let me go through the context step by step.\n",
240
+ "\n",
241
+ "First, looking at the first document with video ID 19172. The title says \"Understand layers\" and the description mentions that layers are the building blocks of any image in Photoshop CC. So, layers must be fundamental components.\n",
242
+ "\n",
243
+ "In the page content, it compares layers to separate flat prints of glass stacked on top of each other. Each layer has different content. That makes me think of layers as separate elements or parts of an image that can be edited individually.\n",
244
+ "\n",
245
+ "There's also a mention of the Layers panel where you select and work with layers. It shows 4 layers, each with distinct content. The Eye icon can toggle visibility, so layers can be shown or hidden. This suggests that layers are like different layers of content that can be managed separately.\n",
246
+ "\n",
247
+ "Looking at another document with video ID 4103, it says layers allow isolation of individual pieces of content within a composition. For example, text or brush strokes can be separated into their own layers for detailed editing without affecting other parts.\n",
248
+ "\n",
249
+ "Another mention from the same video talks about using layers for adding highlights or shadows by isolating small elements, which means each part can be worked on independently.\n",
250
+ "\n",
251
+ "Putting this together, a layer is like a separate sheet in an image that holds different elements. Each layer allows you to edit, move, or manipulate specific parts without affecting others. The Layers panel helps manage and control these layers for better organization and editing flexibility.\n",
252
+ "</think>\n",
253
+ "\n",
254
+ "🎨 **What is a Layer?** \n",
255
+ "Layers are like separate sheets in an image, each holding distinct content. Think of them as individual elements stacked on top of each other, allowing you to edit or manipulate specific parts without affecting others. \n",
256
+ "\n",
257
+ "For example: \n",
258
+ "- Each layer can contain text, images, or design elements. \n",
259
+ "- You can toggle their visibility using the Eye icon. \n",
260
+ "\n",
261
+ "📌 **Timestamp**: 0.47 - 3.41 minutes (video ID 19172)\n",
262
  "**REFERENCES**\n",
263
  "[\n",
264
  " {\n",
 
268
  " \"stop\": 62.14\n",
269
  " },\n",
270
  " {\n",
271
+ " \"title\": \"Use layers for ultimate flexibility and control\",\n",
272
+ " \"source\": \"https://videos-tv.adobe.com/2014-09-04/96f51d8958ae31b37cb5a15cbdc21744.mp4\",\n",
273
+ " \"start\": 0.82,\n",
274
+ " \"stop\": 30.13\n",
275
+ " },\n",
276
+ " {\n",
277
  " \"title\": \"Understand layers\",\n",
278
  " \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\n",
279
+ " \"start\": 280.4,\n",
280
+ " \"stop\": 284.58\n",
281
  " }\n",
282
  "]\n"
283
  ]
284
  }
285
  ],
286
  "source": [
287
+ "response.pretty_print()"
288
  ]
289
  },
290
  {
291
  "cell_type": "code",
292
+ "execution_count": 13,
293
  "metadata": {},
294
  "outputs": [
295
  {
296
  "data": {
297
  "text/plain": [
298
+ "[Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'data/test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 63, '_collection_name': 'dc0cf104-0069-4983-8a12-8d3de4132745'}, page_content=\"Layers are the building blocks of any image in Photoshop CC. So, it's important to understand, what layers are and why to use them - which we'll cover in this video. If you're following along, open this layered image from the downloadable practice files for this tutorial. You might think of layers like separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content. To get a sense of how layers are constructed, let's take a look at this Layers panel. I've closed my other panels, so that we can focus on the Layers panel. But you can skip that. By the way: If your Layers panel isn't showing, go up to the Window menu and choose Layers from there. The Layers panel is where you go to select and work with layers. In this image there are 4 layers, each with separate content. If you click the Eye icon to the left of a layer, you can toggle the visibility of that layer off and on. So, I'm going to turn off the visibility of the tailor layer. And keep your eye on the image, so you can see what's on that layer.\"),\n",
299
+ " Document(metadata={'video_id': 4103, 'title': 'Use layers for ultimate flexibility and control', 'desc': 'Learn how to use layers to create designs, fix photos, or build collages.', 'length': '00:05:06.55', 'group': 'data/dev.json', 'source': 'https://videos-tv.adobe.com/2014-09-04/96f51d8958ae31b37cb5a15cbdc21744.mp4', 'speech_start_stop_times': [[0.82, 5.88], [6.51, 18.389999], [19.219999, 30.13]], 'start': 0.82, 'stop': 30.13, '_id': 0, '_collection_name': 'dc0cf104-0069-4983-8a12-8d3de4132745'}, page_content=\"As a new Photoshop user, you're going to find that the layers panel is an incredibly powerful tool. The layers panel gives you the ability to isolate individual pieces of content away from the rest of the composition giving you the ability to work on individual elements within the overall document. Now, this can be used for something as literal as some type in this case, or something as subtle as a small brush stroke to add a highlight or shadow to an image.\"),\n",
300
+ " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'data/test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[280.4, 284.58]], 'start': 280.4, 'stop': 284.58, '_id': 66, '_collection_name': 'dc0cf104-0069-4983-8a12-8d3de4132745'}, page_content=\"There's lots more to learn about layers, so stay tuned for the rest of this tutorial.\")]"
301
  ]
302
  },
303
+ "execution_count": 13,
304
  "metadata": {},
305
  "output_type": "execute_result"
306
  }
307
  ],
308
  "source": [
309
+ "response.additional_kwargs[\"context\"]"
310
  ]
311
  },
312
  {
pstuts_rag/pstuts_rag/configuration.py CHANGED
@@ -1,10 +1,20 @@
1
  import os
 
2
  from dataclasses import dataclass, fields
3
  from typing import Any, Optional
 
4
 
5
  from langchain_core.runnables import RunnableConfig
6
 
7
 
 
 
 
 
 
 
 
 
8
  @dataclass(kw_only=True)
9
  class Configuration:
10
  """
@@ -13,6 +23,7 @@ class Configuration:
13
  Attributes:
14
  transcript_glob: Glob pattern for transcript JSON files (supports multiple files separated by ':')
15
  embedding_model: Name of the embedding model to use (default: custom fine-tuned snowflake model)
 
16
  max_research_loops: Maximum number of research loops to perform
17
  llm_tool_model: Name of the LLM model to use for tool calling
18
  n_context_docs: Number of context documents to retrieve for RAG
@@ -34,9 +45,19 @@ class Configuration:
34
  )
35
  )
36
 
 
 
 
 
 
 
 
 
37
  max_research_loops: int = int(os.environ.get("MAX_RESEARCH_LOOPS", "3"))
38
 
39
- llm_tool_model: str = str(os.environ.get("LLM_TOOL_MODEL", "gpt-4.1-mini"))
 
 
40
  n_context_docs: int = int(os.environ.get("N_CONTEXT_DOCS", "2"))
41
 
42
  @classmethod
@@ -55,3 +76,11 @@ class Configuration:
55
  if f.init
56
  }
57
  return cls(**{k: v for k, v in values.items() if v})
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import logging
3
  from dataclasses import dataclass, fields
4
  from typing import Any, Optional
5
+ from enum import Enum
6
 
7
  from langchain_core.runnables import RunnableConfig
8
 
9
 
10
+ class ModelAPI(Enum):
11
+ """Enum for supported embedding API providers."""
12
+
13
+ OPENAI = "OPENAI"
14
+ HUGGINGFACE = "HUGGINGFACE"
15
+ OLLAMA = "OLLAMA"
16
+
17
+
18
  @dataclass(kw_only=True)
19
  class Configuration:
20
  """
 
23
  Attributes:
24
  transcript_glob: Glob pattern for transcript JSON files (supports multiple files separated by ':')
25
  embedding_model: Name of the embedding model to use (default: custom fine-tuned snowflake model)
26
+ embedding_api: API provider for embeddings (OPENAI or HUGGINGFACE)
27
  max_research_loops: Maximum number of research loops to perform
28
  llm_tool_model: Name of the LLM model to use for tool calling
29
  n_context_docs: Number of context documents to retrieve for RAG
 
45
  )
46
  )
47
 
48
+ embedding_api: ModelAPI = ModelAPI(
49
+ os.environ.get("EMBEDDING_API", ModelAPI.HUGGINGFACE.value)
50
+ )
51
+
52
+ llm_api: ModelAPI = ModelAPI(
53
+ os.environ.get("LLM_API", ModelAPI.OPENAI.value)
54
+ )
55
+
56
  max_research_loops: int = int(os.environ.get("MAX_RESEARCH_LOOPS", "3"))
57
 
58
+ llm_tool_model: str = str(
59
+ os.environ.get("LLM_TOOL_MODEL", "smollm2:1.7b-instruct-q2_K")
60
+ )
61
  n_context_docs: int = int(os.environ.get("N_CONTEXT_DOCS", "2"))
62
 
63
  @classmethod
 
76
  if f.init
77
  }
78
  return cls(**{k: v for k, v in values.items() if v})
79
+
80
+ def print(self, print_like_function=logging.info) -> None:
81
+ """Log all configuration parameters using logging.debug."""
82
+ print_like_function("Configuration parameters:")
83
+ for field in fields(self):
84
+ if field.init:
85
+ value = getattr(self, field.name)
86
+ print_like_function(" %s: %s", field.name, value)
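Taken together, these Configuration changes let the provider be switched per run through environment variables, which are read when the module is imported (e.g. from .env before the process starts). A small usage sketch:

```python
import logging

from pstuts_rag.configuration import Configuration, ModelAPI

# EMBEDDING_API / LLM_API must name a ModelAPI member ("OPENAI", "HUGGINGFACE", "OLLAMA");
# they are read at import time, so set them in the environment (or .env) beforehand.
config = Configuration()
print(config.embedding_api)  # ModelAPI.HUGGINGFACE by default, ModelAPI.OLLAMA if EMBEDDING_API=OLLAMA
print(config.llm_api)        # ModelAPI.OPENAI by default

config.print(logging.info)   # new helper: logs every configurable field
```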
pstuts_rag/pstuts_rag/datastore.py CHANGED
@@ -3,25 +3,28 @@ import json
3
  import glob
4
  import aiofiles
5
  from pathlib import Path
6
- from typing import List, Dict, Iterator, Any
7
  import uuid
8
-
9
 
10
  import chainlit as cl
11
  from langchain_core.document_loaders import BaseLoader
12
  from langchain_experimental.text_splitter import SemanticChunker
 
13
  from langchain_openai.embeddings import OpenAIEmbeddings
 
14
  from langchain_core.documents import Document
15
  from langchain_core.embeddings import Embeddings
16
 
17
  from langchain_core.vectorstores import VectorStoreRetriever
18
 
19
  from langchain_qdrant import QdrantVectorStore
 
20
  from qdrant_client import QdrantClient
21
  from qdrant_client.http.models import Distance, VectorParams
22
  from qdrant_client.models import PointStruct
23
 
24
- from app import ApplicationState, params
25
 
26
 
27
  def batch(iterable: List[Any], size: int = 16) -> Iterator[List[Any]]:
@@ -201,13 +204,6 @@ async def chunk_transcripts(
201
  for group in docs_group:
202
  docs_chunks_semantic.extend(group)
203
 
204
- # locate individual sections of the original transcript
205
- # with the semantic chunks
206
- def is_subchunk(a: Document, ofb: Document) -> bool:
207
- return (a.metadata["video_id"] == ofb.metadata["video_id"]) and (
208
- a.page_content in ofb.page_content
209
- )
210
-
211
  # Create a lookup dictionary for faster access
212
  video_id_to_chunks: Dict[int, List[Document]] = {}
213
  for chunk in docs_chunks_verbatim:
@@ -256,6 +252,8 @@ class DatastoreManager:
256
  qdrant_client: Client for Qdrant vector database
257
  name: Unique identifier for this retriever instance
258
  vector_store: The Qdrant vector store instance
 
 
259
  """
260
 
261
  embeddings: Embeddings
@@ -264,14 +262,17 @@ class DatastoreManager:
264
  name: str
265
  vector_store: QdrantVectorStore
266
  dimensions: int
 
 
 
 
267
 
268
  def __init__(
269
  self,
270
- embeddings: Embeddings = OpenAIEmbeddings(
271
- model="text-embedding-3-small"
272
- ),
273
  qdrant_client: QdrantClient = QdrantClient(location=":memory:"),
274
  name: str = str(object=uuid.uuid4()),
 
275
  ) -> None:
276
  """Initialize the RetrieverFactory.
277
 
@@ -280,12 +281,23 @@ class DatastoreManager:
280
  qdrant_client: Qdrant client for vector database operations
281
  name: Unique identifier for this retriever instance
282
  """
283
- self.embeddings = embeddings
284
- self.name = name
 
 
 
 
 
 
 
 
 
285
  self.qdrant_client = qdrant_client
 
 
286
 
287
  # determine embedding dimension
288
- self.dimensions = len(embeddings.embed_query("test"))
289
 
290
  self.qdrant_client.recreate_collection(
291
  collection_name=self.name,
@@ -298,11 +310,21 @@ class DatastoreManager:
298
  self.vector_store = QdrantVectorStore(
299
  client=self.qdrant_client,
300
  collection_name=self.name,
301
- embedding=embeddings,
302
  )
303
 
304
  self.docs = []
305
 
 
 
 
 
 
 
 
 
 
 
306
  async def populate_database(self, raw_docs: List[Dict[str, Any]]) -> int:
307
  """
308
  Populate the vector database with processed video transcript documents.
@@ -362,6 +384,14 @@ class DatastoreManager:
362
  points=points,
363
  )
364
 
 
 
 
 
 
 
 
 
365
  return len(points)
366
 
367
  def count_docs(self) -> int:
@@ -403,38 +433,53 @@ class DatastoreManager:
403
  search_kwargs={"k": n_context_docs}
404
  )
405
 
 
 
406
 
407
- def load_json_string(content: str, group: str):
408
- """
409
- Parse JSON string content and add group metadata to each video entry.
 
410
 
411
- This utility function parses a JSON string containing video data and enhances
412
- each video dictionary with a 'group' field for categorization purposes.
413
 
414
- Args:
415
- content (str): JSON string containing a list of video objects with
416
- transcript data and metadata
417
- group (str): Group identifier to be added to each video entry,
418
- typically used for organizing videos by source or category
419
 
420
- Returns:
421
- List[Dict]: List of video dictionaries with added 'group' field
 
 
 
 
 
 
 
 
 
 
 
422
 
423
- Raises:
424
- json.JSONDecodeError: If content is not valid JSON format
425
 
426
- Example:
427
- >>> content = '[{"video_id": 1, "title": "Tutorial"}]'
428
- >>> result = load_json_string(content, "python_tutorials")
429
- >>> result[0]["group"]
430
- 'python_tutorials'
431
- """
432
- payload: List[Dict] = json.loads(content)
433
- [video.update({"group": group}) for video in payload]
434
- return payload
 
 
 
 
435
 
436
 
437
- async def load_single_json(filepath):
438
  """
439
  Asynchronously load and parse a single JSON file containing video data.
440
 
@@ -461,12 +506,13 @@ async def load_single_json(filepath):
461
 
462
  async with aiofiles.open(my_path, mode="r", encoding="utf-8") as f:
463
  content = await f.read()
464
- payload = load_json_string(content, my_path.name)
465
-
 
466
  return payload
467
 
468
 
469
- async def load_json_files(path_pattern: List[str]):
470
  """
471
  Asynchronously load and parse multiple JSON files matching given patterns.
472
 
@@ -475,7 +521,7 @@ async def load_json_files(path_pattern: List[str]):
475
  is designed to handle large datasets efficiently by leveraging async I/O.
476
 
477
  Args:
478
- path_pattern (List[str]): List of glob patterns to match JSON files.
479
  Supports standard glob syntax including recursive
480
  patterns with ** for subdirectory traversal.
481
 
@@ -493,48 +539,16 @@ async def load_json_files(path_pattern: List[str]):
493
  >>> videos = await load_json_files(patterns)
494
  >>> len(videos) # Total videos from all matched files
495
  """
 
 
496
  files = []
497
- for f in path_pattern:
498
- (files.extend(glob.glob(f, recursive=True)))
 
 
 
 
499
 
500
  tasks = [load_single_json(f) for f in files]
501
  results = await asyncio.gather(*tasks)
502
  return [item for sublist in results for item in sublist] # flatten
503
-
504
-
505
- async def fill_the_db(
506
- state: ApplicationState,
507
- ):
508
- """
509
- Initialize and populate the vector database with video transcript data.
510
-
511
- This function serves as the main entry point for database initialization.
512
- It loads video data from configured file patterns, processes them through
513
- the RAG pipeline, and provides user feedback about the loading process.
514
-
515
- The function is designed to be idempotent - it can be called multiple times
516
- safely and will only populate the database if it's empty.
517
-
518
- Args:
519
- state (ApplicationState): Application state object containing the RAG
520
- system and datastore manager for database operations
521
-
522
- Returns:
523
- None: Function operates through side effects (database population and UI updates)
524
-
525
- Side Effects:
526
- - Populates the vector database with processed video transcripts
527
- - Sends confirmation message to the user interface
528
- - Updates the state.rag.pointsLoaded counter
529
-
530
- Note:
531
- Uses the params.filename configuration to determine which files to load.
532
- Sends a Chainlit message to inform users of successful database loading.
533
- """
534
- data: List[Dict[str, Any]] = await load_json_files(params.filename)
535
-
536
- _ = await state.rag.build_chain(data)
537
- await cl.Message(
538
- content=f"✅ The database has been loaded with "
539
- f"{state.rag.pointsLoaded} elements!"
540
- ).send()
 
3
  import glob
4
  import aiofiles
5
  from pathlib import Path
6
+ from typing import List, Dict, Iterator, Any, Callable, Optional
7
  import uuid
8
+ import logging
9
 
10
  import chainlit as cl
11
  from langchain_core.document_loaders import BaseLoader
12
  from langchain_experimental.text_splitter import SemanticChunker
13
+ from langchain_huggingface import HuggingFaceEmbeddings
14
  from langchain_openai.embeddings import OpenAIEmbeddings
15
+ from langchain_ollama.embeddings import OllamaEmbeddings
16
  from langchain_core.documents import Document
17
  from langchain_core.embeddings import Embeddings
18
 
19
  from langchain_core.vectorstores import VectorStoreRetriever
20
 
21
  from langchain_qdrant import QdrantVectorStore
22
+ from pstuts_rag.configuration import Configuration, ModelAPI
23
  from qdrant_client import QdrantClient
24
  from qdrant_client.http.models import Distance, VectorParams
25
  from qdrant_client.models import PointStruct
26
 
27
+ from pstuts_rag.utils import EmbeddingsAPISelector
28
 
29
 
30
  def batch(iterable: List[Any], size: int = 16) -> Iterator[List[Any]]:
 
204
  for group in docs_group:
205
  docs_chunks_semantic.extend(group)
206
 
 
 
 
 
 
 
 
207
  # Create a lookup dictionary for faster access
208
  video_id_to_chunks: Dict[int, List[Document]] = {}
209
  for chunk in docs_chunks_verbatim:
 
252
  qdrant_client: Client for Qdrant vector database
253
  name: Unique identifier for this retriever instance
254
  vector_store: The Qdrant vector store instance
255
+ loading_complete: AsyncIO event that's set when data loading completes
256
+ _completion_callbacks: List of callbacks to call when loading completes
257
  """
258
 
259
  embeddings: Embeddings
 
262
  name: str
263
  vector_store: QdrantVectorStore
264
  dimensions: int
265
+ loading_complete: asyncio.Event
266
+ _completion_callbacks: List[Callable]
267
+
268
+ config: Optional[Configuration]
269
 
270
  def __init__(
271
  self,
272
+ embeddings: Optional[Embeddings] = None,
 
 
273
  qdrant_client: QdrantClient = QdrantClient(location=":memory:"),
274
  name: str = str(object=uuid.uuid4()),
275
+ config: Configuration = Configuration(),
276
  ) -> None:
277
  """Initialize the RetrieverFactory.
278
 
 
281
  qdrant_client: Qdrant client for vector database operations
282
  name: Unique identifier for this retriever instance
283
  """
284
+
285
+ if embeddings is None:
286
+
287
+ cls = EmbeddingsAPISelector.get(
288
+ config.embedding_api, HuggingFaceEmbeddings
289
+ )
290
+ self.embeddings = cls(model=config.embedding_model)
291
+ else:
292
+ self.embeddings = embeddings
293
+
294
+ self.name = name if name else config.eva_workflow_name
295
  self.qdrant_client = qdrant_client
296
+ self.loading_complete = asyncio.Event()
297
+ self._completion_callbacks = []
298
 
299
  # determine embedding dimension
300
+ self.dimensions = len(self.embeddings.embed_query("test"))
301
 
302
  self.qdrant_client.recreate_collection(
303
  collection_name=self.name,
 
310
  self.vector_store = QdrantVectorStore(
311
  client=self.qdrant_client,
312
  collection_name=self.name,
313
+ embedding=self.embeddings,
314
  )
315
 
316
  self.docs = []
317
 
318
+ async def from_json_globs(self, globs: List[str]) -> int:
319
+
320
+ logging.debug("Starting to load files.")
321
+ data = await load_json_files(globs)
322
+ logging.debug("Received %d JSON files.", len(data))
323
+ count = await self.populate_database(data)
324
+ logging.debug("Uploaded %d records.", count)
325
+
326
+ return count
327
+
328
  async def populate_database(self, raw_docs: List[Dict[str, Any]]) -> int:
329
  """
330
  Populate the vector database with processed video transcript documents.
 
384
  points=points,
385
  )
386
 
387
+ self.loading_complete.set()
388
+ # Execute callbacks (both sync and async)
389
+ for callback in self._completion_callbacks:
390
+ if asyncio.iscoroutinefunction(callback):
391
+ await callback()
392
+ else:
393
+ callback()
394
+
395
  return len(points)
396
 
397
  def count_docs(self) -> int:
 
433
  search_kwargs={"k": n_context_docs}
434
  )
435
 
436
+ def is_ready(self) -> bool:
437
+ """Check if the datastore has finished loading data.
438
 
439
+ Returns:
440
+ bool: True if data loading is complete, False otherwise
441
+ """
442
+ return self.loading_complete.is_set()
443
 
444
+ def add_completion_callback(self, callback: Callable):
445
+ """Add a callback to be called when data loading completes.
446
 
447
+ Args:
448
+ callback: Callable function to be called when data loading completes
 
 
 
449
 
450
+ Note:
451
+ If loading has already completed, the callback will be called immediately.
452
+ """
453
+ if self.loading_complete.is_set():
454
+ # Loading already completed, execute callback immediately
455
+ if asyncio.iscoroutinefunction(callback):
456
+ # Need to schedule async callback
457
+ asyncio.create_task(callback())
458
+ else:
459
+ callback()
460
+ else:
461
+ # Loading not complete, add to callbacks list
462
+ self._completion_callbacks.append(callback)
463
 
464
+ async def wait_for_loading(self, timeout: Optional[float] = None):
465
+ """Wait for data loading to complete.
466
 
467
+ Args:
468
+ timeout: Maximum time to wait in seconds (None for no timeout)
469
+
470
+ Returns:
471
+ bool: True if loading completed, False if timeout occurred
472
+ """
473
+ try:
474
+ await asyncio.wait_for(
475
+ self.loading_complete.wait(), timeout=timeout
476
+ )
477
+ return True
478
+ except asyncio.TimeoutError:
479
+ return False
480
 
481
 
482
+ async def load_single_json(filepath: str):
483
  """
484
  Asynchronously load and parse a single JSON file containing video data.
485
 
 
506
 
507
  async with aiofiles.open(my_path, mode="r", encoding="utf-8") as f:
508
  content = await f.read()
509
+ payload = json.loads(content)
510
+ for entry in payload:
511
+ entry.update({"group": str(my_path)})
512
  return payload
513
 
514
 
515
+ async def load_json_files(glob_list: List[str]):
516
  """
517
  Asynchronously load and parse multiple JSON files matching given patterns.
518
 
 
521
  is designed to handle large datasets efficiently by leveraging async I/O.
522
 
523
  Args:
524
+ glob_list (List[str]): List of glob patterns to match JSON files.
525
  Supports standard glob syntax including recursive
526
  patterns with ** for subdirectory traversal.
527
 
 
539
  >>> videos = await load_json_files(patterns)
540
  >>> len(videos) # Total videos from all matched files
541
  """
542
+ logging.debug("Loading from %d globs:", len(glob_list))
543
+
544
  files = []
545
+ for globstring in glob_list:
546
+ logging.debug("Loading glob: %s", globstring)
547
+ new_files = glob.glob(globstring, recursive=True)
548
+ logging.debug("New files: %d", len(new_files))
549
+ files.extend(new_files)
550
+ logging.debug("Total files: %d", len(files))
551
 
552
  tasks = [load_single_json(f) for f in files]
553
  results = await asyncio.gather(*tasks)
554
  return [item for sublist in results for item in sublist] # flatten
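The loading lifecycle added to DatastoreManager above (from_json_globs, add_completion_callback, wait_for_loading) can be driven roughly as in this sketch; the timeout and log messages are illustrative:

```python
import asyncio
import logging

from pstuts_rag.configuration import Configuration
from pstuts_rag.datastore import DatastoreManager


async def main() -> None:
    config = Configuration()
    # With no explicit embeddings, the constructor resolves them from config.embedding_api.
    datastore = DatastoreManager(config=config)

    # Populate in the background and register a zero-argument completion callback.
    datastore.add_completion_callback(lambda: logging.info("Datastore ready"))
    load_task = asyncio.create_task(
        datastore.from_json_globs(config.transcript_glob.split(":"))
    )

    # Block (with an illustrative timeout) until loading finishes.
    if await datastore.wait_for_loading(timeout=300):
        logging.info("Loaded %d chunks", datastore.count_docs())
    await load_task


asyncio.run(main())
```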
 
 
pstuts_rag/pstuts_rag/graph.py CHANGED
@@ -7,12 +7,30 @@ from langchain_core.language_models.chat_models import BaseChatModel
7
  from langchain_core.messages import AIMessage
8
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
9
  from langchain_core.runnables import Runnable, RunnableLambda
10
- from pstuts_rag.prompts import TAVILY_SYSTEM
11
- from pstuts_rag.state import PsTutsTeamState
12
 
 
 
 
 
 
 
13
 
 
 
14
  import logging
15
- from typing import Callable, Dict, Tuple
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
 
18
  def search_agent(state: PsTutsTeamState, chain: Runnable) -> Dict:
@@ -75,7 +93,7 @@ def create_rag_node(rag_chain: Runnable, name: str = "VideoSearch"):
75
  name=name,
76
  )
77
 
78
- return rag_node, lambda q: {"result": rag_chain.invoke({"question": q})}
79
 
80
 
81
  def create_agent(
@@ -126,7 +144,7 @@ def create_tavily_node(
126
  agent_node, agent=adobe_help_agent, name=name
127
  )
128
 
129
- return adobe_help_node, adobe_help_agent, adobe_help_search
130
 
131
 
132
  def create_team_supervisor(llm: BaseChatModel, system_prompt, members):
@@ -161,3 +179,108 @@ def create_team_supervisor(llm: BaseChatModel, system_prompt, members):
161
  | llm.bind_functions(functions=[function_def], function_call="route")
162
  | JsonOutputFunctionsParser()
163
  )
 
7
  from langchain_core.messages import AIMessage
8
  from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
9
  from langchain_core.runnables import Runnable, RunnableLambda
 
 
10
 
11
+ from langgraph.graph import StateGraph
12
+
13
+ from pstuts_rag.prompts import SUPERVISOR_SYSTEM, TAVILY_SYSTEM
14
+ from pstuts_rag.state import PsTutsTeamState
15
+ from pstuts_rag.datastore import DatastoreManager
16
+ from pstuts_rag.configuration import Configuration
17
 
18
+ import asyncio
19
+ import functools
20
  import logging
21
+ from typing import Callable, Dict, Tuple, Optional, Union
22
+
23
+ from langchain_huggingface import HuggingFaceEmbeddings
24
+
25
+ from app import (
26
+ ADOBEHELP,
27
+ VIDEOARCHIVE,
28
+ ApplicationState,
29
+ app_state,
30
+ enter_chain,
31
+ )
32
+
33
+ from pstuts_rag.rag_for_transcripts import retrieve_videos
34
 
35
 
36
  def search_agent(state: PsTutsTeamState, chain: Runnable) -> Dict:
 
93
  name=name,
94
  )
95
 
96
+ return rag_node
97
 
98
 
99
  def create_agent(
 
144
  agent_node, agent=adobe_help_agent, name=name
145
  )
146
 
147
+ return adobe_help_node
148
 
149
 
150
  def create_team_supervisor(llm: BaseChatModel, system_prompt, members):
 
179
  | llm.bind_functions(functions=[function_def], function_call="route")
180
  | JsonOutputFunctionsParser()
181
  )
182
+
183
+
184
+ async def startup(
185
+ config=Configuration(), on_loading_complete: Optional[Callable] = None
186
+ ):
187
+ """
188
+ Initialize the application with optional loading completion callback.
189
+
190
+ Args:
191
+ config: Configuration object with application settings
192
+ on_loading_complete: Optional callback (sync or async) to call when
193
+ datastore loading completes
194
+
195
+ Returns:
196
+ DatastoreManager: The initialized datastore manager
197
+ """
198
+
199
+ ### PROCESS THE CONFIGURATION
200
+ log_level = getattr(logging, config.eva_log_level, logging.INFO)
201
+ logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")
202
+
203
+ ### CREATE THE DATABASE
204
+
205
+ datastore = DatastoreManager(
206
+ name=config.eva_workflow_name,
207
+ embeddings=HuggingFaceEmbeddings(model_name=config.embedding_model),
208
+ )
209
+
210
+ ### START DATABASE POPULATION
211
+
212
+ globs = [str(g) for g in config.transcript_glob.split(":")]
213
+
214
+ # Add custom callback if provided, otherwise use default logging
215
+ if on_loading_complete:
216
+ datastore.add_completion_callback(on_loading_complete)
217
+ else:
218
+ # Default callback for logging
219
+ def default_logging_callback():
220
+ logging.info("🎉 Datastore loading completed!")
221
+
222
+ datastore.add_completion_callback(default_logging_callback)
223
+
224
+ asyncio.create_task(datastore.from_json_globs(globs))
225
+
226
+ ### CREATE THE RAG CHAIN
227
+ ai_graph = StateGraph(PsTutsTeamState, config_schema=Configuration)
228
+
229
+ return datastore
230
+
231
+
232
+ async def build_the_graph(current_state: ApplicationState):
233
+ """
234
+ Builds the agent graph for routing user queries.
235
+
236
+ Creates the necessary nodes (Adobe help, RAG search, supervisor), defines their
237
+ connections, and compiles the graph into a runnable chain.
238
+
239
+ Args:
240
+ current_state: Current application state with required components
241
+ """
242
+ adobe_help_node = create_tavily_node(
243
+ llm=app_state.llm, name=ADOBEHELP
244
+ )
245
+
246
+ rag_node = create_rag_node(
247
+ rag_chain=retrieve_videos(current_state.datastore_manager),
248
+ name=VIDEOARCHIVE,
249
+ )
250
+
251
+ supervisor_agent = create_team_supervisor(
252
+ current_state.llm,
253
+ SUPERVISOR_SYSTEM,
254
+ [VIDEOARCHIVE, ADOBEHELP],
255
+ )
256
+
257
+ ai_graph = langgraph.graph.StateGraph(PsTutsTeamState)
258
+
259
+ ai_graph.add_node(VIDEOARCHIVE, rag_node)
260
+ ai_graph.add_node(ADOBEHELP, adobe_help_node)
261
+ ai_graph.add_node("supervisor", supervisor_agent)
262
+
263
+ edges = [
264
+ [VIDEOARCHIVE, "supervisor"],
265
+ [ADOBEHELP, "supervisor"],
266
+ ]
267
+
268
+ [ai_graph.add_edge(*p) for p in edges]
269
+
270
+ ai_graph.add_conditional_edges(
271
+ "supervisor",
272
+ lambda x: x["next"],
273
+ {
274
+ VIDEOARCHIVE: VIDEOARCHIVE,
275
+ ADOBEHELP: ADOBEHELP,
276
+ "FINISH": langgraph.graph.END,
277
+ },
278
+ )
279
+
280
+ ai_graph.set_entry_point("supervisor")
281
+
282
+ return enter_chain | ai_graph.compile(), ai_graph
283
+
284
+
285
+ # Note: Cannot run build_the_graph() here as it requires current_state parameter
286
+ # graph, _ = asyncio.run(build_the_graph())
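
Sketch of how startup() and build_the_graph() compose (illustrative; assumes an app.ApplicationState whose llm and datastore_manager fields are populated by the caller, and that enter_chain accepts a plain question string):

import asyncio
from pstuts_rag.graph import startup, build_the_graph

async def main(state):
    # startup() schedules transcript loading and returns the datastore manager.
    state.datastore_manager = await startup()
    await state.datastore_manager.wait_for_loading(timeout=120)

    # build_the_graph() returns (entry chain | compiled graph, StateGraph sketch).
    chain, graph_sketch = await build_the_graph(state)
    print(chain.invoke("How do I remove a background in Photoshop?"))
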
pstuts_rag/pstuts_rag/rag.py CHANGED
@@ -32,6 +32,7 @@ from .prompts import RAG_PROMPT_TEMPLATES
32
 
33
  from .datastore import DatastoreManager
34
 
 
35
 
36
  class RAGChainFactory:
37
  """Factory class for creating RAG (Retrieval Augmented Generation) chains.
@@ -164,10 +165,11 @@ class RAGChainFactory:
164
 
165
  Returns:
166
  Runnable: The complete RAG chain
 
167
  """
168
  self.answer_chain = self.prompt_template | llm
169
  self.rag_chain = (
170
- self.format_query
171
  | self.prepare_query
172
  | {"input": RunnablePassthrough(), "answer": self.answer_chain}
173
  | self.pack_references
@@ -226,7 +228,7 @@ class RAGChainInstance:
226
  self.llm = llm
227
  self.embeddings = embeddings
228
 
229
- async def build_chain(
230
  self, json_payload: List[Dict[str, Any]]
231
  ) -> Runnable:
232
  """
@@ -259,3 +261,5 @@ class RAGChainInstance:
259
  )
260
  self.rag_chain = self.rag_factory.get_rag_chain(self.llm)
261
  return self.rag_chain
 
 
 
32
 
33
  from .datastore import DatastoreManager
34
 
35
+ from pstuts_rag.configuration import Configuration
36
 
37
  class RAGChainFactory:
38
  """Factory class for creating RAG (Retrieval Augmented Generation) chains.
 
165
 
166
  Returns:
167
  Runnable: The complete RAG chain
168
+
169
  """
170
  self.answer_chain = self.prompt_template | llm
171
  self.rag_chain = (
172
+ itemgetter("question")
173
  | self.prepare_query
174
  | {"input": RunnablePassthrough(), "answer": self.answer_chain}
175
  | self.pack_references
 
228
  self.llm = llm
229
  self.embeddings = embeddings
230
 
231
+ async def build_chain(
232
  self, json_payload: List[Dict[str, Any]]
233
  ) -> Runnable:
234
  """
 
261
  )
262
  self.rag_chain = self.rag_factory.get_rag_chain(self.llm)
263
  return self.rag_chain
264
+
265
+
pstuts_rag/pstuts_rag/rag_for_transcripts.py ADDED
@@ -0,0 +1,150 @@
1
+ import json
2
+ import asyncio
3
+ from operator import itemgetter
4
+ from typing import Any, Dict, Union, Optional, Callable
5
+ import logging
6
+
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from langchain_core.messages import AIMessage
9
+ from langchain_core.runnables import (
10
+ Runnable,
11
+ RunnableParallel,
12
+ RunnablePassthrough,
13
+ RunnableConfig,
14
+ )
15
+ from langchain_openai import ChatOpenAI
16
+ from langchain_huggingface import ChatHuggingFace
17
+ from langchain_ollama import ChatOllama
18
+
19
+ from .datastore import DatastoreManager
20
+ from .prompts import RAG_PROMPT_TEMPLATES
21
+
22
+ from pstuts_rag.configuration import Configuration, ModelAPI
23
+
24
+
25
+ def pack_references(msg_dict: Dict[str, Any]) -> AIMessage:
26
+ """Pack reference information into the AI message.
27
+
28
+ Takes the generated answer and input context, formats references,
29
+ and appends them to the message content.
30
+
31
+ Args:
32
+ msg_dict: Dictionary containing the answer and input
33
+
34
+ Returns:
35
+ AIMessage: Message with references appended
36
+ """
37
+ answer: AIMessage = msg_dict["answer"]
38
+ input = msg_dict["input"]
39
+
40
+ reference_dicts = [
41
+ {k: doc.metadata[k] for k in ("title", "source", "start", "stop")}
42
+ for doc in input["context"]
43
+ ]
44
+ references = str(json.dumps(reference_dicts, indent=2))
45
+
46
+ text_w_references = answer.content
47
+ if "I don't know" not in answer.content:
48
+ text_w_references = "\n".join(
49
+ [str(text_w_references), "**REFERENCES**", references]
50
+ )
51
+
52
+ output: AIMessage = answer.model_copy(
53
+ update={
54
+ "content": text_w_references,
55
+ "additional_kwargs": {
56
+ **answer.additional_kwargs,
57
+ "context": input["context"],
58
+ "question": input["question"],
59
+ },
60
+ }
61
+ )
62
+
63
+ return output
64
+
65
+
66
+ def retrieve_videos(
67
+ datastore: DatastoreManager,
68
+ config: Union[RunnableConfig, Configuration] = Configuration(),
69
+ ) -> Runnable:
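+     """Build the transcript RAG chain: retrieve context from the datastore for the
+     incoming question, answer with the configured chat model, and append references."""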
70
+
71
+ configurable = (
72
+ config
73
+ if isinstance(config, Configuration)
74
+ else Configuration.from_runnable_config(config)
75
+ )
76
+
77
+ cls = {
78
+ ModelAPI.HUGGINGFACE: ChatHuggingFace,
79
+ ModelAPI.OPENAI: ChatOpenAI,
80
+ ModelAPI.OLLAMA: ChatOllama,
81
+ }.get(configurable.llm_api, ChatOpenAI)
82
+
83
+ llm = cls(model=configurable.llm_tool_model)
84
+
85
+ answer_chain = (
86
+ ChatPromptTemplate.from_messages(list(RAG_PROMPT_TEMPLATES.items()))
87
+ | llm
88
+ )
89
+
90
+ rag_chain = (
91
+ itemgetter("question")
92
+ | RunnableParallel(
93
+ context=datastore.get_retriever(
94
+ n_context_docs=configurable.n_context_docs
95
+ ),
96
+ question=RunnablePassthrough(),
97
+ )
98
+ | {
99
+ "input": RunnablePassthrough(),
100
+ "answer": answer_chain,
101
+ }
102
+ | pack_references
103
+ )
104
+
105
+ return rag_chain
106
+
107
+
108
+ def startup(
109
+ config=Configuration(),
110
+ callback_on_loading_complete: Optional[Callable] = None,
111
+ ):
112
+ """
113
+ Initialize the application with optional loading completion callback.
114
+
115
+ Args:
116
+ config: Configuration object with application settings
117
+ callback_on_loading_complete: Optional callback (sync or async) to call when
118
+ datastore loading completes
119
+
120
+ Returns:
121
+ DatastoreManager: The initialized datastore manager
122
+ """
123
+
124
+ ### PROCESS THE CONFIGURATION
125
+ log_level = getattr(logging, config.eva_log_level, logging.INFO)
126
+ logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")
127
+
128
+ ### CREATE THE DATABASE
129
+
130
+ datastore = DatastoreManager()
131
+ if callback_on_loading_complete:
132
+ datastore.add_completion_callback(callback_on_loading_complete)
133
+
134
+ ### START DATABASE POPULATION
135
+
136
+ globs = [str(g) for g in config.transcript_glob.split(":")]
137
+
138
+ # # Add custom callback if provided, otherwise use default logging
139
+ # if on_loading_complete:
140
+ # datastore.add_completion_callback(on_loading_complete)
141
+ # else:
142
+ # # Default callback for logging
143
+ # def default_logging_callback():
144
+ # logging.info("🎉 Datastore loading completed!")
145
+
146
+ # datastore.add_completion_callback(default_logging_callback)
147
+
148
+ asyncio.create_task(datastore.from_json_globs(globs))
149
+
150
+ return datastore
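
A minimal end-to-end sketch of the new module (assumptions: the default Configuration points at a reachable chat provider; the question string is illustrative):

import asyncio
from pstuts_rag.configuration import Configuration
from pstuts_rag.rag_for_transcripts import retrieve_videos, startup

async def main():
    datastore = startup(Configuration())           # starts async transcript loading
    await datastore.wait_for_loading(timeout=120)  # wait before querying

    chain = retrieve_videos(datastore)
    answer = chain.invoke({"question": "How do I crop an image?"})
    print(answer.content)  # references are appended unless the model answered "I don't know"

asyncio.run(main())
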
pstuts_rag/pstuts_rag/utils.py CHANGED
@@ -0,0 +1,92 @@
1
+ from typing import Dict, Type
2
+
3
+ from langchain_openai import ChatOpenAI
4
+ from langchain_openai.embeddings import OpenAIEmbeddings
5
+
6
+ from langchain_huggingface import ChatHuggingFace
7
+ from langchain_huggingface import HuggingFaceEmbeddings
8
+
9
+ from langchain_ollama import ChatOllama
10
+ from langchain_ollama.embeddings import OllamaEmbeddings
11
+
12
+ from pstuts_rag.configuration import ModelAPI
13
+
14
+ # Chat model selector dictionary
15
+ """
16
+ ChatAPISelector: Dictionary mapping ModelAPI enum values to their corresponding chat model classes.
17
+
18
+ This selector enables dynamic instantiation of chat models based on the configured
19
+ API provider. Supports OpenAI, HuggingFace, and Ollama chat model implementations.
20
+
21
+ Type:
22
+ Dict[ModelAPI, Type[ChatHuggingFace | ChatOpenAI | ChatOllama]]
23
+
24
+ Keys:
25
+ ModelAPI.HUGGINGFACE: Maps to ChatHuggingFace class
26
+ ModelAPI.OPENAI: Maps to ChatOpenAI class
27
+ ModelAPI.OLLAMA: Maps to ChatOllama class
28
+
29
+ Example:
30
+ >>> from pstuts_rag.configuration import ModelAPI
31
+ >>> from pstuts_rag.utils import ChatAPISelector
32
+ >>>
33
+ >>> # Get the appropriate chat model class
34
+ >>> api_provider = ModelAPI.OPENAI
35
+ >>> chat_class = ChatAPISelector[api_provider]
36
+ >>>
37
+ >>> # Instantiate the chat model
38
+ >>> chat_model = chat_class(model="gpt-3.5-turbo", temperature=0.7)
39
+ >>>
40
+ >>> # Alternative usage with configuration
41
+ >>> config = Configuration(llm_api=ModelAPI.OLLAMA)
42
+ >>> chat_class = ChatAPISelector[config.llm_api]
43
+ >>> chat_model = chat_class(model="llama2:7b")
44
+ """
45
+ ChatAPISelector: Dict[
46
+ ModelAPI, Type[ChatHuggingFace | ChatOpenAI | ChatOllama]
47
+ ] = {
48
+ ModelAPI.HUGGINGFACE: ChatHuggingFace,
49
+ ModelAPI.OPENAI: ChatOpenAI,
50
+ ModelAPI.OLLAMA: ChatOllama,
51
+ }
52
+
53
+ # Embeddings model selector dictionary
54
+ """
55
+ EmbeddingsAPISelector: Dictionary mapping ModelAPI enum values to their corresponding embedding model classes.
56
+
57
+ This selector enables dynamic instantiation of embedding models based on the configured
58
+ API provider. Supports OpenAI, HuggingFace, and Ollama embedding implementations.
59
+
60
+ Type:
61
+ Dict[ModelAPI, Type[HuggingFaceEmbeddings | OpenAIEmbeddings | OllamaEmbeddings]]
62
+
63
+ Keys:
64
+ ModelAPI.HUGGINGFACE: Maps to HuggingFaceEmbeddings class
65
+ ModelAPI.OPENAI: Maps to OpenAIEmbeddings class
66
+ ModelAPI.OLLAMA: Maps to OllamaEmbeddings class
67
+
68
+ Example:
69
+ >>> from pstuts_rag.configuration import ModelAPI
70
+ >>> from pstuts_rag.utils import EmbeddingsAPISelector
71
+ >>>
72
+ >>> # Get the appropriate embeddings model class
73
+ >>> api_provider = ModelAPI.HUGGINGFACE
74
+ >>> embeddings_class = EmbeddingsAPISelector[api_provider]
75
+ >>>
76
+ >>> # Instantiate the embeddings model
77
+ >>> embeddings = embeddings_class(
78
+ ... model_name="sentence-transformers/all-MiniLM-L6-v2"
79
+ ... )
80
+ >>>
81
+ >>> # Alternative usage with configuration
82
+ >>> config = Configuration(embedding_api=ModelAPI.OPENAI)
83
+ >>> embeddings_class = EmbeddingsAPISelector[config.embedding_api]
84
+ >>> embeddings = embeddings_class(model="text-embedding-3-small")
85
+ """
86
+ EmbeddingsAPISelector: Dict[
87
+ ModelAPI, Type[HuggingFaceEmbeddings | OpenAIEmbeddings | OllamaEmbeddings]
88
+ ] = {
89
+ ModelAPI.HUGGINGFACE: HuggingFaceEmbeddings,
90
+ ModelAPI.OPENAI: OpenAIEmbeddings,
91
+ ModelAPI.OLLAMA: OllamaEmbeddings,
92
+ }
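
Combined selector sketch (field names llm_api, llm_tool_model, embedding_api, and embedding_model follow their use elsewhere in this commit; ChatHuggingFace is constructed differently and is omitted here):

from langchain_openai import ChatOpenAI
from langchain_huggingface import HuggingFaceEmbeddings

from pstuts_rag.configuration import Configuration
from pstuts_rag.utils import ChatAPISelector, EmbeddingsAPISelector

config = Configuration()

# Chat model: fall back to ChatOpenAI if the provider is missing from the map.
chat_cls = ChatAPISelector.get(config.llm_api, ChatOpenAI)
llm = chat_cls(model=config.llm_tool_model)  # ChatHuggingFace would need an llm= wrapper instead

# Embeddings: HuggingFaceEmbeddings expects model_name=, the others expect model=.
emb_cls = EmbeddingsAPISelector.get(config.embedding_api, HuggingFaceEmbeddings)
kwarg = "model_name" if emb_cls is HuggingFaceEmbeddings else "model"
embeddings = emb_cls(**{kwarg: config.embedding_model})
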
pyproject.toml CHANGED
@@ -45,6 +45,9 @@ dependencies = [
45
  "google>=3.0.0",
46
  "numpy==2.2.2",
47
  "tavily-python>=0.7.2",
 
 
 
48
  ]
49
  authors = [{ name = "Marko Budisic", email = "[email protected]" }]
50
  license = "MIT"
 
45
  "google>=3.0.0",
46
  "numpy==2.2.2",
47
  "tavily-python>=0.7.2",
48
+ "logging>=0.4.9.6",
49
+ "langchain-ollama>=0.3.2",
50
+ "simsimd>=6.2.1",
51
  ]
52
  authors = [{ name = "Marko Budisic", email = "[email protected]" }]
53
  license = "MIT"
uv.lock CHANGED
@@ -1704,6 +1704,19 @@ wheels = [
1704
  { url = "https://files.pythonhosted.org/packages/0b/76/eb08f7b87f3377ced3800b2896841ccdcde3e246f46523946ecf092447e6/langchain_huggingface-0.2.0-py3-none-any.whl", hash = "sha256:eed1fdfe51d16d761499fa754491a1a4dcb61798c1e5516335071d1dad852a41", size = 27329 },
1705
  ]
1706
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1707
  [[package]]
1708
  name = "langchain-openai"
1709
  version = "0.3.8"
@@ -1839,6 +1852,12 @@ dependencies = [
1839
  ]
1840
  sdist = { url = "https://files.pythonhosted.org/packages/fc/fc/628b39e31b368aacbca51721ba7a66a4d140e9be916a0c7396664fdaed7a/literalai-0.1.103.tar.gz", hash = "sha256:060e86e63c0f53041a737b2183354ac092ee8cd9faec817dc95df639bb263a7d", size = 62540 }
1841
 
 
 
 
 
 
 
1842
  [[package]]
1843
  name = "lxml"
1844
  version = "5.4.0"
@@ -2412,6 +2431,19 @@ wheels = [
2412
  { url = "https://files.pythonhosted.org/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1", size = 89265 },
2413
  ]
2414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2415
  [[package]]
2416
  name = "openai"
2417
  version = "1.59.9"
@@ -2958,10 +2990,12 @@ dependencies = [
2958
  { name = "langchain-core" },
2959
  { name = "langchain-experimental" },
2960
  { name = "langchain-huggingface" },
 
2961
  { name = "langchain-openai" },
2962
  { name = "langchain-qdrant" },
2963
  { name = "langgraph" },
2964
  { name = "langsmith" },
 
2965
  { name = "nest-asyncio" },
2966
  { name = "numpy" },
2967
  { name = "pandas" },
@@ -2973,6 +3007,7 @@ dependencies = [
2973
  { name = "scikit-learn" },
2974
  { name = "scipy" },
2975
  { name = "sentence-transformers" },
 
2976
  { name = "tavily-python" },
2977
  { name = "torch" },
2978
  { name = "tqdm" },
@@ -3023,10 +3058,12 @@ requires-dist = [
3023
  { name = "langchain-core", specifier = ">=0.3.59" },
3024
  { name = "langchain-experimental", specifier = ">=0.3.4" },
3025
  { name = "langchain-huggingface", specifier = ">=0.2.0" },
 
3026
  { name = "langchain-openai" },
3027
  { name = "langchain-qdrant", specifier = ">=0.2.0" },
3028
  { name = "langgraph", specifier = ">=0.4.3" },
3029
  { name = "langsmith", specifier = ">=0.0.50" },
 
3030
  { name = "mypy", marker = "extra == 'dev'", specifier = ">=0.900" },
3031
  { name = "nest-asyncio", specifier = ">=1.5.6" },
3032
  { name = "numpy", specifier = "==2.2.2" },
@@ -3042,6 +3079,7 @@ requires-dist = [
3042
  { name = "scikit-learn", specifier = ">=1.0.0" },
3043
  { name = "scipy", specifier = ">=1.10.0" },
3044
  { name = "sentence-transformers", specifier = ">=3.4.1" },
 
3045
  { name = "tavily-python", specifier = ">=0.7.2" },
3046
  { name = "torch", specifier = ">=2.0.0" },
3047
  { name = "tqdm", specifier = ">=4.65.0" },
@@ -3933,6 +3971,65 @@ wheels = [
3933
  { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
3934
  ]
3935
 
3936
  [[package]]
3937
  name = "six"
3938
  version = "1.17.0"
 
1704
  { url = "https://files.pythonhosted.org/packages/0b/76/eb08f7b87f3377ced3800b2896841ccdcde3e246f46523946ecf092447e6/langchain_huggingface-0.2.0-py3-none-any.whl", hash = "sha256:eed1fdfe51d16d761499fa754491a1a4dcb61798c1e5516335071d1dad852a41", size = 27329 },
1705
  ]
1706
 
1707
+ [[package]]
1708
+ name = "langchain-ollama"
1709
+ version = "0.3.2"
1710
+ source = { registry = "https://pypi.org/simple" }
1711
+ dependencies = [
1712
+ { name = "langchain-core" },
1713
+ { name = "ollama" },
1714
+ ]
1715
+ sdist = { url = "https://files.pythonhosted.org/packages/21/99/f548adc83e9f4d06805cc2fc530a94fca321318919c27ec8d5ad9fab51ab/langchain_ollama-0.3.2.tar.gz", hash = "sha256:9e747e7bda1b55cdfa344433814b035be3f06d1bae32b1ffb696ccfc12cfa8ae", size = 21147 }
1716
+ wheels = [
1717
+ { url = "https://files.pythonhosted.org/packages/3a/a7/d0fff871a353e6f602fe94912576219b47639db5a08ad454ea914cc11c8d/langchain_ollama-0.3.2-py3-none-any.whl", hash = "sha256:f7516f2f22d912dba1286d644bff00c287799e758d7e465a1439244a1bb02820", size = 20702 },
1718
+ ]
1719
+
1720
  [[package]]
1721
  name = "langchain-openai"
1722
  version = "0.3.8"
 
1852
  ]
1853
  sdist = { url = "https://files.pythonhosted.org/packages/fc/fc/628b39e31b368aacbca51721ba7a66a4d140e9be916a0c7396664fdaed7a/literalai-0.1.103.tar.gz", hash = "sha256:060e86e63c0f53041a737b2183354ac092ee8cd9faec817dc95df639bb263a7d", size = 62540 }
1854
 
1855
+ [[package]]
1856
+ name = "logging"
1857
+ version = "0.4.9.6"
1858
+ source = { registry = "https://pypi.org/simple" }
1859
+ sdist = { url = "https://files.pythonhosted.org/packages/93/4b/979db9e44be09f71e85c9c8cfc42f258adfb7d93ce01deed2788b2948919/logging-0.4.9.6.tar.gz", hash = "sha256:26f6b50773f085042d301085bd1bf5d9f3735704db9f37c1ce6d8b85c38f2417", size = 96029 }
1860
+
1861
  [[package]]
1862
  name = "lxml"
1863
  version = "5.4.0"
 
2431
  { url = "https://files.pythonhosted.org/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1", size = 89265 },
2432
  ]
2433
 
2434
+ [[package]]
2435
+ name = "ollama"
2436
+ version = "0.5.1"
2437
+ source = { registry = "https://pypi.org/simple" }
2438
+ dependencies = [
2439
+ { name = "httpx" },
2440
+ { name = "pydantic" },
2441
+ ]
2442
+ sdist = { url = "https://files.pythonhosted.org/packages/8d/96/c7fe0d2d1b3053be614822a7b722c7465161b3672ce90df71515137580a0/ollama-0.5.1.tar.gz", hash = "sha256:5a799e4dc4e7af638b11e3ae588ab17623ee019e496caaf4323efbaa8feeff93", size = 41112 }
2443
+ wheels = [
2444
+ { url = "https://files.pythonhosted.org/packages/d6/76/3f96c8cdbf3955d7a73ee94ce3e0db0755d6de1e0098a70275940d1aff2f/ollama-0.5.1-py3-none-any.whl", hash = "sha256:4c8839f35bc173c7057b1eb2cbe7f498c1a7e134eafc9192824c8aecb3617506", size = 13369 },
2445
+ ]
2446
+
2447
  [[package]]
2448
  name = "openai"
2449
  version = "1.59.9"
 
2990
  { name = "langchain-core" },
2991
  { name = "langchain-experimental" },
2992
  { name = "langchain-huggingface" },
2993
+ { name = "langchain-ollama" },
2994
  { name = "langchain-openai" },
2995
  { name = "langchain-qdrant" },
2996
  { name = "langgraph" },
2997
  { name = "langsmith" },
2998
+ { name = "logging" },
2999
  { name = "nest-asyncio" },
3000
  { name = "numpy" },
3001
  { name = "pandas" },
 
3007
  { name = "scikit-learn" },
3008
  { name = "scipy" },
3009
  { name = "sentence-transformers" },
3010
+ { name = "simsimd" },
3011
  { name = "tavily-python" },
3012
  { name = "torch" },
3013
  { name = "tqdm" },
 
3058
  { name = "langchain-core", specifier = ">=0.3.59" },
3059
  { name = "langchain-experimental", specifier = ">=0.3.4" },
3060
  { name = "langchain-huggingface", specifier = ">=0.2.0" },
3061
+ { name = "langchain-ollama", specifier = ">=0.3.2" },
3062
  { name = "langchain-openai" },
3063
  { name = "langchain-qdrant", specifier = ">=0.2.0" },
3064
  { name = "langgraph", specifier = ">=0.4.3" },
3065
  { name = "langsmith", specifier = ">=0.0.50" },
3066
+ { name = "logging", specifier = ">=0.4.9.6" },
3067
  { name = "mypy", marker = "extra == 'dev'", specifier = ">=0.900" },
3068
  { name = "nest-asyncio", specifier = ">=1.5.6" },
3069
  { name = "numpy", specifier = "==2.2.2" },
 
3079
  { name = "scikit-learn", specifier = ">=1.0.0" },
3080
  { name = "scipy", specifier = ">=1.10.0" },
3081
  { name = "sentence-transformers", specifier = ">=3.4.1" },
3082
+ { name = "simsimd", specifier = ">=6.2.1" },
3083
  { name = "tavily-python", specifier = ">=0.7.2" },
3084
  { name = "torch", specifier = ">=2.0.0" },
3085
  { name = "tqdm", specifier = ">=4.65.0" },
 
3971
  { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
3972
  ]
3973
 
3974
+ [[package]]
3975
+ name = "simsimd"
3976
+ version = "6.2.1"
3977
+ source = { registry = "https://pypi.org/simple" }
3978
+ sdist = { url = "https://files.pythonhosted.org/packages/da/1c/90e6ec0f0de20108fdd7d5665ac2916b1e8c893ce2f8d7481fd37eabbb97/simsimd-6.2.1.tar.gz", hash = "sha256:5e202c5386a4141946b7aee05faac8ebc2e36bca0a360b24080e57b59bc4ef6a", size = 165828 }
3979
+ wheels = [
3980
+ { url = "https://files.pythonhosted.org/packages/a7/5f/361cee272fd6c88f33e14e233792f59dd58836ea8c776344f7445a829ca2/simsimd-6.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e9614309af75be4d08a051dc61ed5cf41b5239b8303b37dc2f9c8a7223534392", size = 170254 },
3981
+ { url = "https://files.pythonhosted.org/packages/b8/88/edf4442ec655765d570bfb6cef81dfb12c8829c28e580459bac8a4847fb5/simsimd-6.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ea4f0f68be5f85bbcf4322bfdd1b449176cf5fdd99960c546514457635632443", size = 102331 },
3982
+ { url = "https://files.pythonhosted.org/packages/5d/2b/9e7d42ac54bdb32d76953db3bc83eec29bd5d5c9a4069d380b18e200d6bd/simsimd-6.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:12a8d60ccc8991dfbbf056c221ce4f02135f5892492894972f421a6f155015d9", size = 93455 },
3983
+ { url = "https://files.pythonhosted.org/packages/13/9c/fac1167e80328d1e332f515c9cd62da4a0e12b9aa8ee90d448eb4ad5a47f/simsimd-6.2.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a74142ea21a6fd3ec5c64e4d4acf1ec6f4d80c0bb1a5989d68af6e84f7ac612e", size = 251040 },
3984
+ { url = "https://files.pythonhosted.org/packages/31/93/b374e5538fc65cf381920bdba7603769b1b71e42afe2bb4939e9c338c423/simsimd-6.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298f7c793fc2a1eeedcefa1278eb2ef6f52ce0b36aaa8780885f96a39ce1a4e8", size = 302428 },
3985
+ { url = "https://files.pythonhosted.org/packages/e6/42/2733a0e11b660c6b10f3ec90d7fac6f96267368b961b1a43dda0456fa9f2/simsimd-6.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4025ebad36fb3fa5cffcd48d33375d5e5decc59c1129a259b74fed097eab1ab5", size = 227200 },
3986
+ { url = "https://files.pythonhosted.org/packages/eb/ae/40e0804d06a351efe27bb6f8e4d332daeb1681d3f398ca10d8a2b087ab78/simsimd-6.2.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f486682aa7a8918d86df411d3c11c635db4b67d514cb6bb499c0edab7fb8ec58", size = 432333 },
3987
+ { url = "https://files.pythonhosted.org/packages/a7/eb/a823b0227b5dc43de8125f502237dd8e844b1e803a74e46aa7c3d0f24f83/simsimd-6.2.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:173e66699597a4fcf6fa50b52cced40216fdcfba15f60b761a2bd9cb1d98a444", size = 632659 },
3988
+ { url = "https://files.pythonhosted.org/packages/0a/aa/aee48063c4a98aaea062316dedf598d0d9e09fa9edc28baab6886ae0afa8/simsimd-6.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b5c6f79f797cc020a2ff64950162dfb6d130c51a07cdac5ad97ec836e85ce50", size = 468407 },
3989
+ { url = "https://files.pythonhosted.org/packages/d4/84/e89bc71456aa2d48e5acf3795b2384f597de643f17d00d752aa8217af233/simsimd-6.2.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:25812637f43feaef1a33ae00b81a4d2b0116aadae3a08267486c1e57236fc368", size = 268908 },
3990
+ { url = "https://files.pythonhosted.org/packages/94/eb/774debec7ee727f436f15e5b5416b781c78564fff97c81a5fb3b636b4298/simsimd-6.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:592a578c788a9cb7877eff41487cc7f50474e00f774de74bea8590fa95c804ae", size = 344256 },
3991
+ { url = "https://files.pythonhosted.org/packages/62/03/fec040e7fbb66fa4766ca959cfd766a22d7a00a4e9371f046d8fcc62d846/simsimd-6.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:191c020f312350ac06eee829376b11d8c1282da8fefb4381fe0625edfb678d8d", size = 389403 },
3992
+ { url = "https://files.pythonhosted.org/packages/55/f0/ad441d90a4dde6e100155931fa4468e33cc23276c3caef6330d2a34b866c/simsimd-6.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e9ad2c247ed58ba9bb170a01295cb315a45c817775cc7e51ad342f70978a1057", size = 316665 },
3993
+ { url = "https://files.pythonhosted.org/packages/05/27/843adbc6a468a58178dcb7907e72c670c8a7c36a06d8a4c5eac9573f5d2d/simsimd-6.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0ff603134600da12175e66b842b7a7331c827fa070d1d8b63386a40bc8d09fcd", size = 669697 },
3994
+ { url = "https://files.pythonhosted.org/packages/6d/db/d2369e0d3b9ca469b923bc81d57dcfed922193e4e4d7cf5f7637df14dd51/simsimd-6.2.1-cp311-cp311-win32.whl", hash = "sha256:99dff4e04663c82284152ecc2e8bf76b2825f3f17e179abf7892e06196061056", size = 55007 },
3995
+ { url = "https://files.pythonhosted.org/packages/73/9f/13d6fca5a32a062e84db0a68433ae416073986c8e1d20b5b936cad18bece/simsimd-6.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:0efc6343c440a26cf16463c4c667655af9597bcbd55ad66f33a80b2b84de7412", size = 86855 },
3996
+ { url = "https://files.pythonhosted.org/packages/64/e9/7e0514f32c9a0e42261f598775b34a858477e0fcffccf32cc11f94e78ee2/simsimd-6.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:2d364f2c24dd38578bf0eec436c4b901c900ae1893680f46eb5632e01330d814", size = 60195 },
3997
+ { url = "https://files.pythonhosted.org/packages/81/87/1f521d471d9079d89dd6860b9dd5d0f39c1633675a30b71acd0bd37cbba5/simsimd-6.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9b3315e41bb759dc038ecd6f4fa7bcf278bf72ee7d982f752482cdc732aea271", size = 169397 },
3998
+ { url = "https://files.pythonhosted.org/packages/4b/1a/b0627589737dc75ccd2ed58893e9e7f8b8e082531bd34d319481d88018d5/simsimd-6.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d476c874bafa0d12d4c8c5c47faf17407f3c96140616384421c2aa980342b6f", size = 101478 },
3999
+ { url = "https://files.pythonhosted.org/packages/e0/b7/e766f0ce9b595927ae1c534f1409b768187e8af567f4412ca220b67c1155/simsimd-6.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9d4f15c06cc221d29e181197c7bbf92c5e829220cbeb3cd1cf080de78b04f2a", size = 93439 },
4000
+ { url = "https://files.pythonhosted.org/packages/ae/48/3b5ec9b3a6063bae2f280f5168aca7099a44fa7ec8b42875b98c79c1d49b/simsimd-6.2.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d286fd4538cb1a1c70e69da00a3acee301519d578931b41161f4f1379d1195c6", size = 251469 },
4001
+ { url = "https://files.pythonhosted.org/packages/70/86/16e8d5b9bdd34f75c7515adfad249f394653131bd1a1366076cf6113e84b/simsimd-6.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:050f68cfa85f1fb2cfa156280928e42926e3977034b755023ce1315bf59e87ff", size = 302974 },
4002
+ { url = "https://files.pythonhosted.org/packages/02/09/3f4240f2b43957aa0d72a2203b2549c0326c7baf97b7f78c72d48d4cd3d2/simsimd-6.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67bb4b17e04919545f29c7b708faaccbe027f164f8b5c9f4328604fa8f5560ea", size = 227864 },
4003
+ { url = "https://files.pythonhosted.org/packages/07/4a/8c46806493c3a98025f01d81d9f55e0e574f11279c2ad77be919262ea9eb/simsimd-6.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3d6bffd999dbb36e606b065e0180365efac2606049c4f7818e4cba2d34c3678f", size = 432491 },
4004
+ { url = "https://files.pythonhosted.org/packages/13/44/b56f207031405af52c6158c40e9f1121fe3a716d98946d9fa5919cf00266/simsimd-6.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:25adb244fb75dbf49af0d1bcac4ed4a3fef8e847d78449faa5595af0a3e20d61", size = 633061 },
4005
+ { url = "https://files.pythonhosted.org/packages/4c/ad/241f87641af09a1789af8df559aa86b45218d087e09c37c2dd8c013819d6/simsimd-6.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b4542cee77e801a9c27370fc36ae271514fc0fb2ce14a35f8b25f47989e3d267", size = 468544 },
4006
+ { url = "https://files.pythonhosted.org/packages/e2/3e/357aca7df85ed1092dfa50b91cf1b7c0df6f70b384a0e3798132dd824b5c/simsimd-6.2.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4f665228f8ff4911790b485e74b00fa9586a141dde6011970be71bb303b5a22f", size = 269133 },
4007
+ { url = "https://files.pythonhosted.org/packages/f0/67/079ca2c58bbc5812802c6ac1b332a6ef889d73cf1188726f36edc27898f6/simsimd-6.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:783b4308f80ae00763b0eaa0dac26196958f9c2df60d35a0347ebd2f82ece46d", size = 344412 },
4008
+ { url = "https://files.pythonhosted.org/packages/3c/f0/500c9002276259c17e3a6a13a7c7f84e5119602decadbf40429c978655b0/simsimd-6.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:95055e72cfe313c1c8694783bf8a631cc15673b3b775abef367e396d931db0b8", size = 389546 },
4009
+ { url = "https://files.pythonhosted.org/packages/55/a2/d3f4c6aabba0430758367b3de5bbab59b979bf3525c039b882001f1d2ade/simsimd-6.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a98f2b383f51b4f4ee568a637fc7958a347fdae0bd184cff8faa8030b6454a39", size = 316912 },
4010
+ { url = "https://files.pythonhosted.org/packages/f8/a3/2514189c3aaa1beb1714b36be86e2d3af7067c3c95152d78cc4cffff6d87/simsimd-6.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e474fd10ceb38e2c9f826108a7762f8ff7912974846d86f08c4e7b19cd35ed4", size = 670006 },
4011
+ { url = "https://files.pythonhosted.org/packages/ef/23/dbf7c4aed7542260784dc7bc2056a4e5b6d716a14a9b40989d5c3096990a/simsimd-6.2.1-cp312-cp312-win32.whl", hash = "sha256:b2530ea44fffeab25e5752bec6a5991f30fbc430b04647980db5b195c0971d48", size = 55019 },
4012
+ { url = "https://files.pythonhosted.org/packages/a0/d8/57304c2317822634abd475f5912584a3cfa13363740e9ec72c0622c894f1/simsimd-6.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:dc23283235d5b8f0373b95a547e26da2d7785647a5d0fa15c282fc8c49c0dcb0", size = 87133 },
4013
+ { url = "https://files.pythonhosted.org/packages/3f/7b/ca333232a8bc87d1e846fa2feb9f0d4778500c30493726cb48f04551dfab/simsimd-6.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:5692ce7e56253178eea9dbd58191734918409b83d54b07cfdcecf868d0150a73", size = 60401 },
4014
+ { url = "https://files.pythonhosted.org/packages/9b/f2/4ec7ed52c910a58a07043c5f3355adf4055246dafb79be57d0726e1a4aa0/simsimd-6.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:76b32fdc7142c9714e94651ece8bc00dd5139c554813211552aa358e44af0e07", size = 169399 },
4015
+ { url = "https://files.pythonhosted.org/packages/61/d3/5af24e4f42e2b5bc3a06456ea9068d0fbcd23d8ceeb0e09fe54ed72cfdba/simsimd-6.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f44e5e2319427f94db658c6f75caae78850da505902874a1664a83ef5713f333", size = 101484 },
4016
+ { url = "https://files.pythonhosted.org/packages/cf/86/816050f0fd0767e960c6b900e3c97fd6a4ae54a6aa5b8ef24846757a3f7d/simsimd-6.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:05323cbad7200592c2e53fbcc759e615594e8ca444ef5eddf9f3fb196ad4de9c", size = 93447 },
4017
+ { url = "https://files.pythonhosted.org/packages/e9/7e/61dc3392eafd9fc20357b448aac5f84c84ad61289ab0ab3e5a4aaa1ca3ef/simsimd-6.2.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1f3cbe5c39db2bb64f30999104de1215ba3805d6059af7bc5a9d662d50f4707", size = 251501 },
4018
+ { url = "https://files.pythonhosted.org/packages/06/55/99d3cf2c2d844c1a57d81379acaebac2e0a0efdf1e73a53990cd84c1d719/simsimd-6.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaa94e0932ae2a48b7e4df8c29204dc9fe59f72b1faeb08e9d5015bf51fb9f21", size = 302991 },
4019
+ { url = "https://files.pythonhosted.org/packages/6f/99/597b322835147f407e6f611810cb8232055711398fbbd47e6a14bfc0995f/simsimd-6.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:508465f8d4e3e0fff07c939921aeedf55b0ade9f56f64e938c350c283dea42fb", size = 227917 },
4020
+ { url = "https://files.pythonhosted.org/packages/ba/8a/6a6596a97d1cc7068a26935bbdd7f170a889240b8081e000aef09b6d0549/simsimd-6.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ca67f6273ef544c74c48b134af756de7c98a711ccf69cd0791225f26dd449281", size = 432527 },
4021
+ { url = "https://files.pythonhosted.org/packages/46/0e/5c6e82fa9fe9a21481fe0f6546b4986e07e42bd4d8b6f04f4475b8d7564e/simsimd-6.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d470b43ce606f21f54a23fc19ad6928333e17d0956b02eb27b7b112edc156a10", size = 633095 },
4022
+ { url = "https://files.pythonhosted.org/packages/ae/53/2e17bd16e2ca2a73cd447b89fa7059ae7275c82840f229bf917936ee800a/simsimd-6.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59518b9834c167a1dd8900600718e95cdadc9d74525452f426aa8455a38c55ef", size = 468561 },
4023
+ { url = "https://files.pythonhosted.org/packages/86/8b/1319605c630973741bc749b6e432e56dded2b6a7db0744b659c0de613ab3/simsimd-6.2.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:59c2978c4e402097d8a4b38f076ff98cc43e6b059d53f89736404f26e9a9bd5a", size = 269157 },
4024
+ { url = "https://files.pythonhosted.org/packages/53/50/1cac5113a542c82d5b5399d454c578a65ba14951bfff38aef297104f72fe/simsimd-6.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:edc68e727d53ed2866dcfb625f15e52be8f1e6809f4be2147bf8d2115a2542b7", size = 344437 },
4025
+ { url = "https://files.pythonhosted.org/packages/9a/72/44905ee0e2ed999c52ad1eebf2c8705ce2776212a6387d77355df2c76704/simsimd-6.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9e5e82551d75c0e2cd0d4b8af8db1cae7b5ac6dcc076c0c760870ff81f78135b", size = 389569 },
4026
+ { url = "https://files.pythonhosted.org/packages/ee/d6/9b4a9141ceb29150d86698553c8e0193256b069bc755e875836c14a6f12e/simsimd-6.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2fa19f8c9786757d19afcbda9f8fb68de55e4f5562725ae8727f887d01bf0e4d", size = 316923 },
4027
+ { url = "https://files.pythonhosted.org/packages/ce/c0/de6aebd58b8de8f0177395b8fd68afb9a27ec010427c4ccd6104b94b6569/simsimd-6.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b0748aa6bd4df4c5a3f5e979aec14b26588f1b2e0d44075dcc9eaf4d555e15b", size = 670038 },
4028
+ { url = "https://files.pythonhosted.org/packages/77/32/4c74664656231ccb43be4328dba40e9ada63d3cc1e557b1785ae0b9560b5/simsimd-6.2.1-cp313-cp313-win32.whl", hash = "sha256:7f43721e1a4ebe8d2245b0e85dd7de7153d1bf22839579d5f69a345909c68d9e", size = 55017 },
4029
+ { url = "https://files.pythonhosted.org/packages/76/7f/57e02f6b2d09a1d42697e739b002bbe2112f8b8384d15d166154ec4cec44/simsimd-6.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:6af1565e0ef7060bc52a38e3273a8e6e92aff47835965dc5311298563475935e", size = 87138 },
4030
+ { url = "https://files.pythonhosted.org/packages/38/b9/941876e98dd1f98c158cd5e6633dc1573d1be6daf8f2e3ad5d15e6a8024d/simsimd-6.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:e690b41377c8dd157d585713b0bc35c845aee7742334bf12d1f087fc8a65b6c3", size = 60408 },
4031
+ ]
4032
+
4033
  [[package]]
4034
  name = "six"
4035
  version = "1.17.0"