feat: Add multi-provider API support with configurable model selectors

- Introduce ModelAPI enum for OpenAI, HuggingFace, and Ollama providers
- Add ChatAPISelector and EmbeddingsAPISelector for dynamic model instantiation
- Update related modules to support configurable AI model providers
- Enable runtime selection of chat and embedding models based on configuration
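The two selectors live in the new pstuts_rag/utils.py (+92 lines, not shown in this commit view). Judging from the call site in datastore.py below, which does `EmbeddingsAPISelector.get(config.embedding_api, HuggingFaceEmbeddings)` and then `cls(model=config.embedding_model)`, they behave like mappings from ModelAPI members to model classes. A minimal sketch under that assumption — the dictionary contents are inferred, not the actual utils.py:

    # Hypothetical reconstruction of the selectors in pstuts_rag/utils.py.
    from langchain_huggingface import HuggingFaceEmbeddings
    from langchain_ollama import ChatOllama
    from langchain_ollama.embeddings import OllamaEmbeddings
    from langchain_openai import ChatOpenAI
    from langchain_openai.embeddings import OpenAIEmbeddings

    from pstuts_rag.configuration import ModelAPI

    # Each selector maps a ModelAPI member to the class that builds the model;
    # callers instantiate via Selector.get(api, default)(model=...).
    EmbeddingsAPISelector = {
        ModelAPI.OPENAI: OpenAIEmbeddings,
        ModelAPI.HUGGINGFACE: HuggingFaceEmbeddings,
        ModelAPI.OLLAMA: OllamaEmbeddings,
    }

    ChatAPISelector = {
        ModelAPI.OPENAI: ChatOpenAI,
        ModelAPI.OLLAMA: ChatOllama,  # e.g. deepseek-r1:8b served locally
    }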
Files changed:

- .vscode/settings.json +2 -0
- app.py +21 -95
- notebooks/transcript_rag.ipynb +165 -170
- pstuts_rag/pstuts_rag/configuration.py +30 -1
- pstuts_rag/pstuts_rag/datastore.py +99 -85
- pstuts_rag/pstuts_rag/graph.py +128 -5
- pstuts_rag/pstuts_rag/rag.py +6 -2
- pstuts_rag/pstuts_rag/rag_for_transcripts.py +150 -0
- pstuts_rag/pstuts_rag/utils.py +92 -0
- pyproject.toml +3 -0
- uv.lock +97 -0
.vscode/settings.json CHANGED

@@ -2,6 +2,8 @@
   "python.pythonPath": "/home/mbudisic/Documents/PsTuts-RAG/.venv/bin/python",
   "cSpell.words": [
     "chainlit",
+    "huggingface",
+    "ollama",
     "pstuts",
     "qdrant"
   ],
app.py CHANGED

@@ -1,11 +1,13 @@
 from pstuts_rag.configuration import Configuration
+from pstuts_rag.datastore import fill_the_db
+from pstuts_rag.graph import build_the_graph
 from pstuts_rag.state import PsTutsTeamState
 import requests
 import asyncio
 import json
 import os
 import getpass
-from typing import …
+from typing import List, Tuple
 import re

 import chainlit as cl
@@ -15,27 +17,20 @@ from langchain_core.language_models import BaseChatModel
 from langchain_core.runnables import Runnable
 from langchain_openai import ChatOpenAI
 from langchain_core.embeddings import Embeddings
-from …
-
+from langchain_huggingface import HuggingFaceEmbeddings
+

 from langchain_core.messages import HumanMessage, BaseMessage
 import langgraph.graph

-from pstuts_rag.graph import create_team_supervisor
-from pstuts_rag.graph import create_tavily_node

 import pstuts_rag.datastore
 import pstuts_rag.rag

-from pstuts_rag.graph import create_rag_node
-
-from pstuts_rag.datastore import load_json_files
-from pstuts_rag.prompts import SUPERVISOR_SYSTEM

 import nest_asyncio
 from uuid import uuid4

-from sentence_transformers import SentenceTransformer
 import logging

 logging.getLogger("httpx").setLevel(logging.WARNING)
@@ -70,7 +65,7 @@ class ApplicationState:
     Maintains the state of the application and its components.

     Attributes:
-        embeddings: …
+        embeddings: Embeddings model for vector operations
        docs: List of loaded documents
        qdrant_client: Client for Qdrant vector database
        vector_store: Vector store for document retrieval
@@ -87,15 +82,15 @@ class ApplicationState:

     embeddings: Embeddings = None
     docs: List[Document] = []
-    qdrant_client
-    vector_store
-    datastore_manager
-    rag
-    llm: BaseChatModel
-    rag_chain: Runnable
+    qdrant_client = None
+    vector_store = None
+    datastore_manager = None
+    rag = None
+    llm: BaseChatModel = None
+    rag_chain: Runnable = None

-    ai_graph: Runnable
-    ai_graph_sketch
+    ai_graph: Runnable = None
+    ai_graph_sketch = None

     tasks: List[asyncio.Task] = []

@@ -126,83 +121,12 @@ ai_state = PsTutsTeamState(
 )


-async def fill_the_db(
-    state: ApplicationState,
-):
-    """
-    Populates the vector database with document data if it's empty.
-
-    Args:
-        state: Application state containing the datastore manager
-
-    Returns:
-        0 if database already has documents, otherwise None
-    """
-    data: List[Dict[str, Any]] = await load_json_files(params.filename)
-
-    _ = await state.rag.build_chain(data)
-    await cl.Message(
-        content=f"✅ The database has been loaded with {state.rag.pointsLoaded} elements!"
-    ).send()
-
-
-async def build_the_graph(current_state: ApplicationState):
-    """
-    Builds the agent graph for routing user queries.
-
-    Creates the necessary nodes (Adobe help, RAG search, supervisor), defines their
-    connections, and compiles the graph into a runnable chain.
-
-    Args:
-        current_state: Current application state with required components
-    """
-    adobe_help_node, _, _ = create_tavily_node(
-        llm=app_state.llm, name=ADOBEHELP
-    )
-
-    rag_node, _ = create_rag_node(
-        rag_chain=current_state.rag.rag_chain,
-        name=VIDEOARCHIVE,
-    )
-
-    supervisor_agent = create_team_supervisor(
-        current_state.llm,
-        SUPERVISOR_SYSTEM,
-        [VIDEOARCHIVE, ADOBEHELP],
-    )
-
-    ai_graph = langgraph.graph.StateGraph(PsTutsTeamState)
-
-    ai_graph.add_node(VIDEOARCHIVE, rag_node)
-    ai_graph.add_node(ADOBEHELP, adobe_help_node)
-    ai_graph.add_node("supervisor", supervisor_agent)
-
-    edges = [
-        [VIDEOARCHIVE, "supervisor"],
-        [ADOBEHELP, "supervisor"],
-    ]
-
-    [ai_graph.add_edge(*p) for p in edges]
-
-    ai_graph.add_conditional_edges(
-        "supervisor",
-        lambda x: x["next"],
-        {
-            VIDEOARCHIVE: VIDEOARCHIVE,
-            ADOBEHELP: ADOBEHELP,
-            "FINISH": langgraph.graph.END,
-        },
-    )
-
-    ai_graph.set_entry_point("supervisor")
-    app_state.ai_graph_sketch = ai_graph
-    app_state.ai_graph = enter_chain | ai_graph.compile()
-
-
 async def initialize():

     await fill_the_db(app_state)
-    await build_the_graph(…
+    app_state.ai_graph, app_state.ai_graph_sketch = await build_the_graph(
+        app_state
+    )


 def enter_chain(message: str):
@@ -233,8 +157,10 @@ async def on_chat_start():
     for database population and graph building.
     """
     app_state.llm = ChatOpenAI(model=params.tool_calling_model, temperature=0)
-
-    app_state.embeddings = …
+    # Use LangChain's built-in HuggingFaceEmbeddings wrapper
+    app_state.embeddings = HuggingFaceEmbeddings(
+        model_name=params.embedding_model
+    )

     app_state.rag = pstuts_rag.rag.RAGChainInstance(
         name="deployed",
notebooks/transcript_rag.ipynb CHANGED

@@ -2,267 +2,231 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": …
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "from getpass import getpass\n",
-    "from dotenv import load_dotenv\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pstuts_rag"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "…
-      " %reload_ext autoreload\n"
+      "/home/mbudisic/Documents/PsTuts-RAG\n"
      ]
     }
    ],
    "source": [
-    "%…
-    "…
+    "%cd /home/mbudisic/Documents/PsTuts-RAG\n",
+    "import os\n",
+    "from getpass import getpass\n",
+    "from dotenv import load_dotenv\n",
+    "from pstuts_rag.configuration import Configuration\n",
+    "import asyncio\n",
+    "\n",
+    "import nest_asyncio\n",
+    "nest_asyncio.apply()\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "…
-    "@dataclass\n",
-    "class ApplicationParameters:\n",
-    "    filename = \"data/test.json\"\n",
-    "    embedding_model = \"text-embedding-3-small\"\n",
-    "    n_context_docs = 2\n",
-    "\n",
-    "params = ApplicationParameters()"
+    "import pstuts_rag"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
-    "…
-    "…
-    "…
-    "…
-    "        prompt_message=key_name\n",
-    "    if key_name not in os.environ or not os.environ[key_name]:\n",
-    "        os.environ[key_name] = getpass.getpass(prompt_message)\n",
-    "\n",
-    "set_api_key_if_not_present(\"OPENAI_API_KEY\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Data Preparation\n",
-    "\n",
-    "First, we will read in the transcripts of the videos and convert them to Documents\n",
-    "with appropriate metadata."
+    "import logging\n",
+    "logging.basicConfig(\n",
+    "    level=Configuration().eva_log_level,\n",
+    "    format='%(asctime)s - %(levelname)s - %(funcName)s - %(message)s'\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "…
-    "…
-    "\n",
-    "from pstuts_rag.loader import load_json_files\n",
-    "filename = [\"../data/test.json\",\"../data/dev.json\"]\n",
-    "from typing import List, Dict, Any\n",
-    "data:List[Dict[str,Any]] = await load_json_files(filename)\n"
+    "%load_ext autoreload\n",
+    "%autoreload 2\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "…
-       " 'Remove unwanted objects from photos',\n",
-       " 'Include vector graphics',\n",
-       " 'Remove unwanted content',\n",
-       " 'Add a central element',\n",
-       " 'Set the resolution',\n",
-       " 'Understand layers',\n",
-       " 'Adjust brightness and contrast',\n",
-       " 'Remove a large object',\n",
-       " 'Add text',\n",
-       " 'Replace a background using a layer mask',\n",
-       " 'Use layers for ultimate flexibility and control',\n",
-       " 'Select part of an image',\n",
-       " 'Get to know layers',\n",
-       " 'Improve lighting and color',\n",
-       " 'Add dreamlike elements to the composite',\n",
-       " 'Expand the canvas',\n",
-       " 'Resize a layer',\n",
-       " 'Adjust hue and saturation',\n",
-       " 'Learn selection basics',\n",
-       " 'Choose a color',\n",
-       " 'Add texture to an image']"
+       "True"
       ]
      },
-     "execution_count": …
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "…
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## R - retrieval"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Let's hit it with a semantic chunker."
+    "load_dotenv()\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pstuts_rag.datastore import DatastoreManager\n",
-    "from qdrant_client import QdrantClient\n",
-    "\n",
-    "client = QdrantClient(\":memory:\")\n",
-    "\n",
-    "retriever_factory = DatastoreManager(qdrant_client=client,name=\"local_test\")\n",
-    "if retriever_factory.count_docs() == 0:\n",
-    "    await retriever_factory.populate_database(raw_docs=data)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## A - Augmentation\n",
-    "\n",
-    "We need to populate a prompt for LLM.\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
+   "execution_count": 6,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-05-30 20:08:35,183 - INFO - <module> - Loaded .env file\n"
+     ]
+    }
+   ],
    "source": [
-    "…
-    "\n",
-    "We will use a 4.1-nano to generate answers."
+    "from pstuts_rag.rag_for_transcripts import *"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 7,
    "metadata": {},
    "outputs": [
     {
-     "name": "…
+     "name": "stderr",
      "output_type": "stream",
      "text": [
-      "…
+      "2025-05-30 20:08:36,978 - INFO - print - Configuration parameters:\n",
+      "2025-05-30 20:08:36,980 - INFO - print - eva_workflow_name: EVA_workflow\n",
+      "2025-05-30 20:08:36,980 - INFO - print - eva_log_level: INFO\n",
+      "2025-05-30 20:08:36,981 - INFO - print - transcript_glob: ./data/dev.json:./data/test.json\n",
+      "2025-05-30 20:08:36,982 - INFO - print - embedding_model: mxbai-embed-large\n",
+      "2025-05-30 20:08:36,983 - INFO - print - embedding_api: ModelAPI.OLLAMA\n",
+      "2025-05-30 20:08:36,984 - INFO - print - llm_api: ModelAPI.OLLAMA\n",
+      "2025-05-30 20:08:36,985 - INFO - print - max_research_loops: 2\n",
+      "2025-05-30 20:08:36,986 - INFO - print - llm_tool_model: deepseek-r1:8b\n",
+      "2025-05-30 20:08:36,987 - INFO - print - n_context_docs: 3\n"
      ]
     }
    ],
    "source": [
-    "…
-    "\n",
-    "rag_factory = RAGChainFactory(retriever=retriever_factory.get_retriever())"
+    "Configuration().print(logging.info)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 8,
    "metadata": {},
-   "outputs": [ …
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-05-30 20:08:37,093 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
+      "2025-05-30 20:08:37,118 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n"
+     ]
+    }
+   ],
    "source": [
-    "…
-    "\n",
-    "llm = ChatOpenAI(model=\"gpt-4.1-mini\",temperature=0)"
+    "datastore:DatastoreManager = startup(callback_on_loading_complete=lambda _: logging.warning(\"Loading complete.\")) "
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 9,
    "metadata": {},
-   "outputs": [ …
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-05-30 20:08:38,120 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
+      … (31 further near-identical /api/embed log lines, 20:08:39 through 20:08:59, elided) …
+      "2025-05-30 20:08:59,110 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "…
-    "    \n"
+    "await datastore.wait_for_loading()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 10,
    "metadata": {},
    "outputs": [],
    "source": [
-    "…
+    "chain = retrieve_videos(datastore)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
-     "…
-     …
-     …
-     ]
-    },
-    "execution_count": 28,
-    "metadata": {},
-    "output_type": "execute_result"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2025-05-30 20:08:59,268 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/embed \"HTTP/1.1 200 OK\"\n",
+      "2025-05-30 20:09:11,924 - INFO - _send_single_request - HTTP Request: POST http://127.0.0.1:11434/api/chat \"HTTP/1.1 200 OK\"\n"
+     ]
     }
    ],
    "source": [
-    "…
+    "response = chain.invoke({\"question\":\"What is a layer?\"})"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": …
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
@@ -271,7 +235,30 @@
     "text": [
      "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
      "\n",
-     "…
+     "<think>\n",
+     "Okay, I need to figure out what a layer is based on the provided transcript. Let me go through the context step by step.\n",
+     "\n",
+     "First, looking at the first document with video ID 19172. The title says \"Understand layers\" and the description mentions that layers are the building blocks of any image in Photoshop CC. So, layers must be fundamental components.\n",
+     "\n",
+     "In the page content, it compares layers to separate flat prints of glass stacked on top of each other. Each layer has different content. That makes me think of layers as separate elements or parts of an image that can be edited individually.\n",
+     "\n",
+     "There's also a mention of the Layers panel where you select and work with layers. It shows 4 layers, each with distinct content. The Eye icon can toggle visibility, so layers can be shown or hidden. This suggests that layers are like different layers of content that can be managed separately.\n",
+     "\n",
+     "Looking at another document with video ID 4103, it says layers allow isolation of individual pieces of content within a composition. For example, text or brush strokes can be separated into their own layers for detailed editing without affecting other parts.\n",
+     "\n",
+     "Another mention from the same video talks about using layers for adding highlights or shadows by isolating small elements, which means each part can be worked on independently.\n",
+     "\n",
+     "Putting this together, a layer is like a separate sheet in an image that holds different elements. Each layer allows you to edit, move, or manipulate specific parts without affecting others. The Layers panel helps manage and control these layers for better organization and editing flexibility.\n",
+     "</think>\n",
+     "\n",
+     "🎨 **What is a Layer?** \n",
+     "Layers are like separate sheets in an image, each holding distinct content. Think of them as individual elements stacked on top of each other, allowing you to edit or manipulate specific parts without affecting others. \n",
+     "\n",
+     "For example: \n",
+     "- Each layer can contain text, images, or design elements. \n",
+     "- You can toggle their visibility using the Eye icon. \n",
+     "\n",
+     "📌 **Timestamp**: 0.47 - 3.41 minutes (video ID 19172)\n",
      "**REFERENCES**\n",
      "[\n",
      "    {\n",
@@ -281,37 +268,45 @@
      "        \"stop\": 62.14\n",
      "    },\n",
      "    {\n",
+     "        \"title\": \"Use layers for ultimate flexibility and control\",\n",
+     "        \"source\": \"https://videos-tv.adobe.com/2014-09-04/96f51d8958ae31b37cb5a15cbdc21744.mp4\",\n",
+     "        \"start\": 0.82,\n",
+     "        \"stop\": 30.13\n",
+     "    },\n",
+     "    {\n",
      "        \"title\": \"Understand layers\",\n",
      "        \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\n",
-     "        \"start\": …
-     "        \"stop\": …
+     "        \"start\": 280.4,\n",
+     "        \"stop\": 284.58\n",
      "    }\n",
      "]\n"
     ]
    }
   ],
   "source": [
-    "…
+    "response.pretty_print()"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": …
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-      "'Layers are the building blocks of any image in Photoshop CC. You…
+      "[Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'data/test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 63, '_collection_name': 'dc0cf104-0069-4983-8a12-8d3de4132745'}, page_content=\"Layers are the building blocks of any image in Photoshop CC. So, it's important to understand, what layers are and why to use them - which we'll cover in this video. If you're following along, open this layered image from the downloadable practice files for this tutorial. You might think of layers like separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content. To get a sense of how layers are constructed, let's take a look at this Layers panel. I've closed my other panels, so that we can focus on the Layers panel. But you can skip that. By the way: If your Layers panel isn't showing, go up to the Window menu and choose Layers from there. The Layers panel is where you go to select and work with layers. In this image there are 4 layers, each with separate content. If you click the Eye icon to the left of a layer, you can toggle the visibility of that layer off and on. So, I'm going to turn off the visibility of the tailor layer. And keep your eye on the image, so you can see what's on that layer.\"),\n",
+      " Document(metadata={'video_id': 4103, 'title': 'Use layers for ultimate flexibility and control', 'desc': 'Learn how to use layers to create designs, fix photos, or build collages.', 'length': '00:05:06.55', 'group': 'data/dev.json', 'source': 'https://videos-tv.adobe.com/2014-09-04/96f51d8958ae31b37cb5a15cbdc21744.mp4', 'speech_start_stop_times': [[0.82, 5.88], [6.51, 18.389999], [19.219999, 30.13]], 'start': 0.82, 'stop': 30.13, '_id': 0, '_collection_name': 'dc0cf104-0069-4983-8a12-8d3de4132745'}, page_content=\"As a new Photoshop user, you're going to find that the layers panel is an incredibly powerful tool. The layers panel gives you the ability to isolate individual pieces of content away from the rest of the composition giving you the ability to work on individual elements within the overall document. Now, this can be used for something as literal as some type in this case, or something as subtle as a small brush stroke to add a highlight or shadow to an image.\"),\n",
+      " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'data/test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[280.4, 284.58]], 'start': 280.4, 'stop': 284.58, '_id': 66, '_collection_name': 'dc0cf104-0069-4983-8a12-8d3de4132745'}, page_content=\"There's lots more to learn about layers, so stay tuned for the rest of this tutorial.\")]"
      ]
     },
-    "execution_count": …
+    "execution_count": 13,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
-   "…
+   "response.additional_kwargs[\"context\"]"
  ]
 },
 {
pstuts_rag/pstuts_rag/configuration.py CHANGED

@@ -1,10 +1,20 @@
 import os
+import logging
 from dataclasses import dataclass, fields
 from typing import Any, Optional
+from enum import Enum

 from langchain_core.runnables import RunnableConfig


+class ModelAPI(Enum):
+    """Enum for supported embedding API providers."""
+
+    OPENAI = "OPENAI"
+    HUGGINGFACE = "HUGGINGFACE"
+    OLLAMA = "OLLAMA"
+
+
 @dataclass(kw_only=True)
 class Configuration:
     """
@@ -13,6 +23,7 @@ class Configuration:
     Attributes:
         transcript_glob: Glob pattern for transcript JSON files (supports multiple files separated by ':')
         embedding_model: Name of the embedding model to use (default: custom fine-tuned snowflake model)
+        embedding_api: API provider for embeddings (OPENAI or HUGGINGFACE)
         max_research_loops: Maximum number of research loops to perform
         llm_tool_model: Name of the LLM model to use for tool calling
         n_context_docs: Number of context documents to retrieve for RAG
@@ -34,9 +45,19 @@ class Configuration:
         )
     )

+    embedding_api: ModelAPI = ModelAPI(
+        os.environ.get("EMBEDDING_API", ModelAPI.HUGGINGFACE.value)
+    )
+
+    llm_api: ModelAPI = ModelAPI(
+        os.environ.get("LLM_API", ModelAPI.OPENAI.value)
+    )
+
     max_research_loops: int = int(os.environ.get("MAX_RESEARCH_LOOPS", "3"))

-    llm_tool_model: str = str(…
+    llm_tool_model: str = str(
+        os.environ.get("LLM_TOOL_MODEL", "smollm2:1.7b-instruct-q2_K")
+    )
     n_context_docs: int = int(os.environ.get("N_CONTEXT_DOCS", "2"))

     @classmethod
@@ -55,3 +76,11 @@ class Configuration:
             if f.init
         }
         return cls(**{k: v for k, v in values.items() if v})
+
+    def print(self, print_like_function=logging.info) -> None:
+        """Log all configuration parameters using logging.debug."""
+        print_like_function("Configuration parameters:")
+        for field in fields(self):
+            if field.init:
+                value = getattr(self, field.name)
+                print_like_function("  %s: %s", field.name, value)
pstuts_rag/pstuts_rag/datastore.py CHANGED

@@ -3,25 +3,28 @@ import json
 import glob
 import aiofiles
 from pathlib import Path
-from typing import List, Dict, Iterator, Any
+from typing import List, Dict, Iterator, Any, Callable, Optional
 import uuid
-
+import logging

 import chainlit as cl
 from langchain_core.document_loaders import BaseLoader
 from langchain_experimental.text_splitter import SemanticChunker
+from langchain_huggingface import HuggingFaceEmbeddings
 from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain_ollama.embeddings import OllamaEmbeddings
 from langchain_core.documents import Document
 from langchain_core.embeddings import Embeddings

 from langchain_core.vectorstores import VectorStoreRetriever

 from langchain_qdrant import QdrantVectorStore
+from pstuts_rag.configuration import Configuration, ModelAPI
 from qdrant_client import QdrantClient
 from qdrant_client.http.models import Distance, VectorParams
 from qdrant_client.models import PointStruct

-from …
+from pstuts_rag.utils import EmbeddingsAPISelector


 def batch(iterable: List[Any], size: int = 16) -> Iterator[List[Any]]:
@@ -201,13 +204,6 @@ async def chunk_transcripts(
     for group in docs_group:
         docs_chunks_semantic.extend(group)

-    # locate individual sections of the original transcript
-    # with the semantic chunks
-    def is_subchunk(a: Document, ofb: Document) -> bool:
-        return (a.metadata["video_id"] == ofb.metadata["video_id"]) and (
-            a.page_content in ofb.page_content
-        )
-
     # Create a lookup dictionary for faster access
     video_id_to_chunks: Dict[int, List[Document]] = {}
     for chunk in docs_chunks_verbatim:
@@ -256,6 +252,8 @@ class DatastoreManager:
         qdrant_client: Client for Qdrant vector database
         name: Unique identifier for this retriever instance
         vector_store: The Qdrant vector store instance
+        loading_complete: AsyncIO event that's set when data loading completes
+        _completion_callbacks: List of callbacks to call when loading completes
     """

     embeddings: Embeddings
@@ -264,14 +262,17 @@ class DatastoreManager:
     name: str
     vector_store: QdrantVectorStore
     dimensions: int
+    loading_complete: asyncio.Event
+    _completion_callbacks: List[Callable]
+
+    config: Optional[Configuration]

     def __init__(
         self,
-        embeddings: Embeddings = …
-            model="text-embedding-3-small"
-        ),
+        embeddings: Optional[Embeddings] = None,
         qdrant_client: QdrantClient = QdrantClient(location=":memory:"),
         name: str = str(object=uuid.uuid4()),
+        config: Configuration = Configuration(),
     ) -> None:
         """Initialize the RetrieverFactory.

@@ -280,12 +281,23 @@ class DatastoreManager:
             qdrant_client: Qdrant client for vector database operations
             name: Unique identifier for this retriever instance
         """
-        …
-        …
+
+        if embeddings is None:
+
+            cls = EmbeddingsAPISelector.get(
+                config.embedding_api, HuggingFaceEmbeddings
+            )
+            self.embeddings = cls(model=config.embedding_model)
+        else:
+            self.embeddings = embeddings
+
+        self.name = name if name else config.eva_workflow_name
         self.qdrant_client = qdrant_client
+        self.loading_complete = asyncio.Event()
+        self._completion_callbacks = []

         # determine embedding dimension
-        self.dimensions = len(embeddings.embed_query("test"))
+        self.dimensions = len(self.embeddings.embed_query("test"))

         self.qdrant_client.recreate_collection(
             collection_name=self.name,
@@ -298,11 +310,21 @@ class DatastoreManager:
         self.vector_store = QdrantVectorStore(
             client=self.qdrant_client,
             collection_name=self.name,
-            embedding=embeddings,
+            embedding=self.embeddings,
         )

         self.docs = []

+    async def from_json_globs(self, globs: List[str]) -> int:
+
+        logging.debug("Starting to load files.")
+        data = await load_json_files(globs)
+        logging.debug("Received %d JSON files.", len(data))
+        count = await self.populate_database(data)
+        logging.debug("Uploaded %d records.", count)
+
+        return count
+
     async def populate_database(self, raw_docs: List[Dict[str, Any]]) -> int:
         """
         Populate the vector database with processed video transcript documents.
@@ -362,6 +384,14 @@ class DatastoreManager:
             points=points,
         )

+        self.loading_complete.set()
+        # Execute callbacks (both sync and async)
+        for callback in self._completion_callbacks:
+            if asyncio.iscoroutinefunction(callback):
+                await callback()
+            else:
+                callback()
+
         return len(points)

     def count_docs(self) -> int:
@@ -403,38 +433,53 @@ class DatastoreManager:
             search_kwargs={"k": n_context_docs}
         )

+    def is_ready(self) -> bool:
+        """Check if the datastore has finished loading data.

+        Returns:
+            bool: True if data loading is complete, False otherwise
+        """
+        return self.loading_complete.is_set()

-…
-…
-        transcript data and metadata
-    group (str): Group identifier to be added to each video entry,
-        typically used for organizing videos by source or category
-…
+    def add_completion_callback(self, callback: Callable):
+        """Add a callback to be called when data loading completes.

+        Args:
+            callback: Callable function to be called when data loading completes

+        Note:
+            If loading has already completed, the callback will be called immediately.
+        """
+        if self.loading_complete.is_set():
+            # Loading already completed, execute callback immediately
+            if asyncio.iscoroutinefunction(callback):
+                # Need to schedule async callback
+                asyncio.create_task(callback())
+            else:
+                callback()
+        else:
+            # Loading not complete, add to callbacks list
+            self._completion_callbacks.append(callback)

+    async def wait_for_loading(self, timeout: Optional[float] = None):
+        """Wait for data loading to complete.
+
+        Args:
+            timeout: Maximum time to wait in seconds (None for no timeout)
+
+        Returns:
+            bool: True if loading completed, False if timeout occurred
+        """
+        try:
+            await asyncio.wait_for(
+                self.loading_complete.wait(), timeout=timeout
+            )
+            return True
+        except asyncio.TimeoutError:
+            return False


-async def load_single_json(filepath):
+async def load_single_json(filepath: str):
     """
     Asynchronously load and parse a single JSON file containing video data.

@@ -461,12 +506,13 @@ async def load_single_json(filepath):

     async with aiofiles.open(my_path, mode="r", encoding="utf-8") as f:
         content = await f.read()
-        payload = …
-        …
+        payload = json.loads(content)
+        for entry in payload:
+            entry.update({"group": str(my_path)})
     return payload


-async def load_json_files(…
+async def load_json_files(glob_list: List[str]):
     """
     Asynchronously load and parse multiple JSON files matching given patterns.

@@ -475,7 +521,7 @@ async def load_json_files(path_pattern: List[str]):
     is designed to handle large datasets efficiently by leveraging async I/O.

     Args:
-        …
+        glob_list (List[str]): List of glob patterns to match JSON files.
             Supports standard glob syntax including recursive
             patterns with ** for subdirectory traversal.

@@ -493,48 +539,16 @@ async def load_json_files(path_pattern: List[str]):
     >>> videos = await load_json_files(patterns)
     >>> len(videos)  # Total videos from all matched files
     """
+    logging.debug("Loading from %d globs:", len(glob_list))
+
     files = []
-    for …
-    …
+    for globstring in glob_list:
+        logging.debug("Loading glob: %s", globstring)
+        new_files = glob.glob(globstring, recursive=True)
+        logging.debug("New files: %d", len(new_files))
+        files.extend(new_files)
+    logging.debug("Total files: %d", len(files))

     tasks = [load_single_json(f) for f in files]
     results = await asyncio.gather(*tasks)
     return [item for sublist in results for item in sublist]  # flatten
-
-
-async def fill_the_db(
-    state: ApplicationState,
-):
-    """
-    Initialize and populate the vector database with video transcript data.
-
-    This function serves as the main entry point for database initialization.
-    It loads video data from configured file patterns, processes them through
-    the RAG pipeline, and provides user feedback about the loading process.
-
-    The function is designed to be idempotent - it can be called multiple times
-    safely and will only populate the database if it's empty.
-
-    Args:
-        state (ApplicationState): Application state object containing the RAG
-            system and datastore manager for database operations
-
-    Returns:
-        None: Function operates through side effects (database population and UI updates)
-
-    Side Effects:
-        - Populates the vector database with processed video transcripts
-        - Sends confirmation message to the user interface
-        - Updates the state.rag.pointsLoaded counter
-
-    Note:
-        Uses the params.filename configuration to determine which files to load.
-        Sends a Chainlit message to inform users of successful database loading.
-    """
-    data: List[Dict[str, Any]] = await load_json_files(params.filename)
-
-    _ = await state.rag.build_chain(data)
-    await cl.Message(
-        content=f"✅ The database has been loaded with "
-        f"{state.rag.pointsLoaded} elements!"
-    ).send()
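The new loading machinery decouples population from consumption: from_json_globs can run as a background task while callers poll is_ready(), await wait_for_loading(), or register a callback (which, per the code above, fires immediately if loading has already finished). A minimal sketch using only methods from this diff, assuming the configured embedding backend is reachable:

    import asyncio

    from pstuts_rag.datastore import DatastoreManager

    async def demo():
        # With embeddings=None, the constructor picks a class via EmbeddingsAPISelector.
        datastore = DatastoreManager(name="local_test")

        # Fires when populate_database finishes (or immediately, if already done).
        datastore.add_completion_callback(lambda: print("datastore ready"))

        # Populate in the background, then wait with a timeout.
        asyncio.create_task(datastore.from_json_globs(["data/test.json"]))
        loaded = await datastore.wait_for_loading(timeout=120)
        print("ready:", datastore.is_ready(), "| within timeout:", loaded)

    asyncio.run(demo())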
pstuts_rag/pstuts_rag/graph.py
CHANGED
@@ -7,12 +7,30 @@ from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_core.messages import AIMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.runnables import Runnable, RunnableLambda
-from pstuts_rag.prompts import TAVILY_SYSTEM
-from pstuts_rag.state import PsTutsTeamState
 
+from langgraph.graph import StateGraph
+
+from pstuts_rag.prompts import SUPERVISOR_SYSTEM, TAVILY_SYSTEM
+from pstuts_rag.state import PsTutsTeamState
+from pstuts_rag.datastore import DatastoreManager
+from pstuts_rag.configuration import Configuration
 
+import asyncio
+import functools
 import logging
-from typing import Callable, Dict, Tuple
+from typing import Callable, Dict, Tuple, Optional, Union
+
+from langchain_huggingface import HuggingFaceEmbeddings
+
+from app import (
+    ADOBEHELP,
+    VIDEOARCHIVE,
+    ApplicationState,
+    app_state,
+    enter_chain,
+)
+
+from pstuts_rag.rag_for_transcripts import retrieve_videos
 
 
 def search_agent(state: PsTutsTeamState, chain: Runnable) -> Dict:
@@ -75,7 +93,7 @@ def create_rag_node(rag_chain: Runnable, name: str = "VideoSearch"):
         name=name,
     )
 
-    return rag_node
+    return rag_node
 
 
 def create_agent(
@@ -126,7 +144,7 @@ def create_tavily_node(
         agent_node, agent=adobe_help_agent, name=name
     )
 
-    return adobe_help_node
+    return adobe_help_node
 
 
 def create_team_supervisor(llm: BaseChatModel, system_prompt, members):
@@ -161,3 +179,108 @@ def create_team_supervisor(llm: BaseChatModel, system_prompt, members):
         | llm.bind_functions(functions=[function_def], function_call="route")
         | JsonOutputFunctionsParser()
     )
+
+
+async def startup(
+    config=Configuration(), on_loading_complete: Optional[Callable] = None
+):
+    """
+    Initialize the application with optional loading completion callback.
+
+    Args:
+        config: Configuration object with application settings
+        on_loading_complete: Optional callback (sync or async) to call when
+            datastore loading completes
+
+    Returns:
+        DatastoreManager: The initialized datastore manager
+    """
+
+    ### PROCESS THE CONFIGURATION
+    log_level = getattr(logging, config.eva_log_level, logging.INFO)
+    logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")
+
+    ### CREATE THE DATABASE
+
+    datastore = DatastoreManager(
+        name=config.eva_workflow_name,
+        embeddings=HuggingFaceEmbeddings(model_name=config.embedding_model),
+    )
+
+    ### START DATABASE POPULATION
+
+    globs = [str(g) for g in config.transcript_glob.split(":")]
+
+    # Add custom callback if provided, otherwise use default logging
+    if on_loading_complete:
+        datastore.add_completion_callback(on_loading_complete)
+    else:
+        # Default callback for logging
+        def default_logging_callback():
+            logging.info("🎉 Datastore loading completed!")
+
+        datastore.add_completion_callback(default_logging_callback)
+
+    asyncio.create_task(datastore.from_json_globs(globs))
+
+    ### CREATE THE RAG CHAIN
+    ai_graph = StateGraph(PsTutsTeamState, config_schema=Configuration)
+
+    return datastore
+
+
+async def build_the_graph(current_state: ApplicationState):
+    """
+    Builds the agent graph for routing user queries.
+
+    Creates the necessary nodes (Adobe help, RAG search, supervisor), defines
+    their connections, and compiles the graph into a runnable chain.
+
+    Args:
+        current_state: Current application state with required components
+    """
+    adobe_help_node, _, _ = create_tavily_node(
+        llm=app_state.llm, name=ADOBEHELP
+    )
+
+    rag_node, _ = create_rag_node(
+        rag_chain=retrieve_videos(),
+        name=VIDEOARCHIVE,
+    )
+
+    supervisor_agent = create_team_supervisor(
+        current_state.llm,
+        SUPERVISOR_SYSTEM,
+        [VIDEOARCHIVE, ADOBEHELP],
+    )
+
+    ai_graph = langgraph.graph.StateGraph(PsTutsTeamState)
+
+    ai_graph.add_node(VIDEOARCHIVE, rag_node)
+    ai_graph.add_node(ADOBEHELP, adobe_help_node)
+    ai_graph.add_node("supervisor", supervisor_agent)
+
+    edges = [
+        [VIDEOARCHIVE, "supervisor"],
+        [ADOBEHELP, "supervisor"],
+    ]
+
+    [ai_graph.add_edge(*p) for p in edges]
+
+    ai_graph.add_conditional_edges(
+        "supervisor",
+        lambda x: x["next"],
+        {
+            VIDEOARCHIVE: VIDEOARCHIVE,
+            ADOBEHELP: ADOBEHELP,
+            "FINISH": langgraph.graph.END,
+        },
+    )
+
+    ai_graph.set_entry_point("supervisor")
+
+    return enter_chain | ai_graph.compile(), ai_graph
+
+
+# Note: Cannot run build_the_graph() here as it requires current_state parameter
+# graph, _ = asyncio.run(build_the_graph())
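A minimal usage sketch for the new `startup()` and `build_the_graph()` helpers. This is not part of the diff: it assumes default `Configuration` values resolve, that `app_state.llm` has already been populated by the app, and that the compiled chain accepts a plain question through `enter_chain`:

```python
import asyncio

from app import app_state
from pstuts_rag.configuration import Configuration
from pstuts_rag.graph import build_the_graph, startup


async def main():
    # startup() schedules datastore population in the background
    # and returns the manager immediately.
    datastore = await startup(config=Configuration())

    # build_the_graph() wires the VideoSearch / AdobeHelp / supervisor
    # nodes and compiles them into a runnable chain.
    chain, sketch = await build_the_graph(app_state)

    answer = await chain.ainvoke("How do I add a layer mask?")
    print(answer)


asyncio.run(main())
```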
pstuts_rag/pstuts_rag/rag.py
CHANGED
@@ -32,6 +32,7 @@ from .prompts import RAG_PROMPT_TEMPLATES
 
 from .datastore import DatastoreManager
 
+from pstuts_rag.configuration import Configuration
 
 class RAGChainFactory:
     """Factory class for creating RAG (Retrieval Augmented Generation) chains.
@@ -164,10 +165,11 @@ class RAGChainFactory:
 
         Returns:
             Runnable: The complete RAG chain
+
         """
         self.answer_chain = self.prompt_template | llm
         self.rag_chain = (
-
+            itemgetter("question")
             | self.prepare_query
             | {"input": RunnablePassthrough(), "answer": self.answer_chain}
             | self.pack_references
@@ -226,7 +228,7 @@ class RAGChainInstance:
         self.llm = llm
         self.embeddings = embeddings
 
-    async def build_chain(
+    async def build_chain(
         self, json_payload: List[Dict[str, Any]]
     ) -> Runnable:
         """
@@ -259,3 +261,5 @@ class RAGChainInstance:
         )
         self.rag_chain = self.rag_factory.get_rag_chain(self.llm)
         return self.rag_chain
+
+
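Because the chain now begins with `itemgetter("question")`, callers invoke it with a mapping rather than a bare string. The extraction step itself is plain `operator.itemgetter`:

```python
from operator import itemgetter

# The chain's first stage: pull the question out of the invoke payload,
# e.g. rag_chain.invoke({"question": "How do I crop an image?"}).
extract = itemgetter("question")
print(extract({"question": "How do I crop an image?"}))
# How do I crop an image?
```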
pstuts_rag/pstuts_rag/rag_for_transcripts.py
ADDED
@@ -0,0 +1,150 @@
+import json
+import asyncio
+from operator import itemgetter
+from typing import Any, Dict, Union, Optional, Callable
+import logging
+
+from langchain.prompts import ChatPromptTemplate
+from langchain_core.messages import AIMessage
+from langchain_core.runnables import (
+    Runnable,
+    RunnableParallel,
+    RunnablePassthrough,
+    RunnableConfig,
+)
+from langchain_openai import ChatOpenAI
+from langchain_huggingface import ChatHuggingFace
+from langchain_ollama import ChatOllama
+
+from .datastore import DatastoreManager
+from .prompts import RAG_PROMPT_TEMPLATES
+
+from pstuts_rag.configuration import Configuration, ModelAPI
+
+
+def pack_references(msg_dict: Dict[str, Any]) -> AIMessage:
+    """Pack reference information into the AI message.
+
+    Takes the generated answer and input context, formats references,
+    and appends them to the message content.
+
+    Args:
+        msg_dict: Dictionary containing the answer and input
+
+    Returns:
+        AIMessage: Message with references appended
+    """
+    answer: AIMessage = msg_dict["answer"]
+    input = msg_dict["input"]
+
+    reference_dicts = [
+        {k: doc.metadata[k] for k in ("title", "source", "start", "stop")}
+        for doc in input["context"]
+    ]
+    references = str(json.dumps(reference_dicts, indent=2))
+
+    text_w_references = answer.content
+    if "I don't know" not in answer.content:
+        text_w_references = "\n".join(
+            [str(text_w_references), "**REFERENCES**", references]
+        )
+
+    output: AIMessage = answer.model_copy(
+        update={
+            "content": text_w_references,
+            "additional_kwargs": {
+                **answer.additional_kwargs,
+                "context": input["context"],
+                "question": input["question"],
+            },
+        }
+    )
+
+    return output
+
+
+def retrieve_videos(
+    datastore: DatastoreManager,
+    config: Union[RunnableConfig, Configuration] = Configuration(),
+) -> Runnable:
+
+    configurable = (
+        config
+        if isinstance(config, Configuration)
+        else Configuration.from_runnable_config(config)
+    )
+
+    cls = {
+        ModelAPI.HUGGINGFACE: ChatHuggingFace,
+        ModelAPI.OPENAI: ChatOpenAI,
+        ModelAPI.OLLAMA: ChatOllama,
+    }.get(configurable.llm_api, ChatOpenAI)
+
+    llm = cls(model=configurable.llm_tool_model)
+
+    answer_chain = (
+        ChatPromptTemplate.from_messages(list(RAG_PROMPT_TEMPLATES.items()))
+        | llm
+    )
+
+    rag_chain = (
+        itemgetter("question")
+        | RunnableParallel(
+            context=datastore.get_retriever(
+                n_context_docs=configurable.n_context_docs
+            ),
+            question=RunnablePassthrough(),
+        )
+        | {
+            "input": RunnablePassthrough(),
+            "answer": answer_chain,
+        }
+        | pack_references
+    )
+
+    return rag_chain
+
+
+def startup(
+    config=Configuration(),
+    callback_on_loading_complete: Optional[Callable] = None,
+):
+    """
+    Initialize the application with optional loading completion callback.
+
+    Args:
+        config: Configuration object with application settings
+        callback_on_loading_complete: Optional callback (sync or async) to
+            call when datastore loading completes
+
+    Returns:
+        DatastoreManager: The initialized datastore manager
+    """
+
+    ### PROCESS THE CONFIGURATION
+    log_level = getattr(logging, config.eva_log_level, logging.INFO)
+    logging.basicConfig(level=log_level, format="%(levelname)s: %(message)s")
+
+    ### CREATE THE DATABASE
+
+    datastore = DatastoreManager()
+    if callback_on_loading_complete:
+        datastore.add_completion_callback(callback_on_loading_complete)
+
+    ### START DATABASE POPULATION
+
+    globs = [str(g) for g in config.transcript_glob.split(":")]
+
+    # # Add custom callback if provided, otherwise use default logging
+    # if on_loading_complete:
+    #     datastore.add_completion_callback(on_loading_complete)
+    # else:
+    #     # Default callback for logging
+    #     def default_logging_callback():
+    #         logging.info("🎉 Datastore loading completed!")
+
+    #     datastore.add_completion_callback(default_logging_callback)
+
+    asyncio.create_task(datastore.from_json_globs(globs))
+
+    return datastore
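A hedged end-to-end sketch of the new module. It assumes transcripts matching `transcript_glob` exist and that credentials for the selected provider are available; the `asyncio.Event` wiring is one way to use the completion callback, not part of the module:

```python
import asyncio

from pstuts_rag.configuration import Configuration
from pstuts_rag.rag_for_transcripts import retrieve_videos, startup


async def main():
    loaded = asyncio.Event()

    # startup() registers the callback and schedules JSON loading on the
    # running event loop, then returns the datastore immediately.
    datastore = startup(
        config=Configuration(),
        callback_on_loading_complete=loaded.set,
    )
    await loaded.wait()

    chain = retrieve_videos(datastore)
    answer = await chain.ainvoke({"question": "How do I add a layer mask?"})
    # pack_references() appends a **REFERENCES** block with title/source/
    # start/stop metadata, unless the model answered "I don't know".
    print(answer.content)


asyncio.run(main())
```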
pstuts_rag/pstuts_rag/utils.py
CHANGED
@@ -0,0 +1,92 @@
+from typing import Dict, Type
+
+from langchain_openai import ChatOpenAI
+from langchain_openai.embeddings import OpenAIEmbeddings
+
+from langchain_huggingface import ChatHuggingFace
+from langchain_huggingface import HuggingFaceEmbeddings
+
+from langchain_ollama import ChatOllama
+from langchain_ollama.embeddings import OllamaEmbeddings
+
+from pstuts_rag.configuration import ModelAPI
+
+# Chat model selector dictionary
+"""
+ChatAPISelector: Dictionary mapping ModelAPI enum values to their corresponding chat model classes.
+
+This selector enables dynamic instantiation of chat models based on the configured
+API provider. Supports OpenAI, HuggingFace, and Ollama chat model implementations.
+
+Type:
+    Dict[ModelAPI, Type[ChatHuggingFace | ChatOpenAI | ChatOllama]]
+
+Keys:
+    ModelAPI.HUGGINGFACE: Maps to ChatHuggingFace class
+    ModelAPI.OPENAI: Maps to ChatOpenAI class
+    ModelAPI.OLLAMA: Maps to ChatOllama class
+
+Example:
+    >>> from pstuts_rag.configuration import ModelAPI
+    >>> from pstuts_rag.utils import ChatAPISelector
+    >>>
+    >>> # Get the appropriate chat model class
+    >>> api_provider = ModelAPI.OPENAI
+    >>> chat_class = ChatAPISelector[api_provider]
+    >>>
+    >>> # Instantiate the chat model
+    >>> chat_model = chat_class(model="gpt-3.5-turbo", temperature=0.7)
+    >>>
+    >>> # Alternative usage with configuration
+    >>> config = Configuration(llm_api=ModelAPI.OLLAMA)
+    >>> chat_class = ChatAPISelector[config.llm_api]
+    >>> chat_model = chat_class(model="llama2:7b")
+"""
+ChatAPISelector: Dict[
+    ModelAPI, Type[ChatHuggingFace | ChatOpenAI | ChatOllama]
+] = {
+    ModelAPI.HUGGINGFACE: ChatHuggingFace,
+    ModelAPI.OPENAI: ChatOpenAI,
+    ModelAPI.OLLAMA: ChatOllama,
+}
+
+# Embeddings model selector dictionary
+"""
+EmbeddingsAPISelector: Dictionary mapping ModelAPI enum values to their corresponding embedding model classes.
+
+This selector enables dynamic instantiation of embedding models based on the configured
+API provider. Supports OpenAI, HuggingFace, and Ollama embedding implementations.
+
+Type:
+    Dict[ModelAPI, Type[HuggingFaceEmbeddings | OpenAIEmbeddings | OllamaEmbeddings]]
+
+Keys:
+    ModelAPI.HUGGINGFACE: Maps to HuggingFaceEmbeddings class
+    ModelAPI.OPENAI: Maps to OpenAIEmbeddings class
+    ModelAPI.OLLAMA: Maps to OllamaEmbeddings class
+
+Example:
+    >>> from pstuts_rag.configuration import ModelAPI
+    >>> from pstuts_rag.utils import EmbeddingsAPISelector
+    >>>
+    >>> # Get the appropriate embeddings model class
+    >>> api_provider = ModelAPI.HUGGINGFACE
+    >>> embeddings_class = EmbeddingsAPISelector[api_provider]
+    >>>
+    >>> # Instantiate the embeddings model
+    >>> embeddings = embeddings_class(
+    ...     model_name="sentence-transformers/all-MiniLM-L6-v2"
+    ... )
+    >>>
+    >>> # Alternative usage with configuration
+    >>> config = Configuration(embedding_api=ModelAPI.OPENAI)
+    >>> embeddings_class = EmbeddingsAPISelector[config.embedding_api]
+    >>> embeddings = embeddings_class(model="text-embedding-3-small")
+"""
+EmbeddingsAPISelector: Dict[
+    ModelAPI, Type[HuggingFaceEmbeddings | OpenAIEmbeddings | OllamaEmbeddings]
+] = {
+    ModelAPI.HUGGINGFACE: HuggingFaceEmbeddings,
+    ModelAPI.OPENAI: OpenAIEmbeddings,
+    ModelAPI.OLLAMA: OllamaEmbeddings,
+}
pyproject.toml
CHANGED
@@ -45,6 +45,9 @@ dependencies = [
     "google>=3.0.0",
     "numpy==2.2.2",
     "tavily-python>=0.7.2",
+    "logging>=0.4.9.6",
+    "langchain-ollama>=0.3.2",
+    "simsimd>=6.2.1",
 ]
 authors = [{ name = "Marko Budisic", email = "[email protected]" }]
 license = "MIT"
uv.lock
CHANGED
@@ -1704,6 +1704,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/0b/76/eb08f7b87f3377ced3800b2896841ccdcde3e246f46523946ecf092447e6/langchain_huggingface-0.2.0-py3-none-any.whl", hash = "sha256:eed1fdfe51d16d761499fa754491a1a4dcb61798c1e5516335071d1dad852a41", size = 27329 },
 ]
 
+[[package]]
+name = "langchain-ollama"
+version = "0.3.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "langchain-core" },
+    { name = "ollama" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/21/99/f548adc83e9f4d06805cc2fc530a94fca321318919c27ec8d5ad9fab51ab/langchain_ollama-0.3.2.tar.gz", hash = "sha256:9e747e7bda1b55cdfa344433814b035be3f06d1bae32b1ffb696ccfc12cfa8ae", size = 21147 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3a/a7/d0fff871a353e6f602fe94912576219b47639db5a08ad454ea914cc11c8d/langchain_ollama-0.3.2-py3-none-any.whl", hash = "sha256:f7516f2f22d912dba1286d644bff00c287799e758d7e465a1439244a1bb02820", size = 20702 },
+]
+
 [[package]]
 name = "langchain-openai"
 version = "0.3.8"
@@ -1839,6 +1852,12 @@ dependencies = [
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/fc/fc/628b39e31b368aacbca51721ba7a66a4d140e9be916a0c7396664fdaed7a/literalai-0.1.103.tar.gz", hash = "sha256:060e86e63c0f53041a737b2183354ac092ee8cd9faec817dc95df639bb263a7d", size = 62540 }
 
+[[package]]
+name = "logging"
+version = "0.4.9.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/93/4b/979db9e44be09f71e85c9c8cfc42f258adfb7d93ce01deed2788b2948919/logging-0.4.9.6.tar.gz", hash = "sha256:26f6b50773f085042d301085bd1bf5d9f3735704db9f37c1ce6d8b85c38f2417", size = 96029 }
+
 [[package]]
 name = "lxml"
 version = "5.4.0"
@@ -2412,6 +2431,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1", size = 89265 },
 ]
 
+[[package]]
+name = "ollama"
+version = "0.5.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpx" },
+    { name = "pydantic" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8d/96/c7fe0d2d1b3053be614822a7b722c7465161b3672ce90df71515137580a0/ollama-0.5.1.tar.gz", hash = "sha256:5a799e4dc4e7af638b11e3ae588ab17623ee019e496caaf4323efbaa8feeff93", size = 41112 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d6/76/3f96c8cdbf3955d7a73ee94ce3e0db0755d6de1e0098a70275940d1aff2f/ollama-0.5.1-py3-none-any.whl", hash = "sha256:4c8839f35bc173c7057b1eb2cbe7f498c1a7e134eafc9192824c8aecb3617506", size = 13369 },
+]
+
 [[package]]
 name = "openai"
 version = "1.59.9"
@@ -2958,10 +2990,12 @@ dependencies = [
     { name = "langchain-core" },
     { name = "langchain-experimental" },
     { name = "langchain-huggingface" },
+    { name = "langchain-ollama" },
     { name = "langchain-openai" },
     { name = "langchain-qdrant" },
     { name = "langgraph" },
     { name = "langsmith" },
+    { name = "logging" },
     { name = "nest-asyncio" },
     { name = "numpy" },
     { name = "pandas" },
@@ -2973,6 +3007,7 @@ dependencies = [
     { name = "scikit-learn" },
     { name = "scipy" },
     { name = "sentence-transformers" },
+    { name = "simsimd" },
     { name = "tavily-python" },
     { name = "torch" },
     { name = "tqdm" },
@@ -3023,10 +3058,12 @@ requires-dist = [
     { name = "langchain-core", specifier = ">=0.3.59" },
     { name = "langchain-experimental", specifier = ">=0.3.4" },
    { name = "langchain-huggingface", specifier = ">=0.2.0" },
+    { name = "langchain-ollama", specifier = ">=0.3.2" },
     { name = "langchain-openai" },
     { name = "langchain-qdrant", specifier = ">=0.2.0" },
     { name = "langgraph", specifier = ">=0.4.3" },
     { name = "langsmith", specifier = ">=0.0.50" },
+    { name = "logging", specifier = ">=0.4.9.6" },
     { name = "mypy", marker = "extra == 'dev'", specifier = ">=0.900" },
     { name = "nest-asyncio", specifier = ">=1.5.6" },
     { name = "numpy", specifier = "==2.2.2" },
@@ -3042,6 +3079,7 @@ requires-dist = [
     { name = "scikit-learn", specifier = ">=1.0.0" },
     { name = "scipy", specifier = ">=1.10.0" },
     { name = "sentence-transformers", specifier = ">=3.4.1" },
+    { name = "simsimd", specifier = ">=6.2.1" },
     { name = "tavily-python", specifier = ">=0.7.2" },
     { name = "torch", specifier = ">=2.0.0" },
     { name = "tqdm", specifier = ">=4.65.0" },
@@ -3933,6 +3971,65 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
 ]
 
+[[package]]
+name = "simsimd"
+version = "6.2.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/da/1c/90e6ec0f0de20108fdd7d5665ac2916b1e8c893ce2f8d7481fd37eabbb97/simsimd-6.2.1.tar.gz", hash = "sha256:5e202c5386a4141946b7aee05faac8ebc2e36bca0a360b24080e57b59bc4ef6a", size = 165828 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/5f/361cee272fd6c88f33e14e233792f59dd58836ea8c776344f7445a829ca2/simsimd-6.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:e9614309af75be4d08a051dc61ed5cf41b5239b8303b37dc2f9c8a7223534392", size = 170254 },
+    { url = "https://files.pythonhosted.org/packages/b8/88/edf4442ec655765d570bfb6cef81dfb12c8829c28e580459bac8a4847fb5/simsimd-6.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ea4f0f68be5f85bbcf4322bfdd1b449176cf5fdd99960c546514457635632443", size = 102331 },
+    { url = "https://files.pythonhosted.org/packages/5d/2b/9e7d42ac54bdb32d76953db3bc83eec29bd5d5c9a4069d380b18e200d6bd/simsimd-6.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:12a8d60ccc8991dfbbf056c221ce4f02135f5892492894972f421a6f155015d9", size = 93455 },
+    { url = "https://files.pythonhosted.org/packages/13/9c/fac1167e80328d1e332f515c9cd62da4a0e12b9aa8ee90d448eb4ad5a47f/simsimd-6.2.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a74142ea21a6fd3ec5c64e4d4acf1ec6f4d80c0bb1a5989d68af6e84f7ac612e", size = 251040 },
+    { url = "https://files.pythonhosted.org/packages/31/93/b374e5538fc65cf381920bdba7603769b1b71e42afe2bb4939e9c338c423/simsimd-6.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:298f7c793fc2a1eeedcefa1278eb2ef6f52ce0b36aaa8780885f96a39ce1a4e8", size = 302428 },
+    { url = "https://files.pythonhosted.org/packages/e6/42/2733a0e11b660c6b10f3ec90d7fac6f96267368b961b1a43dda0456fa9f2/simsimd-6.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4025ebad36fb3fa5cffcd48d33375d5e5decc59c1129a259b74fed097eab1ab5", size = 227200 },
+    { url = "https://files.pythonhosted.org/packages/eb/ae/40e0804d06a351efe27bb6f8e4d332daeb1681d3f398ca10d8a2b087ab78/simsimd-6.2.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f486682aa7a8918d86df411d3c11c635db4b67d514cb6bb499c0edab7fb8ec58", size = 432333 },
+    { url = "https://files.pythonhosted.org/packages/a7/eb/a823b0227b5dc43de8125f502237dd8e844b1e803a74e46aa7c3d0f24f83/simsimd-6.2.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:173e66699597a4fcf6fa50b52cced40216fdcfba15f60b761a2bd9cb1d98a444", size = 632659 },
+    { url = "https://files.pythonhosted.org/packages/0a/aa/aee48063c4a98aaea062316dedf598d0d9e09fa9edc28baab6886ae0afa8/simsimd-6.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5b5c6f79f797cc020a2ff64950162dfb6d130c51a07cdac5ad97ec836e85ce50", size = 468407 },
+    { url = "https://files.pythonhosted.org/packages/d4/84/e89bc71456aa2d48e5acf3795b2384f597de643f17d00d752aa8217af233/simsimd-6.2.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:25812637f43feaef1a33ae00b81a4d2b0116aadae3a08267486c1e57236fc368", size = 268908 },
+    { url = "https://files.pythonhosted.org/packages/94/eb/774debec7ee727f436f15e5b5416b781c78564fff97c81a5fb3b636b4298/simsimd-6.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:592a578c788a9cb7877eff41487cc7f50474e00f774de74bea8590fa95c804ae", size = 344256 },
+    { url = "https://files.pythonhosted.org/packages/62/03/fec040e7fbb66fa4766ca959cfd766a22d7a00a4e9371f046d8fcc62d846/simsimd-6.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:191c020f312350ac06eee829376b11d8c1282da8fefb4381fe0625edfb678d8d", size = 389403 },
+    { url = "https://files.pythonhosted.org/packages/55/f0/ad441d90a4dde6e100155931fa4468e33cc23276c3caef6330d2a34b866c/simsimd-6.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e9ad2c247ed58ba9bb170a01295cb315a45c817775cc7e51ad342f70978a1057", size = 316665 },
+    { url = "https://files.pythonhosted.org/packages/05/27/843adbc6a468a58178dcb7907e72c670c8a7c36a06d8a4c5eac9573f5d2d/simsimd-6.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0ff603134600da12175e66b842b7a7331c827fa070d1d8b63386a40bc8d09fcd", size = 669697 },
+    { url = "https://files.pythonhosted.org/packages/6d/db/d2369e0d3b9ca469b923bc81d57dcfed922193e4e4d7cf5f7637df14dd51/simsimd-6.2.1-cp311-cp311-win32.whl", hash = "sha256:99dff4e04663c82284152ecc2e8bf76b2825f3f17e179abf7892e06196061056", size = 55007 },
+    { url = "https://files.pythonhosted.org/packages/73/9f/13d6fca5a32a062e84db0a68433ae416073986c8e1d20b5b936cad18bece/simsimd-6.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:0efc6343c440a26cf16463c4c667655af9597bcbd55ad66f33a80b2b84de7412", size = 86855 },
+    { url = "https://files.pythonhosted.org/packages/64/e9/7e0514f32c9a0e42261f598775b34a858477e0fcffccf32cc11f94e78ee2/simsimd-6.2.1-cp311-cp311-win_arm64.whl", hash = "sha256:2d364f2c24dd38578bf0eec436c4b901c900ae1893680f46eb5632e01330d814", size = 60195 },
+    { url = "https://files.pythonhosted.org/packages/81/87/1f521d471d9079d89dd6860b9dd5d0f39c1633675a30b71acd0bd37cbba5/simsimd-6.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9b3315e41bb759dc038ecd6f4fa7bcf278bf72ee7d982f752482cdc732aea271", size = 169397 },
+    { url = "https://files.pythonhosted.org/packages/4b/1a/b0627589737dc75ccd2ed58893e9e7f8b8e082531bd34d319481d88018d5/simsimd-6.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d476c874bafa0d12d4c8c5c47faf17407f3c96140616384421c2aa980342b6f", size = 101478 },
+    { url = "https://files.pythonhosted.org/packages/e0/b7/e766f0ce9b595927ae1c534f1409b768187e8af567f4412ca220b67c1155/simsimd-6.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9d4f15c06cc221d29e181197c7bbf92c5e829220cbeb3cd1cf080de78b04f2a", size = 93439 },
+    { url = "https://files.pythonhosted.org/packages/ae/48/3b5ec9b3a6063bae2f280f5168aca7099a44fa7ec8b42875b98c79c1d49b/simsimd-6.2.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d286fd4538cb1a1c70e69da00a3acee301519d578931b41161f4f1379d1195c6", size = 251469 },
+    { url = "https://files.pythonhosted.org/packages/70/86/16e8d5b9bdd34f75c7515adfad249f394653131bd1a1366076cf6113e84b/simsimd-6.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:050f68cfa85f1fb2cfa156280928e42926e3977034b755023ce1315bf59e87ff", size = 302974 },
+    { url = "https://files.pythonhosted.org/packages/02/09/3f4240f2b43957aa0d72a2203b2549c0326c7baf97b7f78c72d48d4cd3d2/simsimd-6.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:67bb4b17e04919545f29c7b708faaccbe027f164f8b5c9f4328604fa8f5560ea", size = 227864 },
+    { url = "https://files.pythonhosted.org/packages/07/4a/8c46806493c3a98025f01d81d9f55e0e574f11279c2ad77be919262ea9eb/simsimd-6.2.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3d6bffd999dbb36e606b065e0180365efac2606049c4f7818e4cba2d34c3678f", size = 432491 },
+    { url = "https://files.pythonhosted.org/packages/13/44/b56f207031405af52c6158c40e9f1121fe3a716d98946d9fa5919cf00266/simsimd-6.2.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:25adb244fb75dbf49af0d1bcac4ed4a3fef8e847d78449faa5595af0a3e20d61", size = 633061 },
+    { url = "https://files.pythonhosted.org/packages/4c/ad/241f87641af09a1789af8df559aa86b45218d087e09c37c2dd8c013819d6/simsimd-6.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b4542cee77e801a9c27370fc36ae271514fc0fb2ce14a35f8b25f47989e3d267", size = 468544 },
+    { url = "https://files.pythonhosted.org/packages/e2/3e/357aca7df85ed1092dfa50b91cf1b7c0df6f70b384a0e3798132dd824b5c/simsimd-6.2.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:4f665228f8ff4911790b485e74b00fa9586a141dde6011970be71bb303b5a22f", size = 269133 },
+    { url = "https://files.pythonhosted.org/packages/f0/67/079ca2c58bbc5812802c6ac1b332a6ef889d73cf1188726f36edc27898f6/simsimd-6.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:783b4308f80ae00763b0eaa0dac26196958f9c2df60d35a0347ebd2f82ece46d", size = 344412 },
+    { url = "https://files.pythonhosted.org/packages/3c/f0/500c9002276259c17e3a6a13a7c7f84e5119602decadbf40429c978655b0/simsimd-6.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:95055e72cfe313c1c8694783bf8a631cc15673b3b775abef367e396d931db0b8", size = 389546 },
+    { url = "https://files.pythonhosted.org/packages/55/a2/d3f4c6aabba0430758367b3de5bbab59b979bf3525c039b882001f1d2ade/simsimd-6.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a98f2b383f51b4f4ee568a637fc7958a347fdae0bd184cff8faa8030b6454a39", size = 316912 },
+    { url = "https://files.pythonhosted.org/packages/f8/a3/2514189c3aaa1beb1714b36be86e2d3af7067c3c95152d78cc4cffff6d87/simsimd-6.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2e474fd10ceb38e2c9f826108a7762f8ff7912974846d86f08c4e7b19cd35ed4", size = 670006 },
+    { url = "https://files.pythonhosted.org/packages/ef/23/dbf7c4aed7542260784dc7bc2056a4e5b6d716a14a9b40989d5c3096990a/simsimd-6.2.1-cp312-cp312-win32.whl", hash = "sha256:b2530ea44fffeab25e5752bec6a5991f30fbc430b04647980db5b195c0971d48", size = 55019 },
+    { url = "https://files.pythonhosted.org/packages/a0/d8/57304c2317822634abd475f5912584a3cfa13363740e9ec72c0622c894f1/simsimd-6.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:dc23283235d5b8f0373b95a547e26da2d7785647a5d0fa15c282fc8c49c0dcb0", size = 87133 },
+    { url = "https://files.pythonhosted.org/packages/3f/7b/ca333232a8bc87d1e846fa2feb9f0d4778500c30493726cb48f04551dfab/simsimd-6.2.1-cp312-cp312-win_arm64.whl", hash = "sha256:5692ce7e56253178eea9dbd58191734918409b83d54b07cfdcecf868d0150a73", size = 60401 },
+    { url = "https://files.pythonhosted.org/packages/9b/f2/4ec7ed52c910a58a07043c5f3355adf4055246dafb79be57d0726e1a4aa0/simsimd-6.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:76b32fdc7142c9714e94651ece8bc00dd5139c554813211552aa358e44af0e07", size = 169399 },
+    { url = "https://files.pythonhosted.org/packages/61/d3/5af24e4f42e2b5bc3a06456ea9068d0fbcd23d8ceeb0e09fe54ed72cfdba/simsimd-6.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f44e5e2319427f94db658c6f75caae78850da505902874a1664a83ef5713f333", size = 101484 },
+    { url = "https://files.pythonhosted.org/packages/cf/86/816050f0fd0767e960c6b900e3c97fd6a4ae54a6aa5b8ef24846757a3f7d/simsimd-6.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:05323cbad7200592c2e53fbcc759e615594e8ca444ef5eddf9f3fb196ad4de9c", size = 93447 },
+    { url = "https://files.pythonhosted.org/packages/e9/7e/61dc3392eafd9fc20357b448aac5f84c84ad61289ab0ab3e5a4aaa1ca3ef/simsimd-6.2.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b1f3cbe5c39db2bb64f30999104de1215ba3805d6059af7bc5a9d662d50f4707", size = 251501 },
+    { url = "https://files.pythonhosted.org/packages/06/55/99d3cf2c2d844c1a57d81379acaebac2e0a0efdf1e73a53990cd84c1d719/simsimd-6.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaa94e0932ae2a48b7e4df8c29204dc9fe59f72b1faeb08e9d5015bf51fb9f21", size = 302991 },
+    { url = "https://files.pythonhosted.org/packages/6f/99/597b322835147f407e6f611810cb8232055711398fbbd47e6a14bfc0995f/simsimd-6.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:508465f8d4e3e0fff07c939921aeedf55b0ade9f56f64e938c350c283dea42fb", size = 227917 },
+    { url = "https://files.pythonhosted.org/packages/ba/8a/6a6596a97d1cc7068a26935bbdd7f170a889240b8081e000aef09b6d0549/simsimd-6.2.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ca67f6273ef544c74c48b134af756de7c98a711ccf69cd0791225f26dd449281", size = 432527 },
+    { url = "https://files.pythonhosted.org/packages/46/0e/5c6e82fa9fe9a21481fe0f6546b4986e07e42bd4d8b6f04f4475b8d7564e/simsimd-6.2.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:d470b43ce606f21f54a23fc19ad6928333e17d0956b02eb27b7b112edc156a10", size = 633095 },
+    { url = "https://files.pythonhosted.org/packages/ae/53/2e17bd16e2ca2a73cd447b89fa7059ae7275c82840f229bf917936ee800a/simsimd-6.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59518b9834c167a1dd8900600718e95cdadc9d74525452f426aa8455a38c55ef", size = 468561 },
+    { url = "https://files.pythonhosted.org/packages/86/8b/1319605c630973741bc749b6e432e56dded2b6a7db0744b659c0de613ab3/simsimd-6.2.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:59c2978c4e402097d8a4b38f076ff98cc43e6b059d53f89736404f26e9a9bd5a", size = 269157 },
+    { url = "https://files.pythonhosted.org/packages/53/50/1cac5113a542c82d5b5399d454c578a65ba14951bfff38aef297104f72fe/simsimd-6.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:edc68e727d53ed2866dcfb625f15e52be8f1e6809f4be2147bf8d2115a2542b7", size = 344437 },
+    { url = "https://files.pythonhosted.org/packages/9a/72/44905ee0e2ed999c52ad1eebf2c8705ce2776212a6387d77355df2c76704/simsimd-6.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:9e5e82551d75c0e2cd0d4b8af8db1cae7b5ac6dcc076c0c760870ff81f78135b", size = 389569 },
+    { url = "https://files.pythonhosted.org/packages/ee/d6/9b4a9141ceb29150d86698553c8e0193256b069bc755e875836c14a6f12e/simsimd-6.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2fa19f8c9786757d19afcbda9f8fb68de55e4f5562725ae8727f887d01bf0e4d", size = 316923 },
+    { url = "https://files.pythonhosted.org/packages/ce/c0/de6aebd58b8de8f0177395b8fd68afb9a27ec010427c4ccd6104b94b6569/simsimd-6.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b0748aa6bd4df4c5a3f5e979aec14b26588f1b2e0d44075dcc9eaf4d555e15b", size = 670038 },
+    { url = "https://files.pythonhosted.org/packages/77/32/4c74664656231ccb43be4328dba40e9ada63d3cc1e557b1785ae0b9560b5/simsimd-6.2.1-cp313-cp313-win32.whl", hash = "sha256:7f43721e1a4ebe8d2245b0e85dd7de7153d1bf22839579d5f69a345909c68d9e", size = 55017 },
+    { url = "https://files.pythonhosted.org/packages/76/7f/57e02f6b2d09a1d42697e739b002bbe2112f8b8384d15d166154ec4cec44/simsimd-6.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:6af1565e0ef7060bc52a38e3273a8e6e92aff47835965dc5311298563475935e", size = 87138 },
+    { url = "https://files.pythonhosted.org/packages/38/b9/941876e98dd1f98c158cd5e6633dc1573d1be6daf8f2e3ad5d15e6a8024d/simsimd-6.2.1-cp313-cp313-win_arm64.whl", hash = "sha256:e690b41377c8dd157d585713b0bc35c845aee7742334bf12d1f087fc8a65b6c3", size = 60408 },
+]
+
 [[package]]
 name = "six"
 version = "1.17.0"