Videos displayed; streaming responses
- app.py +40 -5
- notebooks/transcript_rag.ipynb +64 -23
- pstuts_rag/pstuts_rag/datastore.py +3 -1
- pstuts_rag/pstuts_rag/loader.py +30 -1
- pstuts_rag/pstuts_rag/rag.py +17 -2
app.py
CHANGED

@@ -11,6 +11,7 @@ from langchain_core.documents import Document
from langchain_core.language_models import BaseChatModel
from langchain_core.runnables import Runnable
from langchain_qdrant import QdrantVectorStore
+from pstuts_rag.loader import load_json_files, load_single_json
from qdrant_client import QdrantClient
from dataclasses import dataclass

@@ -19,7 +20,7 @@ import pstuts_rag.rag, pstuts_rag.datastore

@dataclass
class ApplicationParameters:
-    filename = "data/
+    filename = [f"data/{f}.json" for f in ["dev"]]
    embedding_model = "text-embedding-3-small"
    n_context_docs = 2
    llm_model = "gpt-4.1-mini"
@@ -42,6 +43,9 @@ class ApplicationState:
    llm: BaseChatModel
    rag_chain: Runnable

+    hasLoaded: asyncio.Event = asyncio.Event()
+    pointsLoaded: int = 0
+
    def __init__(self) -> None:
        load_dotenv()
        set_api_key_if_not_present("OPENAI_API_KEY")
@@ -53,8 +57,13 @@ params = ApplicationParameters()

async def fill_the_db():
    if state.datastore_manager.count_docs() == 0:
-        data: List[Dict[str, Any]] =
-        await state.datastore_manager.populate_database(
+        data: List[Dict[str, Any]] = await load_json_files(params.filename)
+        state.pointsLoaded = await state.datastore_manager.populate_database(
+            raw_docs=data
+        )
+        await cl.Message(
+            content=f"✅ The database has been loaded with {state.pointsLoaded} elements!"
+        ).send()


async def build_the_chain():
@@ -80,10 +89,36 @@ async def on_chat_start():
@cl.on_message
async def main(message: cl.Message):
    # Send a response back to the user
-
+    msg = cl.Message(content="")
    response = await state.rag_chain.ainvoke({"question": message.content})

-
+    text, references = pstuts_rag.rag.RAGChainFactory.unpack_references(
+        response.content
+    )
+    if isinstance(text, str):
+        for token in [char for char in text]:
+            await msg.stream_token(token)
+
+    await msg.send()
+
+    references = json.loads(references)
+    print(references)
+
+    msg_references = [
+        (
+            f"Watch {ref["title"]} from timestamp "
+            f"{round(ref["start"] // 60)}m:{round(ref["start"] % 60)}s",
+            cl.Video(
+                name=ref["title"],
+                url=f"{ref["source"]}#t={ref["start"]}",
+                display="side",
+            ),
+        )
+        for ref in references
+    ]
+    await cl.Message(content="Related videos").send()
+    for e in msg_references:
+        await cl.Message(content=e[0], elements=[e[1]]).send()


if __name__ == "__main__":
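Note on the new on_message handler: each reference URL gets a "#t=<seconds>" media-fragment suffix, so the side-panel player starts at the cited moment instead of at the beginning of the video. A minimal sketch of how one reference record becomes a label and a timestamped link (the record values below are illustrative, not taken from the dataset):

    ref = {
        "title": "Understand layers",
        "source": "https://example.com/understand-layers.mp4",  # illustrative URL
        "start": 85.75,
    }

    # Media-fragment URL: players that honor "#t=" seek to the offset on load.
    url = f"{ref['source']}#t={ref['start']}"
    label = (
        f"Watch {ref['title']} from timestamp "
        f"{round(ref['start'] // 60)}m:{round(ref['start'] % 60)}s"
    )
    print(label)  # Watch Understand layers from timestamp 1m:26s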
notebooks/transcript_rag.ipynb
CHANGED

@@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -15,7 +15,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -24,9 +24,18 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 38,
   "metadata": {},
-   "outputs": [
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n"
@@ -34,7 +43,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -50,7 +59,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -78,15 +87,47 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ast import Dict\n",
    "import json\n",
-    "
+    "\n",
+    "from pstuts_rag.loader import load_json_files\n",
+    "filename = [\"../data/test.json\",\"../data/dev.json\"]\n",
    "from typing import List, Dict, Any\n",
-    "data:List[Dict[str,Any]] =
+    "data:List[Dict[str,Any]] = await load_json_files(filename)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['Get organized with layer groups',\n",
+       " 'Remove unwanted objects from photos',\n",
+       " 'Include vector graphics',\n",
+       " 'Remove unwanted content',\n",
+       " 'Add a central element',\n",
+       " 'Set the resolution',\n",
+       " 'Understand layers',\n",
+       " 'Adjust brightness and contrast',\n",
+       " 'Remove a large object',\n",
+       " 'Add text',\n",
+       " 'Replace a background using a layer mask']"
+      ]
+     },
+     "execution_count": 56,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[ d[\"title\"] for d in data ]"
   ]
  },
  {
@@ -105,7 +146,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -139,7 +180,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -150,7 +191,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -161,7 +202,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -171,7 +212,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -180,19 +221,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'refusal': None,\n",
-       " 'context': [Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 21, '_collection_name': 'local_test'}, page_content=\"Layers are the building blocks of any image in Photoshop CC. So, it's important to understand, what layers are and why to use them - which we'll cover in this video. If you're following along, open this layered image from the downloadable practice files for this tutorial. You might think of layers like separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content. To get a sense of how layers are constructed, let's take a look at this Layers panel. I've closed my other panels, so that we can focus on the Layers panel. But you can skip that. By the way: If your Layers panel isn't showing, go up to the Window menu and choose Layers from there. The Layers panel is where you go to select and work with layers. In this image there are 4 layers, each with separate content. If you click the Eye icon to the left of a layer, you can toggle the visibility of that layer off and on. So, I'm going to turn off the visibility of the tailor layer. And keep your eye on the image, so you can see what's on that layer.\"),\n",
-       " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[85.75, 88.659999], [89.42, 100.11], [101.469999, 108.64], [109.09, 117.459999], [117.75, 129.45], [129.97, 133.37], [133.73, 143.98], [144.76, 152.97]], 'start': 85.75, 'stop': 152.97, '_id': 23, '_collection_name': 'local_test'}, page_content=\"Now let's take a look at just one layer, the tailor layer. A quick way to turn off all the layers except the tailor layer, is to hold down the Option key on the Mac, or the ALT key on the PC, and click on the Eye icon to the left of the tailor layer. In the Document window, you can see that this layer contains just the one small photo surrounded by a gray and white checkerboard pattern. That pattern represents transparent pixels, which allow us to see down through the corresponding part of this layer to the content of the layers below. So, let's turn that content back on by going back to the Layers panel, again holding the Option key on the Mac or the ALT key on the PC and clicking on the Eye icon to the left of the tailor layer. And all the other layers and their Eye icons come back into view. So again: You might think of layers like a stack of pints of glass, each with its own artwork and in some cases transparent areas that let you see down through to the layers below. The biggest benefit of having items on separate layers like this, is that you'll be able to edit pieces of an image independently without affecting the rest of the image.\")],\n",
+       " 'context': [Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 21, '_collection_name': 'local_test'}, page_content=\"Layers are the building blocks of any image in Photoshop CC. So, it's important to understand, what layers are and why to use them - which we'll cover in this video. If you're following along, open this layered image from the downloadable practice files for this tutorial. You might think of layers like separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content. To get a sense of how layers are constructed, let's take a look at this Layers panel. I've closed my other panels, so that we can focus on the Layers panel. But you can skip that. By the way: If your Layers panel isn't showing, go up to the Window menu and choose Layers from there. The Layers panel is where you go to select and work with layers. In this image there are 4 layers, each with separate content. If you click the Eye icon to the left of a layer, you can toggle the visibility of that layer off and on. So, I'm going to turn off the visibility of the tailor layer. And keep your eye on the image, so you can see what's on that layer.\"),\n",
+       " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[85.75, 88.659999], [89.42, 100.11], [101.469999, 108.64], [109.09, 117.459999], [117.75, 129.45], [129.97, 133.37], [133.73, 143.98], [144.76, 152.97]], 'start': 85.75, 'stop': 152.97, '_id': 23, '_collection_name': 'local_test'}, page_content=\"Now let's take a look at just one layer, the tailor layer. A quick way to turn off all the layers except the tailor layer, is to hold down the Option key on the Mac, or the ALT key on the PC, and click on the Eye icon to the left of the tailor layer. In the Document window, you can see that this layer contains just the one small photo surrounded by a gray and white checkerboard pattern. That pattern represents transparent pixels, which allow us to see down through the corresponding part of this layer to the content of the layers below. So, let's turn that content back on by going back to the Layers panel, again holding the Option key on the Mac or the ALT key on the PC and clicking on the Eye icon to the left of the tailor layer. And all the other layers and their Eye icons come back into view. So again: You might think of layers like a stack of pints of glass, each with its own artwork and in some cases transparent areas that let you see down through to the layers below. The biggest benefit of having items on separate layers like this, is that you'll be able to edit pieces of an image independently without affecting the rest of the image.\")],\n",
       " 'question': 'What are layers'}"
      ]
     },
-    "execution_count":
+    "execution_count": 48,
    "metadata": {},
    "output_type": "execute_result"
   }
@@ -203,7 +244,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
@@ -212,7 +253,7 @@
    "text": [
     "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
     "\n",
-     "Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other
+     "Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other. Each layer contains separate pieces of content. Some parts of a layer can be transparent, allowing you to see through to the layers below. This setup lets you edit parts of an image independently without affecting the rest of the image. You manage and work with layers in the Layers panel, where you can toggle their visibility on and off using the Eye icon. (See explanation around 0:28–1:00 and 1:25–2:32) 🎨🖼️\n",
     "**References**:\n",
     "[\n",
     " {\n",
@@ -237,16 +278,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count":
+   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-      "'Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other
+      "'Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other. Each layer contains separate pieces of content. Some parts of a layer can be transparent, allowing you to see through to the layers below. This setup lets you edit parts of an image independently without affecting the rest of the image. You manage and work with layers in the Layers panel, where you can toggle their visibility on and off using the Eye icon. (See explanation around 0:28–1:00 and 1:25–2:32) 🎨🖼️\\n**References**:\\n[\\n {\\n \"title\": \"Understand layers\",\\n \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\\n \"start\": 0.47,\\n \"stop\": 62.14\\n },\\n {\\n \"title\": \"Understand layers\",\\n \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\\n \"start\": 85.75,\\n \"stop\": 152.97\\n }\\n]'"
     ]
    },
-    "execution_count":
+    "execution_count": 50,
   "metadata": {},
   "output_type": "execute_result"
  }
pstuts_rag/pstuts_rag/datastore.py
CHANGED

@@ -159,7 +159,7 @@ class DatastoreManager:

        self.docs = []

-    async def populate_database(self, raw_docs: List[Dict[str, Any]]):
+    async def populate_database(self, raw_docs: List[Dict[str, Any]]) -> int:

        # perform chunking
        self.docs: List[Document] = await chunk_transcripts(
@@ -200,6 +200,8 @@ class DatastoreManager:
            points=points,
        )

+        return len(points)
+
    def count_docs(self) -> int:
        try:
            count = self.qdrant_client.get_collection(self.name).points_count
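populate_database now returns the number of points it upserted into Qdrant, so the caller can both skip re-ingestion and report progress. A minimal sketch of the intended call pattern, mirroring fill_the_db in app.py (the wrapper name is illustrative):

    async def ensure_populated(datastore_manager, raw_docs):
        # Only chunk, embed and upsert when the collection is still empty.
        if datastore_manager.count_docs() == 0:
            return await datastore_manager.populate_database(raw_docs=raw_docs)
        return datastore_manager.count_docs()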
pstuts_rag/pstuts_rag/loader.py
CHANGED

@@ -1,7 +1,34 @@
+import glob
+import json
from langchain_core.document_loaders import BaseLoader
from typing import List, Dict, Iterator
from langchain_core.documents import Document

+import aiofiles
+import asyncio
+from pathlib import Path
+
+
+async def load_single_json(filepath):
+    my_path = Path(filepath)
+
+    async with aiofiles.open(my_path, mode="r", encoding="utf-8") as f:
+        content = await f.read()
+        payload: List[Dict] = json.loads(content)
+        [video.update({"group": my_path.name}) for video in payload]
+
+    return payload
+
+
+async def load_json_files(path_pattern: List[str]):
+    files = []
+    for f in path_pattern:
+        (files.extend(glob.glob(f, recursive=True)))
+
+    tasks = [load_single_json(f) for f in files]
+    results = await asyncio.gather(*tasks)
+    return [item for sublist in results for item in sublist]  # flatten
+

class VideoTranscriptBulkLoader(BaseLoader):
    """Loads video transcripts as a bulk into documents"""
@@ -21,7 +48,9 @@ class VideoTranscriptBulkLoader(BaseLoader):
        if "url" in metadata:
            metadata["source"] = metadata.pop("url")
        yield Document(
-            page_content="\n".join(
+            page_content="\n".join(
+                t["sent"] for t in video["transcripts"]
+            ),
            metadata=metadata,
        )

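The new loader helpers are async end to end: each pattern is expanded with glob, every matching JSON file is read through aiofiles, each video dict is tagged with a group field named after its source file, and the per-file lists are flattened into one list. A minimal usage sketch (the path pattern is illustrative):

    import asyncio

    from pstuts_rag.loader import load_json_files


    async def main():
        # Patterns are expanded with glob; matching files load concurrently.
        videos = await load_json_files(["data/*.json"])
        print(len(videos), "videos, groups:", {v["group"] for v in videos})


    asyncio.run(main())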
pstuts_rag/pstuts_rag/rag.py
CHANGED

@@ -7,9 +7,10 @@ This module provides the core RAG functionality, including:

import json
from multiprocessing import Value
+import re
import uuid
from operator import itemgetter
-from typing import Dict, List, Any
+from typing import Dict, List, Any, Tuple

from langchain_core.documents import Document
from langchain_core.runnables import (
@@ -97,7 +98,7 @@ class RAGChainFactory:
        )

        text_w_references = "\n".join(
-            [answer.content, "**
+            [answer.content, "**REFERENCES**", references]
        )

        output: AIMessage = answer.model_copy(
@@ -113,6 +114,20 @@ class RAGChainFactory:

        return output

+    @staticmethod
+    def unpack_references(content: str) -> Tuple[str, str]:
+        parts = re.split(r"\*\*REFERENCES\*\*\s*", content, maxsplit=1)
+
+        if len(parts) == 2:
+            text = parts[0].rstrip()
+            references = parts[1].lstrip()
+            return text, references
+
+        else:
+            raise ValueError(
+                f"No '**References:**' section found in input:\n{content}"
+            )
+
    def __init__(
        self,
        retriever: VectorStoreRetriever,