mbudisic committed
Commit c419378 · 1 Parent(s): cf7b605

Videos displayed; streaming responses

app.py CHANGED
@@ -11,6 +11,7 @@ from langchain_core.documents import Document
 from langchain_core.language_models import BaseChatModel
 from langchain_core.runnables import Runnable
 from langchain_qdrant import QdrantVectorStore
+from pstuts_rag.loader import load_json_files, load_single_json
 from qdrant_client import QdrantClient
 from dataclasses import dataclass

@@ -19,7 +20,7 @@ import pstuts_rag.rag, pstuts_rag.datastore

 @dataclass
 class ApplicationParameters:
-    filename = "data/test.json"
+    filename = [f"data/{f}.json" for f in ["dev"]]
     embedding_model = "text-embedding-3-small"
     n_context_docs = 2
     llm_model = "gpt-4.1-mini"
@@ -42,6 +43,9 @@ class ApplicationState:
     llm: BaseChatModel
     rag_chain: Runnable

+    hasLoaded: asyncio.Event = asyncio.Event()
+    pointsLoaded: int = 0
+
     def __init__(self) -> None:
         load_dotenv()
         set_api_key_if_not_present("OPENAI_API_KEY")
@@ -53,8 +57,13 @@ params = ApplicationParameters()

 async def fill_the_db():
     if state.datastore_manager.count_docs() == 0:
-        data: List[Dict[str, Any]] = json.load(open(params.filename, "rb"))
-        await state.datastore_manager.populate_database(raw_docs=data)
+        data: List[Dict[str, Any]] = await load_json_files(params.filename)
+        state.pointsLoaded = await state.datastore_manager.populate_database(
+            raw_docs=data
+        )
+        await cl.Message(
+            content=f"✅ The database has been loaded with {state.pointsLoaded} elements!"
+        ).send()


 async def build_the_chain():
@@ -80,10 +89,36 @@ async def on_chat_start():
 @cl.on_message
 async def main(message: cl.Message):
     # Send a response back to the user
-
+    msg = cl.Message(content="")
     response = await state.rag_chain.ainvoke({"question": message.content})

-    await cl.Message(content=response.content).send()
+    text, references = pstuts_rag.rag.RAGChainFactory.unpack_references(
+        response.content
+    )
+    if isinstance(text, str):
+        for token in [char for char in text]:
+            await msg.stream_token(token)
+
+    await msg.send()
+
+    references = json.loads(references)
+    print(references)
+
+    msg_references = [
+        (
+            f"Watch {ref["title"]} from timestamp "
+            f"{round(ref["start"] // 60)}m:{round(ref["start"] % 60)}s",
+            cl.Video(
+                name=ref["title"],
+                url=f"{ref["source"]}#t={ref["start"]}",
+                display="side",
+            ),
+        )
+        for ref in references
+    ]
+    await cl.Message(content="Related videos").send()
+    for e in msg_references:
+        await cl.Message(content=e[0], elements=[e[1]]).send()


 if __name__ == "__main__":
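Note on the new `main()` handler: the reference block returned by `unpack_references` is a JSON array of `{title, source, start, stop}` entries (see the notebook output further down), and each video link gets a media-fragment `#t=<seconds>` suffix so playback starts at the cited timestamp. A minimal sketch of that payload handling; the URL below is a placeholder, not a real asset:

```python
# Minimal sketch (not part of the commit): how main() consumes the references.
import json

references_json = """[
  {"title": "Understand layers",
   "source": "https://example.com/understand-layers.mp4",
   "start": 85.75, "stop": 152.97}
]"""

for ref in json.loads(references_json):
    label = (
        f"Watch {ref['title']} from timestamp "
        f"{round(ref['start'] // 60)}m:{round(ref['start'] % 60)}s"
    )
    # Media fragment: '#t=85.75' asks the player to start 85.75 s into the clip.
    url = f"{ref['source']}#t={ref['start']}"
    print(label, "->", url)
```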
notebooks/transcript_rag.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -15,7 +15,7 @@
15
  },
16
  {
17
  "cell_type": "code",
18
- "execution_count": 2,
19
  "metadata": {},
20
  "outputs": [],
21
  "source": [
@@ -24,9 +24,18 @@
24
  },
25
  {
26
  "cell_type": "code",
27
- "execution_count": 3,
28
  "metadata": {},
29
- "outputs": [],
 
 
 
 
 
 
 
 
 
30
  "source": [
31
  "%load_ext autoreload\n",
32
  "%autoreload 2\n"
@@ -34,7 +43,7 @@
34
  },
35
  {
36
  "cell_type": "code",
37
- "execution_count": 4,
38
  "metadata": {},
39
  "outputs": [],
40
  "source": [
@@ -50,7 +59,7 @@
50
  },
51
  {
52
  "cell_type": "code",
53
- "execution_count": 5,
54
  "metadata": {},
55
  "outputs": [],
56
  "source": [
@@ -78,15 +87,47 @@
78
  },
79
  {
80
  "cell_type": "code",
81
- "execution_count": 6,
82
  "metadata": {},
83
  "outputs": [],
84
  "source": [
85
  "from ast import Dict\n",
86
  "import json\n",
87
- "filename = \"../data/test.json\"\n",
 
 
88
  "from typing import List, Dict, Any\n",
89
- "data:List[Dict[str,Any]] = json.load(open(filename, \"rb\"))\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  ]
91
  },
92
  {
@@ -105,7 +146,7 @@
105
  },
106
  {
107
  "cell_type": "code",
108
- "execution_count": 7,
109
  "metadata": {},
110
  "outputs": [],
111
  "source": [
@@ -139,7 +180,7 @@
139
  },
140
  {
141
  "cell_type": "code",
142
- "execution_count": 8,
143
  "metadata": {},
144
  "outputs": [],
145
  "source": [
@@ -150,7 +191,7 @@
150
  },
151
  {
152
  "cell_type": "code",
153
- "execution_count": 9,
154
  "metadata": {},
155
  "outputs": [],
156
  "source": [
@@ -161,7 +202,7 @@
161
  },
162
  {
163
  "cell_type": "code",
164
- "execution_count": 10,
165
  "metadata": {},
166
  "outputs": [],
167
  "source": [
@@ -171,7 +212,7 @@
171
  },
172
  {
173
  "cell_type": "code",
174
- "execution_count": 11,
175
  "metadata": {},
176
  "outputs": [],
177
  "source": [
@@ -180,19 +221,19 @@
180
  },
181
  {
182
  "cell_type": "code",
183
- "execution_count": 12,
184
  "metadata": {},
185
  "outputs": [
186
  {
187
  "data": {
188
  "text/plain": [
189
  "{'refusal': None,\n",
190
- " 'context': [Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 21, '_collection_name': 'local_test'}, page_content=\"Layers are the building blocks of any image in Photoshop CC. So, it's important to understand, what layers are and why to use them - which we'll cover in this video. If you're following along, open this layered image from the downloadable practice files for this tutorial. You might think of layers like separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content. To get a sense of how layers are constructed, let's take a look at this Layers panel. I've closed my other panels, so that we can focus on the Layers panel. But you can skip that. By the way: If your Layers panel isn't showing, go up to the Window menu and choose Layers from there. The Layers panel is where you go to select and work with layers. In this image there are 4 layers, each with separate content. If you click the Eye icon to the left of a layer, you can toggle the visibility of that layer off and on. So, I'm going to turn off the visibility of the tailor layer. And keep your eye on the image, so you can see what's on that layer.\"),\n",
191
- " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[85.75, 88.659999], [89.42, 100.11], [101.469999, 108.64], [109.09, 117.459999], [117.75, 129.45], [129.97, 133.37], [133.73, 143.98], [144.76, 152.97]], 'start': 85.75, 'stop': 152.97, '_id': 23, '_collection_name': 'local_test'}, page_content=\"Now let's take a look at just one layer, the tailor layer. A quick way to turn off all the layers except the tailor layer, is to hold down the Option key on the Mac, or the ALT key on the PC, and click on the Eye icon to the left of the tailor layer. In the Document window, you can see that this layer contains just the one small photo surrounded by a gray and white checkerboard pattern. That pattern represents transparent pixels, which allow us to see down through the corresponding part of this layer to the content of the layers below. So, let's turn that content back on by going back to the Layers panel, again holding the Option key on the Mac or the ALT key on the PC and clicking on the Eye icon to the left of the tailor layer. And all the other layers and their Eye icons come back into view. So again: You might think of layers like a stack of pints of glass, each with its own artwork and in some cases transparent areas that let you see down through to the layers below. The biggest benefit of having items on separate layers like this, is that you'll be able to edit pieces of an image independently without affecting the rest of the image.\")],\n",
192
  " 'question': 'What are layers'}"
193
  ]
194
  },
195
- "execution_count": 12,
196
  "metadata": {},
197
  "output_type": "execute_result"
198
  }
@@ -203,7 +244,7 @@
203
  },
204
  {
205
  "cell_type": "code",
206
- "execution_count": 13,
207
  "metadata": {},
208
  "outputs": [
209
  {
@@ -212,7 +253,7 @@
212
  "text": [
213
  "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
214
  "\n",
215
- "Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other, with each layer containing separate pieces of content. Some parts of a layer can be transparent, allowing you to see through to the layers below. This setup lets you edit parts of an image independently without affecting the rest of the image. You work with layers in the Layers panel, where you can toggle their visibility on and off. (See explanation around 0:28 to 1:00 and 1:25 to 2:32) 🎨🖼️\n",
216
  "**References**:\n",
217
  "[\n",
218
  " {\n",
@@ -237,16 +278,16 @@
237
  },
238
  {
239
  "cell_type": "code",
240
- "execution_count": 14,
241
  "metadata": {},
242
  "outputs": [
243
  {
244
  "data": {
245
  "text/plain": [
246
- "'Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other, with each layer containing separate pieces of content. Some parts of a layer can be transparent, allowing you to see through to the layers below. This setup lets you edit parts of an image independently without affecting the rest of the image. You work with layers in the Layers panel, where you can toggle their visibility on and off. (See explanation around 0:28 to 1:00 and 1:25 to 2:32) 🎨🖼️\\n**References**:\\n[\\n {\\n \"title\": \"Understand layers\",\\n \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\\n \"start\": 0.47,\\n \"stop\": 62.14\\n },\\n {\\n \"title\": \"Understand layers\",\\n \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\\n \"start\": 85.75,\\n \"stop\": 152.97\\n }\\n]'"
247
  ]
248
  },
249
- "execution_count": 14,
250
  "metadata": {},
251
  "output_type": "execute_result"
252
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 36,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
15
  },
16
  {
17
  "cell_type": "code",
18
+ "execution_count": 37,
19
  "metadata": {},
20
  "outputs": [],
21
  "source": [
 
24
  },
25
  {
26
  "cell_type": "code",
27
+ "execution_count": 38,
28
  "metadata": {},
29
+ "outputs": [
30
+ {
31
+ "name": "stdout",
32
+ "output_type": "stream",
33
+ "text": [
34
+ "The autoreload extension is already loaded. To reload it, use:\n",
35
+ " %reload_ext autoreload\n"
36
+ ]
37
+ }
38
+ ],
39
  "source": [
40
  "%load_ext autoreload\n",
41
  "%autoreload 2\n"
 
43
  },
44
  {
45
  "cell_type": "code",
46
+ "execution_count": 39,
47
  "metadata": {},
48
  "outputs": [],
49
  "source": [
 
59
  },
60
  {
61
  "cell_type": "code",
62
+ "execution_count": 40,
63
  "metadata": {},
64
  "outputs": [],
65
  "source": [
 
87
  },
88
  {
89
  "cell_type": "code",
90
+ "execution_count": 53,
91
  "metadata": {},
92
  "outputs": [],
93
  "source": [
94
  "from ast import Dict\n",
95
  "import json\n",
96
+ "\n",
97
+ "from pstuts_rag.loader import load_json_files\n",
98
+ "filename = [\"../data/test.json\",\"../data/dev.json\"]\n",
99
  "from typing import List, Dict, Any\n",
100
+ "data:List[Dict[str,Any]] = await load_json_files(filename)\n"
101
+ ]
102
+ },
103
+ {
104
+ "cell_type": "code",
105
+ "execution_count": 56,
106
+ "metadata": {},
107
+ "outputs": [
108
+ {
109
+ "data": {
110
+ "text/plain": [
111
+ "['Get organized with layer groups',\n",
112
+ " 'Remove unwanted objects from photos',\n",
113
+ " 'Include vector graphics',\n",
114
+ " 'Remove unwanted content',\n",
115
+ " 'Add a central element',\n",
116
+ " 'Set the resolution',\n",
117
+ " 'Understand layers',\n",
118
+ " 'Adjust brightness and contrast',\n",
119
+ " 'Remove a large object',\n",
120
+ " 'Add text',\n",
121
+ " 'Replace a background using a layer mask']"
122
+ ]
123
+ },
124
+ "execution_count": 56,
125
+ "metadata": {},
126
+ "output_type": "execute_result"
127
+ }
128
+ ],
129
+ "source": [
130
+ "[ d[\"title\"] for d in data ]"
131
  ]
132
  },
133
  {
 
146
  },
147
  {
148
  "cell_type": "code",
149
+ "execution_count": 43,
150
  "metadata": {},
151
  "outputs": [],
152
  "source": [
 
180
  },
181
  {
182
  "cell_type": "code",
183
+ "execution_count": 44,
184
  "metadata": {},
185
  "outputs": [],
186
  "source": [
 
191
  },
192
  {
193
  "cell_type": "code",
194
+ "execution_count": 45,
195
  "metadata": {},
196
  "outputs": [],
197
  "source": [
 
202
  },
203
  {
204
  "cell_type": "code",
205
+ "execution_count": 46,
206
  "metadata": {},
207
  "outputs": [],
208
  "source": [
 
212
  },
213
  {
214
  "cell_type": "code",
215
+ "execution_count": 47,
216
  "metadata": {},
217
  "outputs": [],
218
  "source": [
 
221
  },
222
  {
223
  "cell_type": "code",
224
+ "execution_count": 48,
225
  "metadata": {},
226
  "outputs": [
227
  {
228
  "data": {
229
  "text/plain": [
230
  "{'refusal': None,\n",
231
+ " 'context': [Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[0.47, 3.41], [3.81, 9.13], [9.309999, 15.01], [15.299999, 20.57], [20.88, 23.3], [23.83, 27.93], [29.38, 32.79], [32.96, 33.92], [34.43, 40.21], [41.91, 45.37], [45.88, 49.01], [49.54, 55.130001], [55.72, 58.49], [58.72, 62.14]], 'start': 0.47, 'stop': 62.14, '_id': 21, '_collection_name': 'local_test'}, page_content=\"Layers are the building blocks of any image in Photoshop CC. So, it's important to understand, what layers are and why to use them - which we'll cover in this video. If you're following along, open this layered image from the downloadable practice files for this tutorial. You might think of layers like separate flat pints of glass, stacked one on top of the other. Each layer contains separate pieces of content. To get a sense of how layers are constructed, let's take a look at this Layers panel. I've closed my other panels, so that we can focus on the Layers panel. But you can skip that. By the way: If your Layers panel isn't showing, go up to the Window menu and choose Layers from there. The Layers panel is where you go to select and work with layers. In this image there are 4 layers, each with separate content. If you click the Eye icon to the left of a layer, you can toggle the visibility of that layer off and on. So, I'm going to turn off the visibility of the tailor layer. And keep your eye on the image, so you can see what's on that layer.\"),\n",
232
+ " Document(metadata={'video_id': 19172, 'title': 'Understand layers', 'desc': 'Learn what layers are and why they are so useful.', 'length': '00:04:44.75', 'group': 'test.json', 'source': 'https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4', 'speech_start_stop_times': [[85.75, 88.659999], [89.42, 100.11], [101.469999, 108.64], [109.09, 117.459999], [117.75, 129.45], [129.97, 133.37], [133.73, 143.98], [144.76, 152.97]], 'start': 85.75, 'stop': 152.97, '_id': 23, '_collection_name': 'local_test'}, page_content=\"Now let's take a look at just one layer, the tailor layer. A quick way to turn off all the layers except the tailor layer, is to hold down the Option key on the Mac, or the ALT key on the PC, and click on the Eye icon to the left of the tailor layer. In the Document window, you can see that this layer contains just the one small photo surrounded by a gray and white checkerboard pattern. That pattern represents transparent pixels, which allow us to see down through the corresponding part of this layer to the content of the layers below. So, let's turn that content back on by going back to the Layers panel, again holding the Option key on the Mac or the ALT key on the PC and clicking on the Eye icon to the left of the tailor layer. And all the other layers and their Eye icons come back into view. So again: You might think of layers like a stack of pints of glass, each with its own artwork and in some cases transparent areas that let you see down through to the layers below. The biggest benefit of having items on separate layers like this, is that you'll be able to edit pieces of an image independently without affecting the rest of the image.\")],\n",
233
  " 'question': 'What are layers'}"
234
  ]
235
  },
236
+ "execution_count": 48,
237
  "metadata": {},
238
  "output_type": "execute_result"
239
  }
 
244
  },
245
  {
246
  "cell_type": "code",
247
+ "execution_count": 49,
248
  "metadata": {},
249
  "outputs": [
250
  {
 
253
  "text": [
254
  "==================================\u001b[1m Ai Message \u001b[0m==================================\n",
255
  "\n",
256
+ "Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other. Each layer contains separate pieces of content. Some parts of a layer can be transparent, allowing you to see through to the layers below. This setup lets you edit parts of an image independently without affecting the rest of the image. You manage and work with layers in the Layers panel, where you can toggle their visibility on and off using the Eye icon. (See explanation around 0:281:00 and 1:252:32) 🎨🖼️\n",
257
  "**References**:\n",
258
  "[\n",
259
  " {\n",
 
278
  },
279
  {
280
  "cell_type": "code",
281
+ "execution_count": 50,
282
  "metadata": {},
283
  "outputs": [
284
  {
285
  "data": {
286
  "text/plain": [
287
+ "'Layers are the building blocks of any image in Photoshop CC. You can think of layers like separate flat panes of glass stacked on top of each other. Each layer contains separate pieces of content. Some parts of a layer can be transparent, allowing you to see through to the layers below. This setup lets you edit parts of an image independently without affecting the rest of the image. You manage and work with layers in the Layers panel, where you can toggle their visibility on and off using the Eye icon. (See explanation around 0:281:00 and 1:252:32) 🎨🖼️\\n**References**:\\n[\\n {\\n \"title\": \"Understand layers\",\\n \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\\n \"start\": 0.47,\\n \"stop\": 62.14\\n },\\n {\\n \"title\": \"Understand layers\",\\n \"source\": \"https://images-tv.adobe.com/avp/vr/b758b4c4-2a74-41f4-8e67-e2f2eab83c6a/f810fc5b-2b04-4e23-8fa4-5c532e7de6f8/e268fe4d-e5c7-415c-9f5c-d34d024b14d8_20170727011753.1280x720at2400_h264.mp4\",\\n \"start\": 85.75,\\n \"stop\": 152.97\\n }\\n]'"
288
  ]
289
  },
290
+ "execution_count": 50,
291
  "metadata": {},
292
  "output_type": "execute_result"
293
  }
pstuts_rag/pstuts_rag/datastore.py CHANGED
@@ -159,7 +159,7 @@ class DatastoreManager:

         self.docs = []

-    async def populate_database(self, raw_docs: List[Dict[str, Any]]):
+    async def populate_database(self, raw_docs: List[Dict[str, Any]]) -> int:

         # perform chunking
         self.docs: List[Document] = await chunk_transcripts(
@@ -200,6 +200,8 @@ class DatastoreManager:
             points=points,
         )

+        return len(points)
+
     def count_docs(self) -> int:
         try:
             count = self.qdrant_client.get_collection(self.name).points_count
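`populate_database` now reports how many points were upserted, which `app.py` uses for its load-confirmation message. A hedged usage sketch; the `manager` and `raw_docs` names are illustrative, not from the commit:

```python
# Hedged sketch: consuming the new int return value of populate_database().
# 'manager' is any DatastoreManager instance, 'raw_docs' the parsed JSON payload.
async def load_and_report(manager, raw_docs) -> int:
    n_points = 0
    if manager.count_docs() == 0:
        n_points = await manager.populate_database(raw_docs=raw_docs)
        print(f"Upserted {n_points} points into the collection")
    return n_points
```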
pstuts_rag/pstuts_rag/loader.py CHANGED
@@ -1,7 +1,34 @@
+import glob
+import json
 from langchain_core.document_loaders import BaseLoader
 from typing import List, Dict, Iterator
 from langchain_core.documents import Document

+import aiofiles
+import asyncio
+from pathlib import Path
+
+
+async def load_single_json(filepath):
+    my_path = Path(filepath)
+
+    async with aiofiles.open(my_path, mode="r", encoding="utf-8") as f:
+        content = await f.read()
+    payload: List[Dict] = json.loads(content)
+    [video.update({"group": my_path.name}) for video in payload]
+
+    return payload
+
+
+async def load_json_files(path_pattern: List[str]):
+    files = []
+    for f in path_pattern:
+        (files.extend(glob.glob(f, recursive=True)))
+
+    tasks = [load_single_json(f) for f in files]
+    results = await asyncio.gather(*tasks)
+    return [item for sublist in results for item in sublist]  # flatten
+

 class VideoTranscriptBulkLoader(BaseLoader):
     """Loads video transcripts as a bulk into documents"""
@@ -21,7 +48,9 @@ class VideoTranscriptBulkLoader(BaseLoader):
         if "url" in metadata:
             metadata["source"] = metadata.pop("url")
         yield Document(
-            page_content="\n".join(t["sent"] for t in video["transcripts"]),
+            page_content="\n".join(
+                t["sent"] for t in video["transcripts"]
+            ),
             metadata=metadata,
         )
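The new loader helpers glob each pattern, read the matched JSON files concurrently with `aiofiles`, tag every video record with a `group` key naming its source file, and flatten the results. A usage sketch mirroring the notebook cell; the file paths are assumed to exist:

```python
# Usage sketch (paths are assumptions): concurrent transcript loading.
import asyncio
from pstuts_rag.loader import load_json_files

async def demo():
    data = await load_json_files(["data/test.json", "data/dev.json"])
    # Each record carries the name of the file it came from.
    print(len(data), sorted({d["group"] for d in data}))

asyncio.run(demo())
```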
 
pstuts_rag/pstuts_rag/rag.py CHANGED
@@ -7,9 +7,10 @@ This module provides the core RAG functionality, including:

 import json
 from multiprocessing import Value
+import re
 import uuid
 from operator import itemgetter
-from typing import Dict, List, Any
+from typing import Dict, List, Any, Tuple

 from langchain_core.documents import Document
 from langchain_core.runnables import (
@@ -97,7 +98,7 @@ class RAGChainFactory:
         )

         text_w_references = "\n".join(
-            [answer.content, "**References**:", references]
+            [answer.content, "**REFERENCES**", references]
         )

         output: AIMessage = answer.model_copy(
@@ -113,6 +114,20 @@ class RAGChainFactory:

         return output

+    @staticmethod
+    def unpack_references(content: str) -> Tuple[str, str]:
+        parts = re.split(r"\*\*REFERENCES\*\*\s*", content, maxsplit=1)
+
+        if len(parts) == 2:
+            text = parts[0].rstrip()
+            references = parts[1].lstrip()
+            return text, references
+
+        else:
+            raise ValueError(
+                f"No '**REFERENCES**' section found in input:\n{content}"
+            )
+
     def __init__(
         self,
         retriever: VectorStoreRetriever,
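`unpack_references` splits on the `**REFERENCES**` marker that the chain now inserts between the answer text and the JSON reference block. A round-trip sketch with made-up strings:

```python
# Round-trip sketch (sample strings are illustrative, not from the commit).
from pstuts_rag.rag import RAGChainFactory

composed = "\n".join(
    [
        "Layers are the building blocks of any image.",
        "**REFERENCES**",
        '[{"title": "Understand layers", "start": 0.47, "stop": 62.14}]',
    ]
)

text, references = RAGChainFactory.unpack_references(composed)
assert text == "Layers are the building blocks of any image."
assert references.startswith("[")
```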