Spaces:
Sleeping
Sleeping
Commit
·
82071aa
1
Parent(s):
64e8657
update
Browse files- .gitignore +1 -0
- app.py +96 -2
- rag.py +52 -0
- requirements.txt +7 -0
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
**.env
|
app.py
CHANGED
@@ -1,4 +1,98 @@
|
|
1 |
import streamlit as st
|
|
|
2 |
|
3 |
-
|
4 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
from rag import answer_question
|
3 |
|
4 |
+
# Set page config: browser-tab title, flag icon, and wide layout.
# The icon is the US flag emoji; the mis-decoded byte sequence that was here
# is not a valid emoji.
st.set_page_config(
    page_title="Cory Booker's Historic Filibuster RAG",
    page_icon="🇺🇸",
    layout="wide",
)
|
8 |
+
|
9 |
+
# Custom CSS: page background, button and text-area styling, and the
# `.highlight-text` class used by the HTML cards further down the page.
_CUSTOM_CSS = """
<style>
.main {
background-color: #f8f9fa;
}
.stButton>button {
background-color: #1a237e;
color: white;
border-radius: 5px;
padding: 10px 20px;
border: none;
font-weight: bold;
}
.stButton>button:hover {
background-color: #0d47a1;
}
.stTextArea>div>div>textarea {
border-radius: 5px;
border: 2px solid #e0e0e0;
}
.highlight-text {
background-color: #fff3cd;
padding: 2px 5px;
border-radius: 3px;
font-weight: bold;
color: #856404;
}
</style>
"""

# The stylesheet is raw HTML, so it has to be rendered with unsafe_allow_html.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
42 |
+
|
43 |
+
# Header with American flag emoji (restored from a mis-decoded byte sequence)
st.title("🇺🇸 Cory Booker's Historic Filibuster RAG")

# Static card linking to the playlist the transcript was derived from.
st.markdown(
    """
<div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
<h3 style='color: #1a237e;'>Source Material</h3>
<p>The transcript is derived from videos in this playlist:
<a href='https://www.youtube.com/playlist?list=PLeifkaZBt4JtdT8DZ7aftJ0lU0Q6Hfnvz' target='_blank'>YouTube Playlist</a></p>
</div>
""",
    unsafe_allow_html=True,
)
|
57 |
+
# Search section: an introductory card followed by the question input box.
_search_intro_html = """
<div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
<h2 style='color: #1a237e;'>Explore the Filibuster</h2>
<p>Ask questions about Senator Booker's historic <span class="highlight-text">25-hour filibuster</span> speech below:</p>
</div>
"""
st.markdown(_search_intro_html, unsafe_allow_html=True)

# `text` holds the user's question; the search button handler reads it.
text = st.text_area(
    label="Your Question",
    placeholder="What would you like to know about Senator Booker's historic filibuster?",
    height=150,
)
|
73 |
+
|
74 |
+
# Run the RAG pipeline when the user presses "Search" and render the answer.
if st.button("Search", key="search_button"):
    # Local import keeps this block self-contained in the script.
    from html import escape

    question = text.strip()
    if not question:
        # Skip the retrieval/LLM round trip when nothing was entered.
        st.warning("Please enter a question before searching.")
    else:
        with st.spinner("Searching through the historic filibuster transcript..."):
            response = answer_question(question)

        # The answer is model-generated text that is inserted into a block
        # rendered with unsafe_allow_html=True, so escape it first to keep any
        # markup in the response from being interpreted as HTML.
        st.markdown(
            """
<div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
<h3 style='color: #1a237e;'>Answer:</h3>
<p style='font-size: 16px; line-height: 1.6;'>{}</p>
</div>
""".format(
                escape(str(response))
            ),
            unsafe_allow_html=True,
        )
|
88 |
+
|
89 |
+
# Footer: centered tribute line framed by US flag emoji (restored from a
# mis-decoded byte sequence).
st.markdown(
    """
<div style='text-align: center; margin-top: 40px; color: #666;'>
<p>🇺🇸 Celebrating American Democracy and the Power of Speech 🇺🇸</p>
<p style='font-size: 14px;'>A tribute to Senator Cory Booker's record-breaking <span class="highlight-text">25-hour filibuster</span></p>
</div>
""",
    unsafe_allow_html=True,
)
|
rag.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import dspy
|
2 |
+
from dotenv import load_dotenv
|
3 |
+
|
4 |
+
load_dotenv()
|
5 |
+
from langchain_community.vectorstores import Chroma
|
6 |
+
from langchain_openai import OpenAIEmbeddings
|
7 |
+
|
8 |
+
# Directory passed to Chroma as its persistence location.
persist_directory = "embeddings_db"

# Language model that DSPy is configured to use for every module in this file.
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

# Embedding function handed to the Chroma vector store.
embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")

vectordb = Chroma(
    embedding_function=embedding_function,
    persist_directory=persist_directory,
)

# Retriever with the vector store's default search settings.
retriever = vectordb.as_retriever()
|
17 |
+
|
18 |
+
|
19 |
+
def retrieve(inputs):
    """Fetch documents for a question using the module-level retriever.

    Args:
        inputs: Mapping that holds the query text under the ``"question"`` key.

    Returns:
        The result of ``retriever.invoke`` for that query.

    Raises:
        KeyError: If ``inputs`` has no ``"question"`` entry.
    """
    query = inputs["question"]
    return retriever.invoke(query)
|
22 |
+
|
23 |
+
|
24 |
+
def get_source_pages(docs):
    """Return the unique ``"<section> - <page>"`` labels for ``docs``.

    Each document's ``metadata["source"]`` is split on ``"/"``: the
    second-to-last component is the section and the last component, cut at its
    first ``"."``, is the page.

    Labels are returned in first-seen order. Deduplicating through a ``set``
    would make the order arbitrary and vary between runs.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping with a
            ``"source"`` path string.

    Returns:
        A list of distinct label strings, ordered by first appearance.

    Raises:
        KeyError: If a document's metadata has no ``"source"`` entry.
        IndexError: If a source path contains no ``"/"`` separator.
    """
    labels = {}
    for doc in docs:
        parts = doc.metadata["source"].split("/")
        section = parts[-2]
        page = parts[-1].split(".")[0]
        # dict keys keep insertion order, giving a stable, duplicate-free list.
        labels[f"{section} - {page}"] = None
    return list(labels)
|
32 |
+
|
33 |
+
|
34 |
+
class COT_RAG(dspy.Module):
    """Retrieval-augmented question answering built on a chain-of-thought predictor."""

    def __init__(self):
        # Run the base-class initializer before adding sub-modules.
        super().__init__()
        self.respond = dspy.ChainOfThought("context, question -> response")

    def forward(self, question):
        """Retrieve passages for ``question`` and generate a response.

        Args:
            question: The user's question as plain text.

        Returns:
            The prediction produced by ``self.respond``; callers in this file
            read its ``response`` field.
        """
        # The retrieval query is the question prefixed with a fixed instruction;
        # the predictor itself receives the bare question.
        question_ = (
            "Please review this speech by Cory Booker and answer the question below."
        )
        question_ += f"\n\nQuestion: {question}"
        docs = retrieve({"question": question_})
        # Keep the retrieved documents on the instance for later inspection.
        self.docs = docs
        context = [doc.page_content for doc in docs]
        return self.respond(context=context, question=question)
|
47 |
+
|
48 |
+
|
49 |
+
def answer_question(question):
    """Answer ``question`` with a fresh ``COT_RAG`` pipeline.

    Args:
        question: The user's question as plain text.

    Returns:
        The ``response`` field of the pipeline's prediction.
    """
    rag = COT_RAG()
    # Invoke the module itself, not ``forward`` directly, so the call goes
    # through ``dspy.Module.__call__``.
    answer = rag(question)
    return answer.response
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
dspy
|
2 |
+
langchain-community>=0.0.10
|
3 |
+
langchain-openai>=0.0.2
|
4 |
+
langchain-text-splitters>=0.0.1
|
5 |
+
chromadb>=0.4.22
|
6 |
+
openai>=1.12.0
|
7 |
+
python-dotenv>=1.0.0
|