Spaces:

nkasmanoff
/

ask-booker-speech

Sleeping

App Files Files Community

nkasmanoff committed on Apr 2

Commit

82071aa

1 Parent(s): 64e8657

update

Browse files

Files changed (4) hide show

.gitignore +1 -0
app.py +96 -2
rag.py +52 -0
requirements.txt +7 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ **.env

app.py CHANGED Viewed

@@ -1,4 +1,98 @@
 import streamlit as st
-x = st.slider("Select a value")
-st.write(x, "squared is", x * x)

 import streamlit as st
+from rag import answer_question
+# Page-level configuration: browser tab title, flag icon, and wide layout.
+st.set_page_config(
+    page_title="Cory Booker's Historic Filibuster RAG", page_icon="🇺🇸", layout="wide"
+)
+# Custom CSS injected as raw HTML: page background, button and text-area
+# styling, plus the .highlight-text class used by the sections below.
+st.markdown(
+    """
+    <style>
+    .main {
+        background-color: #f8f9fa;
+    }
+    .stButton>button {
+        background-color: #1a237e;
+        color: white;
+        border-radius: 5px;
+        padding: 10px 20px;
+        border: none;
+        font-weight: bold;
+    }
+    .stButton>button:hover {
+        background-color: #0d47a1;
+    }
+    .stTextArea>div>div>textarea {
+        border-radius: 5px;
+        border: 2px solid #e0e0e0;
+    }
+    .highlight-text {
+        background-color: #fff3cd;
+        padding: 2px 5px;
+        border-radius: 3px;
+        font-weight: bold;
+        color: #856404;
+    }
+    </style>
+""",
+    unsafe_allow_html=True,
+)
+# Header with American flag emoji
+st.title("🇺🇸 Cory Booker's Historic Filibuster RAG")
+# "Source Material" card linking to the YouTube playlist the transcript is derived from.
+st.markdown(
+    """
+    <div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
+        <h3 style='color: #1a237e;'>Source Material</h3>
+        <p>The transcript is derived from videos in this playlist:
+        <a href='https://www.youtube.com/playlist?list=PLeifkaZBt4JtdT8DZ7aftJ0lU0Q6Hfnvz' target='_blank'>YouTube Playlist</a></p>
+    </div>
+""",
+    unsafe_allow_html=True,
+)
+# Search section: introductory card shown above the question box.
+st.markdown(
+    """
+    <div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
+        <h2 style='color: #1a237e;'>Explore the Filibuster</h2>
+        <p>Ask questions about Senator Booker's historic <span class="highlight-text">25-hour filibuster</span> speech below:</p>
+    </div>
+""",
+    unsafe_allow_html=True,
+)
+# Free-form question input; its current value is what the Search button submits.
+text = st.text_area(
+    "Your Question",
+    height=150,
+    placeholder="What would you like to know about Senator Booker's historic filibuster?",
+)
+# Run the RAG pipeline when "Search" is clicked and render the answer card.
+if st.button("Search", key="search_button"):
+    # Local import keeps this handler self-contained (stdlib only).
+    import html
+    with st.spinner("Searching through the historic filibuster transcript..."):
+        response = answer_question(text)
+        # The answer is model-generated text interpolated into markup that is
+        # rendered with unsafe_allow_html=True. Escape it so any "<", ">" or
+        # "&" in the answer is shown literally instead of being interpreted
+        # as HTML by the browser.
+        st.markdown(
+            """
+            <div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
+                <h3 style='color: #1a237e;'>Answer:</h3>
+                <p style='font-size: 16px; line-height: 1.6;'>{}</p>
+            </div>
+        """.format(
+                html.escape(str(response))
+            ),
+            unsafe_allow_html=True,
+        )
+# Footer: centered tribute text rendered as raw HTML; reuses the
+# .highlight-text class for the "25-hour filibuster" phrase.
+st.markdown(
+    """
+    <div style='text-align: center; margin-top: 40px; color: #666;'>
+        <p>🇺🇸 Celebrating American Democracy and the Power of Speech 🇺🇸</p>
+        <p style='font-size: 14px;'>A tribute to Senator Cory Booker's record-breaking <span class="highlight-text">25-hour filibuster</span></p>
+    </div>
+""",
+    unsafe_allow_html=True,
+)

rag.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import dspy
+from dotenv import load_dotenv
+load_dotenv()
+from langchain_community.vectorstores import Chroma
+from langchain_openai import OpenAIEmbeddings
+# Directory handed to Chroma as its persistence location (relative path).
+persist_directory = "embeddings_db"
+# Language model registered as the dspy default for modules in this file.
+lm = dspy.LM("openai/gpt-4o-mini")
+dspy.configure(lm=lm)
+# Embedding function given to Chroma for query embedding.
+# NOTE(review): presumably must match the model used to build the stored
+# index in persist_directory — confirm against the indexing script.
+embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")
+vectordb = Chroma(
+    persist_directory=persist_directory, embedding_function=embedding_function
+)
+# Retriever created with as_retriever()'s default settings.
+retriever = vectordb.as_retriever()
+def retrieve(inputs):
+    """Return the documents the module-level retriever yields for ``inputs["question"]``."""
+    query = inputs["question"]
+    return retriever.invoke(query)
+def get_source_pages(docs):
+    """Return the unique "<section> - <page>" labels for the given documents.
+
+    Each document's ``metadata["source"]`` is treated as a "/"-separated path:
+    the second-to-last component is the section and the last component, up to
+    its first ".", is the page.
+
+    Args:
+        docs: Iterable of objects exposing a ``metadata`` mapping whose
+            ``"source"`` value is a path string containing at least one "/".
+
+    Returns:
+        List of unique labels in first-seen order.
+
+    Raises:
+        KeyError: If a document has no ``"source"`` metadata entry.
+        IndexError: If a source path has fewer than two "/"-separated parts.
+    """
+    labels = {}
+    for doc in docs:
+        # Split the path once and reuse the parts for both fields.
+        parts = doc.metadata["source"].split("/")
+        section = parts[-2]
+        page = parts[-1].split(".")[0]
+        # Dict keys de-duplicate while keeping insertion order, so the result
+        # is stable across runs (list(set(...)) order is not).
+        labels[f"{section} - {page}"] = None
+    return list(labels)
+class COT_RAG(dspy.Module):
+    """Retrieval-augmented question answering using a chain-of-thought predictor.
+
+    After each ``forward`` call the retrieved documents are available on
+    ``self.docs``.
+    """
+    def __init__(self):
+        # Initialise dspy.Module's own state before attaching sub-modules.
+        super().__init__()
+        self.respond = dspy.ChainOfThought("context, question -> response")
+    def forward(self, question):
+        """Retrieve passages for ``question`` and generate a response.
+
+        Args:
+            question: The user's question as a string.
+
+        Returns:
+            The prediction produced by ``self.respond``; callers in this file
+            read its ``response`` attribute.
+        """
+        # The instruction preamble is used only for the retrieval query; the
+        # predictor below receives the original, unmodified question.
+        question_ = (
+            "Please review this speech by Cory Booker and answer the question below."
+        )
+        question_ += f"\n\nQuestion: {question}"
+        docs = retrieve({"question": question_})
+        # Keep the retrieved documents on the instance for later inspection.
+        self.docs = docs
+        context = [doc.page_content for doc in docs]
+        return self.respond(context=context, question=question)
+def answer_question(question):
+    """Answer ``question`` with a fresh COT_RAG module and return the response text.
+
+    Args:
+        question: The user's question as a string.
+
+    Returns:
+        The ``response`` attribute of the module's prediction.
+    """
+    rag = COT_RAG()
+    # Invoke the module instance rather than .forward() directly so the call
+    # goes through dspy.Module.__call__, as the DSPy documentation recommends.
+    answer = rag(question)
+    return answer.response

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+dspy
+langchain-community>=0.0.10
+langchain-openai>=0.0.2
+langchain-text-splitters>=0.0.1
+chromadb>=0.4.22
+openai>=1.12.0
+python-dotenv>=1.0.0