nkasmanoff committed on
Commit
82071aa
·
1 Parent(s): 64e8657
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +96 -2
  3. rag.py +52 -0
  4. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ **.env
app.py CHANGED
@@ -1,4 +1,98 @@
1
  import streamlit as st
 
2
 
3
- x = st.slider("Select a value")
4
- st.write(x, "squared is", x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html

import streamlit as st

from rag import answer_question
3
 
4
# Set page config: browser-tab title and icon, full-width layout.
# The icon is the US flag emoji; the previous literal was the mis-decoded
# (mojibake) form of the same UTF-8 bytes.
st.set_page_config(
    page_title="Cory Booker's Historic Filibuster RAG",
    page_icon="🇺🇸",
    layout="wide",
)
8
+
9
# Custom CSS: page background, button and text-area styling, plus the
# "highlight-text" class used by the HTML snippets rendered further down.
CUSTOM_CSS = """
    <style>
    .main {
        background-color: #f8f9fa;
    }
    .stButton>button {
        background-color: #1a237e;
        color: white;
        border-radius: 5px;
        padding: 10px 20px;
        border: none;
        font-weight: bold;
    }
    .stButton>button:hover {
        background-color: #0d47a1;
    }
    .stTextArea>div>div>textarea {
        border-radius: 5px;
        border: 2px solid #e0e0e0;
    }
    .highlight-text {
        background-color: #fff3cd;
        padding: 2px 5px;
        border-radius: 3px;
        font-weight: bold;
        color: #856404;
    }
    </style>
    """

# Raw HTML is required here because the payload is a <style> element.
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
42
+
43
# Header with American flag emoji (the previous literal was the mis-decoded
# form of the flag's UTF-8 bytes).
st.title("🇺🇸 Cory Booker's Historic Filibuster RAG")


# Source-material card linking to the playlist the transcript comes from.
st.markdown(
    """
    <div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
        <h3 style='color: #1a237e;'>Source Material</h3>
        <p>The transcript is derived from videos in this playlist:
        <a href='https://www.youtube.com/playlist?list=PLeifkaZBt4JtdT8DZ7aftJ0lU0Q6Hfnvz' target='_blank'>YouTube Playlist</a></p>
    </div>
    """,
    unsafe_allow_html=True,
)

# Search section: introductory card shown above the question box.
st.markdown(
    """
    <div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
        <h2 style='color: #1a237e;'>Explore the Filibuster</h2>
        <p>Ask questions about Senator Booker's historic <span class="highlight-text">25-hour filibuster</span> speech below:</p>
    </div>
    """,
    unsafe_allow_html=True,
)
67
+
68
# Free-form question box.
text = st.text_area(
    "Your Question",
    height=150,
    placeholder="What would you like to know about Senator Booker's historic filibuster?",
)

if st.button("Search", key="search_button"):
    if not text.strip():
        # Nothing to search for: skip the retrieval + LLM round trip entirely.
        st.warning("Please enter a question before searching.")
    else:
        with st.spinner("Searching through the historic filibuster transcript..."):
            response = answer_question(text)
        # The answer is model-generated text that is interpolated into raw HTML
        # (unsafe_allow_html=True), so escape it to keep any markup in the
        # response from being interpreted by the browser.
        st.markdown(
            """
            <div style='background-color: #ffffff; padding: 20px; border-radius: 10px; box-shadow: 0 2px 4px rgba(0,0,0,0.1); margin-top: 20px;'>
                <h3 style='color: #1a237e;'>Answer:</h3>
                <p style='font-size: 16px; line-height: 1.6;'>{}</p>
            </div>
            """.format(
                html.escape(str(response))
            ),
            unsafe_allow_html=True,
        )
88
+
89
# Footer. The flag emoji replace the mis-decoded (mojibake) byte sequences
# that previously surrounded the tagline.
st.markdown(
    """
    <div style='text-align: center; margin-top: 40px; color: #666;'>
        <p>🇺🇸 Celebrating American Democracy and the Power of Speech 🇺🇸</p>
        <p style='font-size: 14px;'>A tribute to Senator Cory Booker's record-breaking <span class="highlight-text">25-hour filibuster</span></p>
    </div>
    """,
    unsafe_allow_html=True,
)
rag.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import dspy
2
+ from dotenv import load_dotenv
3
+
4
+ load_dotenv()
5
+ from langchain_community.vectorstores import Chroma
6
+ from langchain_openai import OpenAIEmbeddings
7
+
8
# Directory passed to Chroma as its persistence location.
persist_directory = "embeddings_db"

# Language model used by DSPy; registered as the global default.
lm = dspy.LM("openai/gpt-4o-mini")
dspy.configure(lm=lm)

# Vector store over the persisted embeddings, exposed as a retriever.
embedding_function = OpenAIEmbeddings(model="text-embedding-3-small")
vectordb = Chroma(
    embedding_function=embedding_function,
    persist_directory=persist_directory,
)
retriever = vectordb.as_retriever()
17
+
18
+
19
def retrieve(inputs):
    """Look up documents for a query using the module-level ``retriever``.

    Args:
        inputs: Mapping with a ``"question"`` key holding the query text.

    Returns:
        Whatever ``retriever.invoke`` returns for that query.
    """
    query = inputs["question"]
    return retriever.invoke(query)
22
+
23
+
24
def get_source_pages(docs):
    """Return unique ``"<section> - <page>"`` labels for the given documents.

    Each document's ``metadata["source"]`` is split on ``"/"``: the section is
    the second-to-last component and the page is the last component up to its
    first ``"."``.

    Args:
        docs: Iterable of objects exposing a ``metadata`` mapping with a
            ``"source"`` path string.

    Returns:
        A list of distinct labels in first-seen order. (A plain ``set`` would
        make the order vary between runs because of string hash randomisation.)

    Raises:
        KeyError: If a document has no ``"source"`` metadata entry.
        IndexError: If a source path contains no ``"/"`` separator.
    """
    labels = {}
    for doc in docs:
        path_parts = doc.metadata["source"].split("/")
        section = path_parts[-2]
        page = path_parts[-1].split(".")[0]
        # dict keys de-duplicate while remembering insertion order.
        labels[f"{section} - {page}"] = None
    return list(labels)
32
+
33
+
34
class COT_RAG(dspy.Module):
    """Chain-of-thought question answering over retrieved documents.

    ``forward`` retrieves documents for the question, stores them on
    ``self.docs``, and passes their ``page_content`` as context to a
    ``dspy.ChainOfThought`` predictor with the signature
    ``"context, question -> response"``.
    """

    def __init__(self):
        # Let dspy.Module initialise its own state before sub-modules are
        # attached to the instance.
        super().__init__()
        self.respond = dspy.ChainOfThought("context, question -> response")

    def forward(self, question):
        """Retrieve context for ``question`` and generate a response.

        Args:
            question: The user's question text.

        Returns:
            The result of the chain-of-thought predictor call.
        """
        # The framing sentence is used only for retrieval; the predictor
        # receives the original, unmodified question.
        retrieval_query = (
            "Please review this speech by Cory Booker and answer the question below."
            f"\n\nQuestion: {question}"
        )
        docs = retrieve({"question": retrieval_query})
        # Store the retrieved documents on the instance.
        self.docs = docs
        context = [doc.page_content for doc in docs]
        return self.respond(context=context, question=question)
47
+
48
+
49
def answer_question(question):
    """Answer ``question`` with a freshly constructed ``COT_RAG`` module.

    Args:
        question: The user's question text.

    Returns:
        The ``response`` attribute of the module's result.
    """
    rag = COT_RAG()
    # Invoke the module itself instead of calling .forward() directly, so the
    # call goes through dspy.Module's own call handling.
    prediction = rag(question=question)
    return prediction.response
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ dspy
2
+ langchain-community>=0.0.10
3
+ langchain-openai>=0.0.2
4
+ langchain-text-splitters>=0.0.1
5
+ chromadb>=0.4.22
6
+ openai>=1.12.0
7
+ python-dotenv>=1.0.0