Update app.py
app.py CHANGED
@@ -1,12 +1,11 @@
 import os
 import streamlit as st
 import nest_asyncio
-nest_asyncio.apply()  # allow asyncio in Streamlit

+# ─── PATCH STREAMLIT’S LOOP ──────────────────────────────────────
+nest_asyncio.apply()  # allow nested awaits on Tornado’s loop
 import asyncio
-
-loop = asyncio.new_event_loop()
-asyncio.set_event_loop(loop)
+loop = asyncio.get_event_loop()  # grab the running Streamlit/Tornado loop

 # ─── LlamaIndex & Parser Imports ─────────────────────────────────
 from llama_index.core import StorageContext, load_index_from_storage
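This hunk swaps a privately constructed event loop for the one Streamlit's Tornado server already runs, with nest_asyncio.apply() making that loop safely re-entrant. A minimal standalone sketch of the same hand-off pattern, with a background thread standing in for Tornado's loop (the `answer` coroutine is illustrative, not part of app.py):

import asyncio
import threading

# A loop running in a background thread plays the role of Tornado's loop.
loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()

async def answer(query: str) -> str:
    await asyncio.sleep(0.1)  # stands in for retrieval + LLM latency
    return f"echo: {query}"

# Hand the coroutine to the loop's thread; only the calling thread blocks.
future = asyncio.run_coroutine_threadsafe(answer("hi"), loop)
print(future.result())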
@@ -18,10 +17,11 @@ from llama_index.core.workflow import Event, StartEvent, StopEvent, Workflow, step
 from llama_index.core.memory import ChatMemoryBuffer

 # ─── Constants ───────────────────────────────────────────────────
-PDF_PATH
-INDEX_DIR
+PDF_PATH = "./data/bank-of-america.pdf"
+INDEX_DIR = "./index_data"
 SYSTEM_PROMPT = (
-    "You are an expert analyst, who excels in analyzing a company's earnings call deck."
+    "You are an expert analyst, who excels in analyzing a company's earnings call deck. "
+    "Answer questions ONLY from the indexed document."
 )

 # ─── Workflow Definition ─────────────────────────────────────────
@@ -32,17 +32,16 @@ class ChatResponseEvent(Event):
 class ChatWorkflow(Workflow):
     @step
     async def answer(self, ev: StartEvent) -> ChatResponseEvent:
-        # load index
         storage = StorageContext.from_defaults(persist_dir=ev.index_dir)
         index = load_index_from_storage(storage)
-        # build chat engine with memory & prompt
         chat_engine = index.as_chat_engine(
             chat_mode="context",
             memory=ev.memory,
             system_prompt=ev.system_prompt,
             llm=ev.llm
-        )
-        #
+        )
+        # Use sync call inside async step, but it's fine since it's small;
+        # you could also `await chat_engine.achat(...)` if available
         resp = chat_engine.chat(ev.query)
         return ChatResponseEvent(response=resp.response, memory=ev.memory)

@@ -51,33 +50,22 @@ class ChatWorkflow(Workflow):
         return StopEvent(result=ev.response)

 # ─── Streamlit UI & Session State ────────────────────────────────
-st.set_page_config(page_title="PDF Chatbot", layout="wide")
+st.set_page_config(page_title="PDF Chatbot", layout="wide")
 st.title("📄 Chat with Your PDF")

-# 1)
+# 1) Build or load the index once
 if "index_ready" not in st.session_state:
-    os.makedirs(INDEX_DIR, exist_ok=True)
+    os.makedirs(INDEX_DIR, exist_ok=True)
     index_meta = os.path.join(INDEX_DIR, "index_store.json")
     if os.path.isfile(index_meta):
-        # Found LlamaIndex metadata → reuse existing index
         st.session_state.index_ready = True
         st.success("✅ Loaded existing index from index_store.json!")
     else:
-        # No index_store.json → build index now
         docs = LlamaParse(
             result_type="markdown",
             content_guideline_instruction=(
                 "You are processing a company’s quarterly earnings-call slide deck. "
-                "For each slide, produce a clearly sectioned Markdown fragment:\n"
-                "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
-                "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
-                "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
-                "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axes labels, trends, and top 3 data points or percentage changes\n"
-                "5. **Figures & images**: if there’s a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
-                "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a “Metrics” subsection\n"
-                "7. **Overall slide summary**: a 1–2-sentence plain-English takeaway for the slide’s purpose or conclusion\n\n"
-                "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and tables syntax. "
-                "Do not include any LLM-specific commentary or markdown outside these rules."
+                "For each slide, produce a clearly sectioned Markdown fragment..."
             )
         ).load_data(PDF_PATH)
         idx = VectorStoreIndex.from_documents(
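The else-branch above implements a build-once guard: parse and index the PDF on the first run, then reuse the persisted index on every later run. The same logic as a standalone helper, sketched under the assumption of the default LlamaIndex persistence layout (index_store.json under the persist directory); `build_docs` is a hypothetical loader callback:

import os
from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage

def get_or_build_index(build_docs, persist_dir):
    # build_docs is a hypothetical callable returning parsed Documents
    if os.path.isfile(os.path.join(persist_dir, "index_store.json")):
        storage = StorageContext.from_defaults(persist_dir=persist_dir)
        return load_index_from_storage(storage)
    index = VectorStoreIndex.from_documents(build_docs())
    index.storage_context.persist(persist_dir=persist_dir)
    return index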
@@ -86,34 +74,38 @@ if "index_ready" not in st.session_state:
         )
         idx.storage_context.persist(persist_dir=INDEX_DIR)
         st.session_state.index_ready = True
-        st.success("🎉 Indexed your document and created index_store.json!")
-
+        st.success("🎉 Indexed your document and created index_store.json!")

 # 2) Initialize memory & workflow
 if "memory" not in st.session_state:
     st.session_state.memory = ChatMemoryBuffer.from_defaults(
         llm=OpenAI(model="gpt-4o"), token_limit=1500
-)
+    )
 if "workflow" not in st.session_state:
     st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)

-# 3) User input
+# 3) User input & async invocation
 user_input = st.text_input("Ask a question about the document:")
 if user_input:
-    #
-    stop_evt = loop.run_until_complete(
+    # Schedule the coroutine on Streamlit’s running loop
+    future = asyncio.run_coroutine_threadsafe(
         st.session_state.workflow.run(
             index_dir=INDEX_DIR,
             query=user_input,
             system_prompt=SYSTEM_PROMPT,
             memory=st.session_state.memory,
             llm=OpenAI(model="gpt-4o")
-        )
+        ),
+        loop
     )
+    # Wait for it to finish (non-blocking at the loop level)
+    stop_evt: StopEvent = future.result()
+
+    # Update session state & display
     st.session_state.memory = stop_evt.memory
     st.markdown(f"**Bot:** {stop_evt.result}")

-# 4) End Chat
+# 4) End Chat
 if st.button("End Chat"):
     st.write("Chat ended. Refresh to start over.")
-    st.stop()
+    st.stop()
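One caveat on the new call sequence: future.result() blocks the Streamlit script thread until the workflow finishes. Since asyncio.run_coroutine_threadsafe returns a concurrent.futures.Future, the wait can be bounded; an optional hardening sketch (the 120-second value is illustrative, not part of this commit):

from concurrent.futures import TimeoutError as FutureTimeout

try:
    stop_evt: StopEvent = future.result(timeout=120)
except FutureTimeout:
    st.error("The model took too long to respond; please try again.")
    st.stop()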
app.py after this commit (unchanged regions elided with # …):

import os
import streamlit as st
import nest_asyncio

# ─── PATCH STREAMLIT’S LOOP ──────────────────────────────────────
nest_asyncio.apply()  # allow nested awaits on Tornado’s loop
import asyncio
loop = asyncio.get_event_loop()  # grab the running Streamlit/Tornado loop

# ─── LlamaIndex & Parser Imports ─────────────────────────────────
from llama_index.core import StorageContext, load_index_from_storage
# …
from llama_index.core.memory import ChatMemoryBuffer

# ─── Constants ───────────────────────────────────────────────────
PDF_PATH = "./data/bank-of-america.pdf"
INDEX_DIR = "./index_data"
SYSTEM_PROMPT = (
    "You are an expert analyst, who excels in analyzing a company's earnings call deck. "
    "Answer questions ONLY from the indexed document."
)

# ─── Workflow Definition ─────────────────────────────────────────
# …
class ChatWorkflow(Workflow):
    @step
    async def answer(self, ev: StartEvent) -> ChatResponseEvent:
        storage = StorageContext.from_defaults(persist_dir=ev.index_dir)
        index = load_index_from_storage(storage)
        chat_engine = index.as_chat_engine(
            chat_mode="context",
            memory=ev.memory,
            system_prompt=ev.system_prompt,
            llm=ev.llm
        )
        # Use sync call inside async step, but it's fine since it's small;
        # you could also `await chat_engine.achat(...)` if available
        resp = chat_engine.chat(ev.query)
        return ChatResponseEvent(response=resp.response, memory=ev.memory)

    # …
        return StopEvent(result=ev.response)

# ─── Streamlit UI & Session State ────────────────────────────────
st.set_page_config(page_title="PDF Chatbot", layout="wide")
st.title("📄 Chat with Your PDF")

# 1) Build or load the index once
if "index_ready" not in st.session_state:
    os.makedirs(INDEX_DIR, exist_ok=True)
    index_meta = os.path.join(INDEX_DIR, "index_store.json")
    if os.path.isfile(index_meta):
        st.session_state.index_ready = True
        st.success("✅ Loaded existing index from index_store.json!")
    else:
        docs = LlamaParse(
            result_type="markdown",
            content_guideline_instruction=(
                "You are processing a company’s quarterly earnings-call slide deck. "
                "For each slide, produce a clearly sectioned Markdown fragment..."
            )
        ).load_data(PDF_PATH)
        idx = VectorStoreIndex.from_documents(
            # …
        )
        idx.storage_context.persist(persist_dir=INDEX_DIR)
        st.session_state.index_ready = True
        st.success("🎉 Indexed your document and created index_store.json!")

# 2) Initialize memory & workflow
if "memory" not in st.session_state:
    st.session_state.memory = ChatMemoryBuffer.from_defaults(
        llm=OpenAI(model="gpt-4o"), token_limit=1500
    )
if "workflow" not in st.session_state:
    st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)

# 3) User input & async invocation
user_input = st.text_input("Ask a question about the document:")
if user_input:
    # Schedule the coroutine on Streamlit's running loop
    future = asyncio.run_coroutine_threadsafe(
        st.session_state.workflow.run(
            index_dir=INDEX_DIR,
            query=user_input,
            system_prompt=SYSTEM_PROMPT,
            memory=st.session_state.memory,
            llm=OpenAI(model="gpt-4o")
        ),
        loop
    )
    # Wait for it to finish (non-blocking at the loop level)
    stop_evt: StopEvent = future.result()

    # Update session state & display
    st.session_state.memory = stop_evt.memory
    st.markdown(f"**Bot:** {stop_evt.result}")

# 4) End Chat
if st.button("End Chat"):
    st.write("Chat ended. Refresh to start over.")
    st.stop()