Update app.py
app.py CHANGED
@@ -1,12 +1,11 @@
 import os
 import streamlit as st
 import nest_asyncio
-nest_asyncio.apply()  # allow asyncio in Streamlit

+# ─── PATCH STREAMLIT’S LOOP ──────────────────────────────────────
+nest_asyncio.apply()  # allow nested awaits on Tornado’s loop
 import asyncio
-
-loop = asyncio.new_event_loop()
-asyncio.set_event_loop(loop)
+loop = asyncio.get_event_loop()  # grab the running Streamlit/Tornado loop

 # ─── LlamaIndex & Parser Imports ─────────────────────────────────
 from llama_index.core import StorageContext, load_index_from_storage
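This hunk swaps a privately constructed event loop for the one Streamlit's Tornado server already runs, with nest_asyncio.apply() making that loop safely re-entrant. A minimal standalone sketch of the same hand-off pattern, with a background thread standing in for Tornado's loop (the `answer` coroutine is illustrative, not part of app.py):

import asyncio
import threading

# A loop running in a background thread plays the role of Tornado's loop.
loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()

async def answer(query: str) -> str:
    await asyncio.sleep(0.1)  # stands in for retrieval + LLM latency
    return f"echo: {query}"

# Hand the coroutine to the loop's thread; only the calling thread blocks.
future = asyncio.run_coroutine_threadsafe(answer("hi"), loop)
print(future.result())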
@@ -18,10 +17,11 @@ from llama_index.core.workflow import Event, StartEvent, StopEvent, Workflow, step
 from llama_index.core.memory import ChatMemoryBuffer

 # ─── Constants ───────────────────────────────────────────────────
-PDF_PATH
-INDEX_DIR
+PDF_PATH = "./data/bank-of-america.pdf"
+INDEX_DIR = "./index_data"
 SYSTEM_PROMPT = (
-    "You are an expert analyst, who excels in analyzing a company's earnings call deck."
+    "You are an expert analyst, who excels in analyzing a company's earnings call deck. "
+    "Answer questions ONLY from the indexed document."
 )

 # ─── Workflow Definition ─────────────────────────────────────────
@@ -32,17 +32,16 @@ class ChatResponseEvent(Event):
 class ChatWorkflow(Workflow):
     @step
     async def answer(self, ev: StartEvent) -> ChatResponseEvent:
-        # load index
         storage = StorageContext.from_defaults(persist_dir=ev.index_dir)
         index = load_index_from_storage(storage)
-        # build chat engine with memory & prompt
         chat_engine = index.as_chat_engine(
             chat_mode="context",
             memory=ev.memory,
             system_prompt=ev.system_prompt,
             llm=ev.llm
-        )
-        #
+        )
+        # Use sync call inside async step, but it's fine since it's small;
+        # you could also `await chat_engine.achat(...)` if available
         resp = chat_engine.chat(ev.query)
         return ChatResponseEvent(response=resp.response, memory=ev.memory)

@@ -51,33 +50,22 @@ class ChatWorkflow(Workflow):
         return StopEvent(result=ev.response)

 # ─── Streamlit UI & Session State ────────────────────────────────
-st.set_page_config(page_title="PDF Chatbot", layout="wide")
+st.set_page_config(page_title="PDF Chatbot", layout="wide")
 st.title("📄 Chat with Your PDF")

-# 1)
+# 1) Build or load the index once
 if "index_ready" not in st.session_state:
-    os.makedirs(INDEX_DIR, exist_ok=True)
+    os.makedirs(INDEX_DIR, exist_ok=True)
     index_meta = os.path.join(INDEX_DIR, "index_store.json")
     if os.path.isfile(index_meta):
-        # Found LlamaIndex metadata → reuse existing index
         st.session_state.index_ready = True
         st.success("✅ Loaded existing index from index_store.json!")
     else:
-        # No index_store.json → build index now
         docs = LlamaParse(
             result_type="markdown",
             content_guideline_instruction=(
                 "You are processing a company’s quarterly earnings-call slide deck. "
-                "For each slide, produce a clearly sectioned Markdown fragment:\n"
-                "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
-                "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
-                "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
-                "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axes labels, trends, and top 3 data points or percentage changes\n"
-                "5. **Figures & images**: if there’s a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
-                "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a “Metrics” subsection\n"
-                "7. **Overall slide summary**: a 1–2-sentence plain-English takeaway for the slide’s purpose or conclusion\n\n"
-                "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and tables syntax. "
-                "Do not include any LLM-specific commentary or markdown outside these rules."
+                "For each slide, produce a clearly sectioned Markdown fragment..."
             )
         ).load_data(PDF_PATH)
         idx = VectorStoreIndex.from_documents(
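The else-branch above implements a build-once guard: parse and index the PDF on the first run, then reuse the persisted index on every later run. The same logic as a standalone helper, sketched under the assumption of the default LlamaIndex persistence layout (index_store.json under the persist directory); `build_docs` is a hypothetical loader callback:

import os
from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage

def get_or_build_index(build_docs, persist_dir):
    # build_docs is a hypothetical callable returning parsed Documents
    if os.path.isfile(os.path.join(persist_dir, "index_store.json")):
        storage = StorageContext.from_defaults(persist_dir=persist_dir)
        return load_index_from_storage(storage)
    index = VectorStoreIndex.from_documents(build_docs())
    index.storage_context.persist(persist_dir=persist_dir)
    return index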
@@ -86,34 +74,38 @@ if "index_ready" not in st.session_state:
         )
         idx.storage_context.persist(persist_dir=INDEX_DIR)
         st.session_state.index_ready = True
-        st.success("🎉 Indexed your document and created index_store.json!")
-
+        st.success("🎉 Indexed your document and created index_store.json!")

 # 2) Initialize memory & workflow
 if "memory" not in st.session_state:
     st.session_state.memory = ChatMemoryBuffer.from_defaults(
         llm=OpenAI(model="gpt-4o"), token_limit=1500
-)
+    )
 if "workflow" not in st.session_state:
     st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)

-# 3) User input
+# 3) User input & async invocation
 user_input = st.text_input("Ask a question about the document:")
 if user_input:
-    #
-    stop_evt = loop.run_until_complete(
+    # Schedule the coroutine on Streamlit’s running loop
+    future = asyncio.run_coroutine_threadsafe(
         st.session_state.workflow.run(
             index_dir=INDEX_DIR,
             query=user_input,
             system_prompt=SYSTEM_PROMPT,
             memory=st.session_state.memory,
             llm=OpenAI(model="gpt-4o")
-        )
+        ),
+        loop
     )
+    # Wait for it to finish (non-blocking at the loop level)
+    stop_evt: StopEvent = future.result()
+
+    # Update session state & display
     st.session_state.memory = stop_evt.memory
     st.markdown(f"**Bot:** {stop_evt.result}")

-# 4) End Chat
+# 4) End Chat
 if st.button("End Chat"):
     st.write("Chat ended. Refresh to start over.")
-    st.stop()
+    st.stop()
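One caveat on the new call sequence: future.result() blocks the Streamlit script thread until the workflow finishes. Since asyncio.run_coroutine_threadsafe returns a concurrent.futures.Future, the wait can be bounded; an optional hardening sketch (the 120-second value is illustrative, not part of this commit):

from concurrent.futures import TimeoutError as FutureTimeout

try:
    stop_evt: StopEvent = future.result(timeout=120)
except FutureTimeout:
    st.error("The model took too long to respond; please try again.")
    st.stop()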
app.py after this commit (unchanged regions elided with # …):

import os
import streamlit as st
import nest_asyncio

# ─── PATCH STREAMLIT’S LOOP ──────────────────────────────────────
nest_asyncio.apply()  # allow nested awaits on Tornado’s loop
import asyncio
loop = asyncio.get_event_loop()  # grab the running Streamlit/Tornado loop

# ─── LlamaIndex & Parser Imports ─────────────────────────────────
from llama_index.core import StorageContext, load_index_from_storage
# …
from llama_index.core.memory import ChatMemoryBuffer

# ─── Constants ───────────────────────────────────────────────────
PDF_PATH = "./data/bank-of-america.pdf"
INDEX_DIR = "./index_data"
SYSTEM_PROMPT = (
    "You are an expert analyst, who excels in analyzing a company's earnings call deck. "
    "Answer questions ONLY from the indexed document."
)

# ─── Workflow Definition ─────────────────────────────────────────
# …
class ChatWorkflow(Workflow):
    @step
    async def answer(self, ev: StartEvent) -> ChatResponseEvent:
        storage = StorageContext.from_defaults(persist_dir=ev.index_dir)
        index = load_index_from_storage(storage)
        chat_engine = index.as_chat_engine(
            chat_mode="context",
            memory=ev.memory,
            system_prompt=ev.system_prompt,
            llm=ev.llm
        )
        # Use sync call inside async step, but it's fine since it's small;
        # you could also `await chat_engine.achat(...)` if available
        resp = chat_engine.chat(ev.query)
        return ChatResponseEvent(response=resp.response, memory=ev.memory)

    # …
        return StopEvent(result=ev.response)

# ─── Streamlit UI & Session State ────────────────────────────────
st.set_page_config(page_title="PDF Chatbot", layout="wide")
st.title("📄 Chat with Your PDF")

# 1) Build or load the index once
if "index_ready" not in st.session_state:
    os.makedirs(INDEX_DIR, exist_ok=True)
    index_meta = os.path.join(INDEX_DIR, "index_store.json")
    if os.path.isfile(index_meta):
        st.session_state.index_ready = True
        st.success("✅ Loaded existing index from index_store.json!")
    else:
        docs = LlamaParse(
            result_type="markdown",
            content_guideline_instruction=(
                "You are processing a company’s quarterly earnings-call slide deck. "
                "For each slide, produce a clearly sectioned Markdown fragment..."
            )
        ).load_data(PDF_PATH)
        idx = VectorStoreIndex.from_documents(
            # …
        )
        idx.storage_context.persist(persist_dir=INDEX_DIR)
        st.session_state.index_ready = True
        st.success("🎉 Indexed your document and created index_store.json!")

# 2) Initialize memory & workflow
if "memory" not in st.session_state:
    st.session_state.memory = ChatMemoryBuffer.from_defaults(
        llm=OpenAI(model="gpt-4o"), token_limit=1500
    )
if "workflow" not in st.session_state:
    st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)

# 3) User input & async invocation
user_input = st.text_input("Ask a question about the document:")
if user_input:
    # Schedule the coroutine on Streamlit's running loop
    future = asyncio.run_coroutine_threadsafe(
        st.session_state.workflow.run(
            index_dir=INDEX_DIR,
            query=user_input,
            system_prompt=SYSTEM_PROMPT,
            memory=st.session_state.memory,
            llm=OpenAI(model="gpt-4o")
        ),
        loop
    )
    # Wait for it to finish (non-blocking at the loop level)
    stop_evt: StopEvent = future.result()

    # Update session state & display
    st.session_state.memory = stop_evt.memory
    st.markdown(f"**Bot:** {stop_evt.result}")

# 4) End Chat
if st.button("End Chat"):
    st.write("Chat ended. Refresh to start over.")
    st.stop()