Create app.py
app.py
ADDED
@@ -0,0 +1,106 @@
import os
import streamlit as st
import asyncio
import nest_asyncio

nest_asyncio.apply()  # allow asyncio.run() inside Streamlit's running event loop

# ─── LlamaIndex & Parser Imports ─────────────────────────────────
# (paths follow the post-0.10 package split: llama-index-core plus the
#  OpenAI LLM/embedding integration packages)
from llama_index.core import StorageContext, VectorStoreIndex, load_index_from_storage
from llama_index.core.workflow import Workflow, step, Event, StartEvent, StopEvent
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_parse import LlamaParse

# ─── Constants ───────────────────────────────────────────────────
PDF_PATH = "./data/my_doc.pdf"  # your single PDF
INDEX_DIR = "./index_data"
SYSTEM_PROMPT = (
    "You are an expert analyst, who excels in analyzing a company's "
    "earnings call deck. Answer questions ONLY from the indexed document."
)

# ─── Workflow Definition ─────────────────────────────────────────
class ChatResponseEvent(Event):
    response: str
    memory: ChatMemoryBuffer

class ChatWorkflow(Workflow):
    @step
    async def answer(self, ev: StartEvent) -> ChatResponseEvent:
        # load the persisted index, querying with the same embedding model
        # it was built with (otherwise retrieval silently degrades)
        storage = StorageContext.from_defaults(persist_dir=ev.index_dir)
        index = load_index_from_storage(
            storage,
            embed_model=OpenAIEmbedding(model="text-embedding-3-small"),
        )
        # build chat engine with memory & system prompt
        chat_engine = index.as_chat_engine(
            chat_mode="context",
            memory=ev.memory,
            system_prompt=ev.system_prompt,
            llm=ev.llm,
        )  # "context" chat mode: retrieve first, then answer from the retrieved context
        # single-turn chat (async variant, since steps run on the event loop)
        resp = await chat_engine.achat(ev.query)
        return ChatResponseEvent(response=resp.response, memory=ev.memory)

    @step
    async def finalize(self, ev: ChatResponseEvent) -> StopEvent:
        return StopEvent(result=ev.response)
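
# Note: keyword arguments passed to workflow.run(...) below surface as
# attributes on the StartEvent (ev.index_dir, ev.query, ev.system_prompt,
# ev.memory, ev.llm), and steps are chained by their event-type annotations:
# StartEvent -> ChatResponseEvent -> StopEvent.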

# ─── Streamlit UI & Session State ────────────────────────────────
st.set_page_config(page_title="PDF Chatbot", layout="wide")  # responsive layout
st.title("📄 Chat with Your PDF")

# 1) Ingest once (reuse an index persisted by an earlier run if one exists)
if "index_ready" not in st.session_state:
    if not os.path.isdir(INDEX_DIR):
        docs = LlamaParse(
            result_type="markdown",
            content_guideline_instruction=(
                "You are processing a company's quarterly earnings-call slide deck. "
                "For each slide, produce a clearly sectioned Markdown fragment that includes:\n\n"
                "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
                "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
                "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
                "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axis labels, trends, and the top 3 data points or percentage changes\n"
                "5. **Figures & images**: if there's a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
                "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a \"Metrics\" subsection\n"
                "7. **Overall slide summary**: a 1-2 sentence plain-English takeaway for the slide's purpose or conclusion\n\n"
                "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and table syntax. "
                "Do not include any LLM-specific commentary or markdown outside these rules."
            )
        ).load_data(PDF_PATH)  # tailored parsing instruction
        idx = VectorStoreIndex.from_documents(
            docs,
            embed_model=OpenAIEmbedding(model="text-embedding-3-small")
        )
        idx.storage_context.persist(persist_dir=INDEX_DIR)
    st.session_state.index_ready = True
    st.success("✅ Indexed your document!")  # user feedback

# 2) Initialize memory & workflow
if "memory" not in st.session_state:
    st.session_state.memory = ChatMemoryBuffer.from_defaults(
        llm=OpenAI(model="gpt-4o"), token_limit=1500
    )  # simple rolling chat memory
if "workflow" not in st.session_state:
    st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)
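
# st.session_state survives Streamlit's top-to-bottom script reruns, so the
# memory buffer and workflow instance above persist across user turns.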

# 3) User input
user_input = st.text_input("Ask a question about the document:")
if user_input:
    async def _ask(question: str) -> str:
        # awaiting the handler returned by run(...) yields the StopEvent's result
        return await st.session_state.workflow.run(
            index_dir=INDEX_DIR,
            query=question,
            system_prompt=SYSTEM_PROMPT,
            memory=st.session_state.memory,
            llm=OpenAI(model="gpt-4o"),
        )

    answer = asyncio.run(_ask(user_input))
    # no memory reassignment needed: the ChatMemoryBuffer held in
    # session_state is updated in place by the chat engine
    st.markdown(f"**Bot:** {answer}")

# 4) End Chat button
if st.button("End Chat"):
    st.write("Chat ended. Refresh to start over.")
    st.stop()
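
A note on running this (assumptions, since the commit itself doesn't say): LlamaParse reads its key from `LLAMA_CLOUD_API_KEY` and the OpenAI LLM/embeddings read `OPENAI_API_KEY`, and the PDF must already exist at `./data/my_doc.pdf`. With `streamlit`, `nest-asyncio`, `llama-index`, and `llama-parse` installed, the app starts with `streamlit run app.py`.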