Spaces:

getGO007
/

Chat_with_an_earnings_call_slide_deck

Sleeping

App Files Files Community

getGO007 committed on Apr 23

Commit

3e27771

verified ·

1 Parent(s): ada090d

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -23

app.py CHANGED Viewed

@@ -50,31 +50,39 @@ class ChatWorkflow(Workflow):
 st.set_page_config(page_title="PDF Chatbot", layout="wide")  # responsive layout :contentReference[oaicite:5]{index=5}
 st.title("📄 Chat with Your PDF")
-# 1) Ingest once
 if "index_ready" not in st.session_state:
-    docs = LlamaParse(
-        result_type="markdown",
-        content_guideline_instruction=(
-            "You are processing a company’s quarterly earnings-call slide deck. "
-            "For each slide, produce a clearly sectioned Markdown fragment that includes:\n\n"
-            "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
-            "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
-            "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
-            "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axes labels, trends, and top 3 data points or percentage changes\n"
-            "5. **Figures & images**: if there’s a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
-            "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a “Metrics” subsection\n"
-            "7. **Overall slide summary**: a 1–2-sentence plain-English takeaway for the slide’s purpose or conclusion\n\n"
-            "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and tables syntax. "
-            "Do not include any LLM-specific commentary or markdown outside these rules."
         )
-    ).load_data(PDF_PATH)  # tailored parsing instruction
-    idx = VectorStoreIndex.from_documents(
-        docs,
-        embed_model=OpenAIEmbedding(model_name="text-embedding-3-small")
-    )
-    idx.storage_context.persist(persist_dir=INDEX_DIR)
-    st.session_state.index_ready = True
-    st.success("📚 Indexed your document!")  # user feedback
 # 2) Initialize memory & workflow
 if "memory" not in st.session_state:

 st.set_page_config(page_title="PDF Chatbot", layout="wide")  # responsive layout :contentReference[oaicite:5]{index=5}
 st.title("📄 Chat with Your PDF")
+# 1) Ingest once or load existing index via index_store.json
 if "index_ready" not in st.session_state:
+    index_meta = os.path.join(INDEX_DIR, "index_store.json")
+    if os.path.isfile(index_meta):
+        # Found LlamaIndex metadata → reuse existing index
+        st.session_state.index_ready = True
+        st.success("📚 Loaded existing index from index_store.json!")
+    else:
+        # No index_store.json → build index now
+        docs = LlamaParse(
+            result_type="markdown",
+            content_guideline_instruction=(
+                "You are processing a company’s quarterly earnings-call slide deck. "
+                "For each slide, produce a clearly sectioned Markdown fragment that includes:\n\n"
+                "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
+                "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
+                "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
+                "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axes labels, trends, and top 3 data points or percentage changes\n"
+                "5. **Figures & images**: if there’s a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
+                "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a “Metrics” subsection\n"
+                "7. **Overall slide summary**: a 1–2-sentence plain-English takeaway for the slide’s purpose or conclusion\n\n"
+                "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and tables syntax. "
+                "Do not include any LLM-specific commentary or markdown outside these rules."
+            )
+        ).load_data(PDF_PATH)
+        idx = VectorStoreIndex.from_documents(
+            docs,
+            embed_model=OpenAIEmbedding(model_name="text-embedding-3-small")
         )
+        idx.storage_context.persist(persist_dir=INDEX_DIR)
+        st.session_state.index_ready = True
+        st.success("📚 Indexed your document and created index_store.json!") # user feedback
 # 2) Initialize memory & workflow
 if "memory" not in st.session_state: