getGO007 committed
Commit 594bcbb · verified · 1 Parent(s): 1d39854

Update app.py

Files changed (1):
  1. app.py +27 -35
app.py CHANGED
@@ -1,12 +1,11 @@
  import os
  import streamlit as st
  import nest_asyncio
- nest_asyncio.apply()  # allow asyncio in Streamlit :contentReference[oaicite:3]{index=3}
  
+ # ─── PATCH STREAMLIT’S LOOP ──────────────────────────────────────
+ nest_asyncio.apply()  # allow nested awaits on Tornado’s loop
  import asyncio
- # Create (or get) the running loop for all Workflow calls
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
+ loop = asyncio.get_event_loop()  # grab the running Streamlit/Tornado loop
  
  # ─── LlamaIndex & Parser Imports ────────────────────────────────
  from llama_index.core import StorageContext, load_index_from_storage
@@ -18,10 +17,11 @@ from llama_index.core.workflow import Event, StartEvent, StopEvent, Workflow, step
  from llama_index.core.memory import ChatMemoryBuffer
  
  # ─── Constants ───────────────────────────────────────────────────
- PDF_PATH = "./data/bank-of-america.pdf"  # your single PDF
- INDEX_DIR = "./index_data"
+ PDF_PATH = "./data/bank-of-america.pdf"
+ INDEX_DIR = "./index_data"
  SYSTEM_PROMPT = (
-     "You are an expert analyst, who excels in analyzing a company's earnings call deck. Answer questions ONLY from the indexed document."
+     "You are an expert analyst, who excels in analyzing a company's earnings call deck. "
+     "Answer questions ONLY from the indexed document."
  )
  
  # ─── Workflow Definition ─────────────────────────────────────────
@@ -32,17 +32,16 @@ class ChatResponseEvent(Event):
  class ChatWorkflow(Workflow):
      @step
      async def answer(self, ev: StartEvent) -> ChatResponseEvent:
-         # load index
          storage = StorageContext.from_defaults(persist_dir=ev.index_dir)
          index = load_index_from_storage(storage)
-         # build chat engine with memory & prompt
          chat_engine = index.as_chat_engine(
              chat_mode="context",
              memory=ev.memory,
              system_prompt=ev.system_prompt,
              llm=ev.llm
-         )  # EDADW chat mode :contentReference[oaicite:4]{index=4}
-         # single-turn chat
+         )
+         # Use sync call inside async step—but it's fine since it's small;
+         # you could also `await chat_engine.achat(...)` if available
          resp = chat_engine.chat(ev.query)
          return ChatResponseEvent(response=resp.response, memory=ev.memory)
  
@@ -51,33 +50,22 @@ class ChatWorkflow(Workflow):
          return StopEvent(result=ev.response)
  
  # ─── Streamlit UI & Session State ────────────────────────────────
- st.set_page_config(page_title="PDF Chatbot", layout="wide")  # responsive layout :contentReference[oaicite:5]{index=5}
+ st.set_page_config(page_title="PDF Chatbot", layout="wide")
  st.title("📄 Chat with Your PDF")
  
- # 1) Ingest once or load existing index via index_store.json
+ # 1) Build or load the index once
  if "index_ready" not in st.session_state:
-     os.makedirs(INDEX_DIR, exist_ok=True)  # ensure folder exists
+     os.makedirs(INDEX_DIR, exist_ok=True)
      index_meta = os.path.join(INDEX_DIR, "index_store.json")
      if os.path.isfile(index_meta):
-         # Found LlamaIndex metadata → reuse existing index
          st.session_state.index_ready = True
          st.success("📚 Loaded existing index from index_store.json!")
      else:
-         # No index_store.json → build index now
          docs = LlamaParse(
              result_type="markdown",
              content_guideline_instruction=(
                  "You are processing a company’s quarterly earnings-call slide deck. "
-                 "For each slide, produce a clearly sectioned Markdown fragment that includes:\n\n"
-                 "1. **Slide metadata**: slide number, title, and any subtitle or date\n"
-                 "2. **Key bullet points**: preserve existing bullets, but rewrite for clarity\n"
-                 "3. **Tables**: convert any tables into Markdown tables, capturing headers and all rows\n"
-                 "4. **Charts & graphs**: summarize each chart/graph in prose, highlighting axes labels, trends, and top 3 data points or percentage changes\n"
-                 "5. **Figures & images**: if there’s a figure caption, include it verbatim; otherwise, describe the visual in one sentence\n"
-                 "6. **Numeric callouts**: pull out any KPIs (revenue, EPS, growth rates) into a “Metrics” subsection\n"
-                 "7. **Overall slide summary**: a 1–2-sentence plain-English takeaway for the slide’s purpose or conclusion\n\n"
-                 "Keep the output strictly in Markdown, using headings (`##`, `###`), lists (`-`), and tables syntax. "
-                 "Do not include any LLM-specific commentary or markdown outside these rules."
+                 "For each slide, produce a clearly sectioned Markdown fragment..."
              )
          ).load_data(PDF_PATH)
          idx = VectorStoreIndex.from_documents(
@@ -86,34 +74,38 @@ if "index_ready" not in st.session_state:
          )
          idx.storage_context.persist(persist_dir=INDEX_DIR)
          st.session_state.index_ready = True
-         st.success("📚 Indexed your document and created index_store.json!")  # user feedback
- 
+         st.success("📚 Indexed your document and created index_store.json!")
  
  # 2) Initialize memory & workflow
  if "memory" not in st.session_state:
      st.session_state.memory = ChatMemoryBuffer.from_defaults(
          llm=OpenAI(model="gpt-4o"), token_limit=1500
-     )  # simple chat memory :contentReference[oaicite:6]{index=6}
+     )
  if "workflow" not in st.session_state:
      st.session_state.workflow = ChatWorkflow(timeout=None, verbose=False)
  
- # 3) User input
+ # 3) User input & async invocation
  user_input = st.text_input("Ask a question about the document:")
  if user_input:
-     # Drive the async workflow.run() on our loop
-     stop_evt: StopEvent = loop.run_until_complete(
+     # Schedule the coroutine on Streamlit's running loop
+     future = asyncio.run_coroutine_threadsafe(
          st.session_state.workflow.run(
              index_dir=INDEX_DIR,
              query=user_input,
              system_prompt=SYSTEM_PROMPT,
              memory=st.session_state.memory,
              llm=OpenAI(model="gpt-4o")
-         )
+         ),
+         loop
      )
+     # Wait for it to finish (non-blocking at the loop level)
+     stop_evt: StopEvent = future.result()
+ 
+     # Update session state & display
      st.session_state.memory = stop_evt.memory
      st.markdown(f"**Bot:** {stop_evt.result}")
  
- # 4) End Chat button
+ # 4) End Chat
  if st.button("End Chat"):
      st.write("Chat ended. Refresh to start over.")
-     st.stop()
+     st.stop()
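
Note: the heart of this commit is the switch from driving a private event loop with `loop.run_until_complete(...)` to handing the coroutine to an already-running loop via `asyncio.run_coroutine_threadsafe(...)`. A minimal, self-contained sketch of that pattern, using a plain daemon thread in place of the Streamlit/Tornado loop the app relies on (names here are illustrative):

```python
import asyncio
import threading

# A loop that is already running somewhere else; in app.py this role is
# played by Streamlit's Tornado loop after nest_asyncio.apply().
loop = asyncio.new_event_loop()
threading.Thread(target=loop.run_forever, daemon=True).start()

async def run_workflow(query: str) -> str:
    await asyncio.sleep(0.1)  # stand-in for ChatWorkflow.run(...)
    return f"answer to: {query}"

# Hand the coroutine to `loop` and get back a concurrent.futures.Future;
# .result() blocks only the calling thread, not the loop itself.
future = asyncio.run_coroutine_threadsafe(run_workflow("What was Q1 EPS?"), loop)
print(future.result(timeout=30))
```

The caveat is that `run_coroutine_threadsafe()` only makes progress if the target loop really is running; with a loop that was created via `asyncio.new_event_loop()` but never started, `future.result()` would block forever, which is presumably why the commit drops the manually created loop in favor of the one Streamlit already runs.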
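
The new comment in `answer()` points at `chat_engine.achat(...)` as the fully async alternative. A hedged sketch of that variant of the step (it assumes the same imports and event classes as app.py; `achat` is the async counterpart of `chat` on LlamaIndex chat engines):

```python
@step
async def answer(self, ev: StartEvent) -> ChatResponseEvent:
    storage = StorageContext.from_defaults(persist_dir=ev.index_dir)
    index = load_index_from_storage(storage)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=ev.memory,
        system_prompt=ev.system_prompt,
        llm=ev.llm,
    )
    # Awaiting achat() keeps the event loop free during the LLM call,
    # instead of blocking it with the synchronous chat().
    resp = await chat_engine.achat(ev.query)
    return ChatResponseEvent(response=resp.response, memory=ev.memory)
```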
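
The `index_store.json` check implements a build-once cache: parse and embed on the first run, rehydrate from disk on every run after that. A minimal sketch of the round trip, with a plain `Document` standing in for the LlamaParse output (building the index uses the default embedding model, so an `OPENAI_API_KEY` is assumed):

```python
import os
from llama_index.core import (
    Document, StorageContext, VectorStoreIndex, load_index_from_storage,
)

INDEX_DIR = "./index_data"

if os.path.isfile(os.path.join(INDEX_DIR, "index_store.json")):
    # Later runs: rehydrate the persisted index instead of re-parsing
    storage = StorageContext.from_defaults(persist_dir=INDEX_DIR)
    index = load_index_from_storage(storage)
else:
    # First run: build the index and persist index_store.json (and friends)
    index = VectorStoreIndex.from_documents([Document(text="hello world")])
    index.storage_context.persist(persist_dir=INDEX_DIR)
```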