Spaces:

michaellupo74
/

grants-rag

Running

App Files Community

Gen. Overseer Lupo committed on Sep 8

Commit

22123a0

1 Parent(s): 1aab675

ui: clean filters; use 'categories'; dedup; optional diagnostics; Space-safe env

Browse files

Files changed (1) hide show

app/ui_streamlit.py +57 -84

app/ui_streamlit.py CHANGED Viewed

@@ -1,54 +1,21 @@
 import os, json
-import streamlit as st
-from dotenv import dotenv_values
-from app.search import search
 from pathlib import Path
-import json
-from app.search import search
 from app.main import get_env, ensure_index_exists
-_env = get_env()
-ensure_index_exists(_env)
-# --- TEMP: Quick Search (no filters, dedup) ---
-import streamlit as st
 from app.search import search
-def _dedup_records(rows):
-    seen = set()
-    out = []
-    for r in rows or []:
-        k = r.get("id") or r.get("url") or r.get("title")
-        if not k or k in seen:
-            continue
-        seen.add(k)
-        out.append(r)
-    return out
-st.header("Quick Search (temporary)")
-q = st.text_input("Query", value="")
-run = st.button("Run search")
-if run or q:
-    with st.spinner("Searching…"):
-        results = _dedup_records(search(q or "transportation", _env, top_k=25, filters={}))
-    st.caption(f"Results: {len(results)}")
-    for r in results:
-        st.markdown(f"**{r.get('title','(no title)')}**")
-        if r.get("url"):
-            st.write(r["url"])
-        meta_bits = []
-        if r.get("source"): meta_bits.append(r["source"])
-        if r.get("score") is not None: meta_bits.append(f"score={r['score']:.3f}")
-        if meta_bits:
-            st.caption(" · ".join(meta_bits))
-        st.divider()
-# --- END TEMP ---
 def _dedup_records(rows):
-    seen = set()
-    out = []
     for r in rows or []:
         k = r.get("id") or r.get("url") or r.get("title")
         if not k or k in seen:
@@ -56,13 +23,11 @@ def _dedup_records(rows):
         seen.add(k)
         out.append(r)
     return out
-# --- TEMP DIAGNOSTICS: shows index status and runs a sample search ---
-def _diagnostics(env):
-    import streamlit as st
-    idx = Path(env["INDEX_DIR"])
-    st.subheader("Diagnostics (temporary)")
     st.write("INDEX_DIR:", str(idx))
     st.write("faiss.index exists:", (idx / "faiss.index").exists())
     st.write("meta.json exists:", (idx / "meta.json").exists())
@@ -70,41 +35,26 @@ def _diagnostics(env):
         try:
             meta = json.loads((idx / "meta.json").read_text())
             st.write("meta.json count:", len(meta))
-            # show first 2 meta entries (id + title)
-            sample = [{"id": m.get("id"), "title": m.get("title")} for m in meta[:2]]
-            st.write("meta head:", sample)
         except Exception as e:
             st.error(f"Failed to read meta.json: {e!r}")
-    # Try a sample query without filters
     try:
-        demo = search("transportation", env, top_k=5, filters={})
         st.write("sample search('transportation') results:", len(demo))
         if demo:
             st.write(demo[:3])
     except Exception as e:
         st.error(f"search() raised: {e!r}")
-# call diagnostics (you can wrap behind a checkbox if you prefer)
-_diagnostics(_env)
-# --- END TEMP DIAGNOSTICS ---
-st.set_page_config(page_title="Grants Discovery RAG", layout="wide")
 st.title("Grants Discovery RAG (Capacity Building)")
-env = dotenv_values(".env")
-if not env:
-    st.warning("No .env found. Using defaults.")
-    env = {
-        "DATA_DIR":"data",
-        "DOCSTORE_DIR":"data/docstore",
-        "INDEX_DIR":"data/index",
-        "EXPORT_DIR":"data/exports",
-    }
-preset = st.radio("Quick topic:", ["General", "Elderly", "Prison Ministry", "Evangelism", "Vehicles/Transport", "FTA 5310"], horizontal=True)
 default_q = {
     "General": "capacity building",
     "Elderly": "capacity building for seniors and aging services",
@@ -114,16 +64,33 @@ default_q = {
     "FTA 5310": "5310 Enhanced Mobility Seniors Individuals with Disabilities",
 }.get(preset, "capacity building")
 q = st.text_input("Search query", value=default_q)
-geo = st.multiselect("Geo filter", options=["US","MD","MA"], default=["US"])
-cats = st.multiselect("Category filter", options=["capacity_building","elderly","prison_ministry","evangelism","transportation","vehicle"], default=["capacity_building"])
 top_k = st.slider("Results", 5, 50, 15)
-col1, col2 = st.columns([1,1])
 with col1:
     if st.button("Search"):
         try:
-            results = search(q, env, top_k=top_k, filters={"geo": geo, "categories": cats})
             st.session_state["results"] = results
         except Exception as e:
             st.error(str(e))
@@ -134,23 +101,29 @@ with col2:
         if not results:
             st.warning("No results to export. Run a search first.")
         else:
-            os.makedirs(env["EXPORT_DIR"], exist_ok=True)
-            out_path = os.path.join(env["EXPORT_DIR"], "results.csv")
             import pandas as pd
             pd.DataFrame(results).to_csv(out_path, index=False)
             st.success(f"Exported to {out_path}")
 st.markdown("---")
 results = st.session_state.get("results", [])
 if results:
     for r in results:
-        st.markdown(f"### {r.get('title','(no title)')}")
-        st.write(f"**Source:** {r.get('source','')} | **Geo:** {r.get('geo','')} | **Categories:** {', '.join(r.get('categories',[]))}")
-        link = r.get('url','')
-        if link and not link.startswith("http"):
             st.caption("Note: This item may display an ID or number instead of a full link. Open on Grants.gov if needed.")
-        # ✅ Fixed: one line, no syntax error
-        st.write(f"[Open Link]({link})  \nScore: {r.get('score', 0):.3f}")
         st.markdown("---")
 else:
     st.info("Enter a query and click Search.")

+# app/ui_streamlit.py
 import os, json
 from pathlib import Path
+import streamlit as st
 from app.main import get_env, ensure_index_exists
 from app.search import search
+# Streamlit config should be the first Streamlit call
+st.set_page_config(page_title="Grants Discovery RAG", layout="wide")
+# Environment + index
+_env = get_env()
+ensure_index_exists(_env)
+# ---------- helpers ----------
 def _dedup_records(rows):
+    seen, out = set(), []
     for r in rows or []:
         k = r.get("id") or r.get("url") or r.get("title")
         if not k or k in seen:
         seen.add(k)
         out.append(r)
     return out
+# ---------- end helpers ----------
+# ---------- optional diagnostics ----------
+with st.expander("Diagnostics (optional)", expanded=False):
+    idx = Path(_env["INDEX_DIR"])
     st.write("INDEX_DIR:", str(idx))
     st.write("faiss.index exists:", (idx / "faiss.index").exists())
     st.write("meta.json exists:", (idx / "meta.json").exists())
         try:
             meta = json.loads((idx / "meta.json").read_text())
             st.write("meta.json count:", len(meta))
+            st.write("meta head:", [{"id": m.get("id"), "title": m.get("title")} for m in meta[:2]])
         except Exception as e:
             st.error(f"Failed to read meta.json: {e!r}")
     try:
+        demo = search("transportation", _env, top_k=3, filters={})
         st.write("sample search('transportation') results:", len(demo))
         if demo:
             st.write(demo[:3])
     except Exception as e:
         st.error(f"search() raised: {e!r}")
+# ---------- end diagnostics ----------
 st.title("Grants Discovery RAG (Capacity Building)")
+preset = st.radio(
+    "Quick topic:",
+    ["General", "Elderly", "Prison Ministry", "Evangelism", "Vehicles/Transport", "FTA 5310"],
+    horizontal=True
+)
 default_q = {
     "General": "capacity building",
     "Elderly": "capacity building for seniors and aging services",
     "FTA 5310": "5310 Enhanced Mobility Seniors Individuals with Disabilities",
 }.get(preset, "capacity building")
+# --- controls ---
 q = st.text_input("Search query", value=default_q)
+# No defaults -> no filtering unless the user selects something
+geo = st.multiselect("Geo filter (optional)", options=["US", "MD", "MA"], default=[])
+categories = st.multiselect(
+    "Category filter (optional)",
+    options=["capacity_building", "elderly", "prison_ministry", "evangelism", "transportation", "vehicle"],
+    default=[]
+)
 top_k = st.slider("Results", 5, 50, 15)
+# Build filters only when selected
+filters = {}
+if geo:
+    filters["geo"] = geo
+if categories:
+    filters["categories"] = categories  # <- use 'categories' key (not 'cats')
+col1, col2 = st.columns([1, 1])
 with col1:
     if st.button("Search"):
         try:
+            results = search(q, _env, top_k=top_k, filters=filters)
+            results = _dedup_records(results)
             st.session_state["results"] = results
         except Exception as e:
             st.error(str(e))
         if not results:
             st.warning("No results to export. Run a search first.")
         else:
+            os.makedirs(_env["EXPORT_DIR"], exist_ok=True)
+            out_path = os.path.join(_env["EXPORT_DIR"], "results.csv")
             import pandas as pd
             pd.DataFrame(results).to_csv(out_path, index=False)
             st.success(f"Exported to {out_path}")
 st.markdown("---")
 results = st.session_state.get("results", [])
 if results:
+    st.caption(f"Results: {len(results)}")
     for r in results:
+        title = r.get("title", "(no title)")
+        url = r.get("url", "")
+        cats = r.get("categories") or r.get("cats") or []
+        geo_tags = r.get("geo") or []
+        st.markdown(f"### {title}")
+        st.write(f"**Source:** {r.get('source','')} | **Geo:** {', '.join(geo_tags) if isinstance(geo_tags, list) else geo_tags} | **Categories:** {', '.join(cats) if isinstance(cats, list) else cats}")
+        if url and not url.startswith("http"):
             st.caption("Note: This item may display an ID or number instead of a full link. Open on Grants.gov if needed.")
+        st.write(f"[Open Link]({url})  \nScore: {r.get('score', 0):.3f}")
         st.markdown("---")
 else:
     st.info("Enter a query and click Search.")