Spaces:
Running
Running
Gen. Overseer Lupo
committed on
Commit
·
22123a0
1
Parent(s):
1aab675
ui: clean filters; use 'categories'; dedup; optional diagnostics; Space-safe env
Browse files
- app/ui_streamlit.py +57 -84
app/ui_streamlit.py
CHANGED
|
@@ -1,54 +1,21 @@
|
|
|
|
|
| 1 |
import os, json
|
| 2 |
-
import streamlit as st
|
| 3 |
-
from dotenv import dotenv_values
|
| 4 |
-
from app.search import search
|
| 5 |
-
|
| 6 |
from pathlib import Path
|
| 7 |
-
import
|
| 8 |
-
from app.search import search
|
| 9 |
|
| 10 |
from app.main import get_env, ensure_index_exists
|
| 11 |
-
_env = get_env()
|
| 12 |
-
ensure_index_exists(_env)
|
| 13 |
-
# --- TEMP: Quick Search (no filters, dedup) ---
|
| 14 |
-
import streamlit as st
|
| 15 |
from app.search import search
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
out = []
|
| 20 |
-
for r in rows or []:
|
| 21 |
-
k = r.get("id") or r.get("url") or r.get("title")
|
| 22 |
-
if not k or k in seen:
|
| 23 |
-
continue
|
| 24 |
-
seen.add(k)
|
| 25 |
-
out.append(r)
|
| 26 |
-
return out
|
| 27 |
-
|
| 28 |
-
st.header("Quick Search (temporary)")
|
| 29 |
-
q = st.text_input("Query", value="")
|
| 30 |
-
run = st.button("Run search")
|
| 31 |
-
|
| 32 |
-
if run or q:
|
| 33 |
-
with st.spinner("Searching…"):
|
| 34 |
-
results = _dedup_records(search(q or "transportation", _env, top_k=25, filters={}))
|
| 35 |
-
st.caption(f"Results: {len(results)}")
|
| 36 |
-
for r in results:
|
| 37 |
-
st.markdown(f"**{r.get('title','(no title)')}**")
|
| 38 |
-
if r.get("url"):
|
| 39 |
-
st.write(r["url"])
|
| 40 |
-
meta_bits = []
|
| 41 |
-
if r.get("source"): meta_bits.append(r["source"])
|
| 42 |
-
if r.get("score") is not None: meta_bits.append(f"score={r['score']:.3f}")
|
| 43 |
-
if meta_bits:
|
| 44 |
-
st.caption(" · ".join(meta_bits))
|
| 45 |
-
st.divider()
|
| 46 |
-
# --- END TEMP ---
|
| 47 |
|
|
|
|
|
|
|
|
|
|
| 48 |
|
|
|
|
| 49 |
def _dedup_records(rows):
|
| 50 |
-
seen = set()
|
| 51 |
-
out = []
|
| 52 |
for r in rows or []:
|
| 53 |
k = r.get("id") or r.get("url") or r.get("title")
|
| 54 |
if not k or k in seen:
|
|
@@ -56,13 +23,11 @@ def _dedup_records(rows):
|
|
| 56 |
seen.add(k)
|
| 57 |
out.append(r)
|
| 58 |
return out
|
|
|
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
import streamlit as st
|
| 64 |
-
idx = Path(env["INDEX_DIR"])
|
| 65 |
-
st.subheader("Diagnostics (temporary)")
|
| 66 |
st.write("INDEX_DIR:", str(idx))
|
| 67 |
st.write("faiss.index exists:", (idx / "faiss.index").exists())
|
| 68 |
st.write("meta.json exists:", (idx / "meta.json").exists())
|
|
@@ -70,41 +35,26 @@ def _diagnostics(env):
|
|
| 70 |
try:
|
| 71 |
meta = json.loads((idx / "meta.json").read_text())
|
| 72 |
st.write("meta.json count:", len(meta))
|
| 73 |
-
|
| 74 |
-
sample = [{"id": m.get("id"), "title": m.get("title")} for m in meta[:2]]
|
| 75 |
-
st.write("meta head:", sample)
|
| 76 |
except Exception as e:
|
| 77 |
st.error(f"Failed to read meta.json: {e!r}")
|
| 78 |
-
|
| 79 |
-
# Try a sample query without filters
|
| 80 |
try:
|
| 81 |
-
demo = search("transportation",
|
| 82 |
st.write("sample search('transportation') results:", len(demo))
|
| 83 |
if demo:
|
| 84 |
st.write(demo[:3])
|
| 85 |
except Exception as e:
|
| 86 |
st.error(f"search() raised: {e!r}")
|
|
|
|
| 87 |
|
| 88 |
-
# call diagnostics (you can wrap behind a checkbox if you prefer)
|
| 89 |
-
_diagnostics(_env)
|
| 90 |
-
# --- END TEMP DIAGNOSTICS ---
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
st.set_page_config(page_title="Grants Discovery RAG", layout="wide")
|
| 95 |
st.title("Grants Discovery RAG (Capacity Building)")
|
| 96 |
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
"INDEX_DIR":"data/index",
|
| 104 |
-
"EXPORT_DIR":"data/exports",
|
| 105 |
-
}
|
| 106 |
-
|
| 107 |
-
preset = st.radio("Quick topic:", ["General", "Elderly", "Prison Ministry", "Evangelism", "Vehicles/Transport", "FTA 5310"], horizontal=True)
|
| 108 |
default_q = {
|
| 109 |
"General": "capacity building",
|
| 110 |
"Elderly": "capacity building for seniors and aging services",
|
|
@@ -114,16 +64,33 @@ default_q = {
|
|
| 114 |
"FTA 5310": "5310 Enhanced Mobility Seniors Individuals with Disabilities",
|
| 115 |
}.get(preset, "capacity building")
|
| 116 |
|
|
|
|
| 117 |
q = st.text_input("Search query", value=default_q)
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
top_k = st.slider("Results", 5, 50, 15)
|
| 121 |
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
with col1:
|
| 124 |
if st.button("Search"):
|
| 125 |
try:
|
| 126 |
-
results = search(q,
|
|
|
|
| 127 |
st.session_state["results"] = results
|
| 128 |
except Exception as e:
|
| 129 |
st.error(str(e))
|
|
@@ -134,23 +101,29 @@ with col2:
|
|
| 134 |
if not results:
|
| 135 |
st.warning("No results to export. Run a search first.")
|
| 136 |
else:
|
| 137 |
-
os.makedirs(
|
| 138 |
-
out_path = os.path.join(
|
| 139 |
import pandas as pd
|
| 140 |
pd.DataFrame(results).to_csv(out_path, index=False)
|
| 141 |
st.success(f"Exported to {out_path}")
|
| 142 |
|
| 143 |
st.markdown("---")
|
|
|
|
| 144 |
results = st.session_state.get("results", [])
|
| 145 |
if results:
|
|
|
|
| 146 |
for r in results:
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
st.caption("Note: This item may display an ID or number instead of a full link. Open on Grants.gov if needed.")
|
| 152 |
-
|
| 153 |
-
st.write(f"[Open Link]({link}) \nScore: {r.get('score', 0):.3f}")
|
| 154 |
st.markdown("---")
|
| 155 |
else:
|
| 156 |
st.info("Enter a query and click Search.")
|
|
|
|
| 1 |
+
# app/ui_streamlit.py
|
| 2 |
import os, json
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
from pathlib import Path
|
| 4 |
+
import streamlit as st
|
|
|
|
| 5 |
|
| 6 |
from app.main import get_env, ensure_index_exists
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from app.search import search
|
| 8 |
|
| 9 |
+
# Streamlit config should be the first Streamlit call
|
| 10 |
+
st.set_page_config(page_title="Grants Discovery RAG", layout="wide")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# Environment + index
|
| 13 |
+
_env = get_env()
|
| 14 |
+
ensure_index_exists(_env)
|
| 15 |
|
| 16 |
+
# ---------- helpers ----------
|
| 17 |
def _dedup_records(rows):
|
| 18 |
+
seen, out = set(), []
|
|
|
|
| 19 |
for r in rows or []:
|
| 20 |
k = r.get("id") or r.get("url") or r.get("title")
|
| 21 |
if not k or k in seen:
|
|
|
|
| 23 |
seen.add(k)
|
| 24 |
out.append(r)
|
| 25 |
return out
|
| 26 |
+
# ---------- end helpers ----------
|
| 27 |
|
| 28 |
+
# ---------- optional diagnostics ----------
|
| 29 |
+
with st.expander("Diagnostics (optional)", expanded=False):
|
| 30 |
+
idx = Path(_env["INDEX_DIR"])
|
|
|
|
|
|
|
|
|
|
| 31 |
st.write("INDEX_DIR:", str(idx))
|
| 32 |
st.write("faiss.index exists:", (idx / "faiss.index").exists())
|
| 33 |
st.write("meta.json exists:", (idx / "meta.json").exists())
|
|
|
|
| 35 |
try:
|
| 36 |
meta = json.loads((idx / "meta.json").read_text())
|
| 37 |
st.write("meta.json count:", len(meta))
|
| 38 |
+
st.write("meta head:", [{"id": m.get("id"), "title": m.get("title")} for m in meta[:2]])
|
|
|
|
|
|
|
| 39 |
except Exception as e:
|
| 40 |
st.error(f"Failed to read meta.json: {e!r}")
|
|
|
|
|
|
|
| 41 |
try:
|
| 42 |
+
demo = search("transportation", _env, top_k=3, filters={})
|
| 43 |
st.write("sample search('transportation') results:", len(demo))
|
| 44 |
if demo:
|
| 45 |
st.write(demo[:3])
|
| 46 |
except Exception as e:
|
| 47 |
st.error(f"search() raised: {e!r}")
|
| 48 |
+
# ---------- end diagnostics ----------
|
| 49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
st.title("Grants Discovery RAG (Capacity Building)")
|
| 51 |
|
| 52 |
+
preset = st.radio(
|
| 53 |
+
"Quick topic:",
|
| 54 |
+
["General", "Elderly", "Prison Ministry", "Evangelism", "Vehicles/Transport", "FTA 5310"],
|
| 55 |
+
horizontal=True
|
| 56 |
+
)
|
| 57 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
default_q = {
|
| 59 |
"General": "capacity building",
|
| 60 |
"Elderly": "capacity building for seniors and aging services",
|
|
|
|
| 64 |
"FTA 5310": "5310 Enhanced Mobility Seniors Individuals with Disabilities",
|
| 65 |
}.get(preset, "capacity building")
|
| 66 |
|
| 67 |
+
# --- controls ---
|
| 68 |
q = st.text_input("Search query", value=default_q)
|
| 69 |
+
|
| 70 |
+
# No defaults -> no filtering unless the user selects something
|
| 71 |
+
geo = st.multiselect("Geo filter (optional)", options=["US", "MD", "MA"], default=[])
|
| 72 |
+
categories = st.multiselect(
|
| 73 |
+
"Category filter (optional)",
|
| 74 |
+
options=["capacity_building", "elderly", "prison_ministry", "evangelism", "transportation", "vehicle"],
|
| 75 |
+
default=[]
|
| 76 |
+
)
|
| 77 |
+
|
| 78 |
top_k = st.slider("Results", 5, 50, 15)
|
| 79 |
|
| 80 |
+
# Build filters only when selected
|
| 81 |
+
filters = {}
|
| 82 |
+
if geo:
|
| 83 |
+
filters["geo"] = geo
|
| 84 |
+
if categories:
|
| 85 |
+
filters["categories"] = categories # <- use 'categories' key (not 'cats')
|
| 86 |
+
|
| 87 |
+
col1, col2 = st.columns([1, 1])
|
| 88 |
+
|
| 89 |
with col1:
|
| 90 |
if st.button("Search"):
|
| 91 |
try:
|
| 92 |
+
results = search(q, _env, top_k=top_k, filters=filters)
|
| 93 |
+
results = _dedup_records(results)
|
| 94 |
st.session_state["results"] = results
|
| 95 |
except Exception as e:
|
| 96 |
st.error(str(e))
|
|
|
|
| 101 |
if not results:
|
| 102 |
st.warning("No results to export. Run a search first.")
|
| 103 |
else:
|
| 104 |
+
os.makedirs(_env["EXPORT_DIR"], exist_ok=True)
|
| 105 |
+
out_path = os.path.join(_env["EXPORT_DIR"], "results.csv")
|
| 106 |
import pandas as pd
|
| 107 |
pd.DataFrame(results).to_csv(out_path, index=False)
|
| 108 |
st.success(f"Exported to {out_path}")
|
| 109 |
|
| 110 |
st.markdown("---")
|
| 111 |
+
|
| 112 |
results = st.session_state.get("results", [])
|
| 113 |
if results:
|
| 114 |
+
st.caption(f"Results: {len(results)}")
|
| 115 |
for r in results:
|
| 116 |
+
title = r.get("title", "(no title)")
|
| 117 |
+
url = r.get("url", "")
|
| 118 |
+
cats = r.get("categories") or r.get("cats") or []
|
| 119 |
+
geo_tags = r.get("geo") or []
|
| 120 |
+
|
| 121 |
+
st.markdown(f"### {title}")
|
| 122 |
+
st.write(f"**Source:** {r.get('source','')} | **Geo:** {', '.join(geo_tags) if isinstance(geo_tags, list) else geo_tags} | **Categories:** {', '.join(cats) if isinstance(cats, list) else cats}")
|
| 123 |
+
|
| 124 |
+
if url and not url.startswith("http"):
|
| 125 |
st.caption("Note: This item may display an ID or number instead of a full link. Open on Grants.gov if needed.")
|
| 126 |
+
st.write(f"[Open Link]({url}) \nScore: {r.get('score', 0):.3f}")
|
|
|
|
| 127 |
st.markdown("---")
|
| 128 |
else:
|
| 129 |
st.info("Enter a query and click Search.")
|