Gen. Overseer Lupo committed on
Commit
22123a0
·
1 Parent(s): 1aab675

ui: clean filters; use 'categories'; dedup; optional diagnostics; Space-safe env

Browse files
Files changed (1) hide show
  1. app/ui_streamlit.py +57 -84
app/ui_streamlit.py CHANGED
@@ -1,54 +1,21 @@
 
1
  import os, json
2
- import streamlit as st
3
- from dotenv import dotenv_values
4
- from app.search import search
5
-
6
  from pathlib import Path
7
- import json
8
- from app.search import search
9
 
10
  from app.main import get_env, ensure_index_exists
11
- _env = get_env()
12
- ensure_index_exists(_env)
13
- # --- TEMP: Quick Search (no filters, dedup) ---
14
- import streamlit as st
15
  from app.search import search
16
 
17
- def _dedup_records(rows):
18
- seen = set()
19
- out = []
20
- for r in rows or []:
21
- k = r.get("id") or r.get("url") or r.get("title")
22
- if not k or k in seen:
23
- continue
24
- seen.add(k)
25
- out.append(r)
26
- return out
27
-
28
- st.header("Quick Search (temporary)")
29
- q = st.text_input("Query", value="")
30
- run = st.button("Run search")
31
-
32
- if run or q:
33
- with st.spinner("Searching…"):
34
- results = _dedup_records(search(q or "transportation", _env, top_k=25, filters={}))
35
- st.caption(f"Results: {len(results)}")
36
- for r in results:
37
- st.markdown(f"**{r.get('title','(no title)')}**")
38
- if r.get("url"):
39
- st.write(r["url"])
40
- meta_bits = []
41
- if r.get("source"): meta_bits.append(r["source"])
42
- if r.get("score") is not None: meta_bits.append(f"score={r['score']:.3f}")
43
- if meta_bits:
44
- st.caption(" · ".join(meta_bits))
45
- st.divider()
46
- # --- END TEMP ---
47
 
 
 
 
48
 
 
49
  def _dedup_records(rows):
50
- seen = set()
51
- out = []
52
  for r in rows or []:
53
  k = r.get("id") or r.get("url") or r.get("title")
54
  if not k or k in seen:
@@ -56,13 +23,11 @@ def _dedup_records(rows):
56
  seen.add(k)
57
  out.append(r)
58
  return out
 
59
 
60
-
61
- # --- TEMP DIAGNOSTICS: shows index status and runs a sample search ---
62
- def _diagnostics(env):
63
- import streamlit as st
64
- idx = Path(env["INDEX_DIR"])
65
- st.subheader("Diagnostics (temporary)")
66
  st.write("INDEX_DIR:", str(idx))
67
  st.write("faiss.index exists:", (idx / "faiss.index").exists())
68
  st.write("meta.json exists:", (idx / "meta.json").exists())
@@ -70,41 +35,26 @@ def _diagnostics(env):
70
  try:
71
  meta = json.loads((idx / "meta.json").read_text())
72
  st.write("meta.json count:", len(meta))
73
- # show first 2 meta entries (id + title)
74
- sample = [{"id": m.get("id"), "title": m.get("title")} for m in meta[:2]]
75
- st.write("meta head:", sample)
76
  except Exception as e:
77
  st.error(f"Failed to read meta.json: {e!r}")
78
-
79
- # Try a sample query without filters
80
  try:
81
- demo = search("transportation", env, top_k=5, filters={})
82
  st.write("sample search('transportation') results:", len(demo))
83
  if demo:
84
  st.write(demo[:3])
85
  except Exception as e:
86
  st.error(f"search() raised: {e!r}")
 
87
 
88
- # call diagnostics (you can wrap behind a checkbox if you prefer)
89
- _diagnostics(_env)
90
- # --- END TEMP DIAGNOSTICS ---
91
-
92
-
93
-
94
- st.set_page_config(page_title="Grants Discovery RAG", layout="wide")
95
  st.title("Grants Discovery RAG (Capacity Building)")
96
 
97
- env = dotenv_values(".env")
98
- if not env:
99
- st.warning("No .env found. Using defaults.")
100
- env = {
101
- "DATA_DIR":"data",
102
- "DOCSTORE_DIR":"data/docstore",
103
- "INDEX_DIR":"data/index",
104
- "EXPORT_DIR":"data/exports",
105
- }
106
-
107
- preset = st.radio("Quick topic:", ["General", "Elderly", "Prison Ministry", "Evangelism", "Vehicles/Transport", "FTA 5310"], horizontal=True)
108
  default_q = {
109
  "General": "capacity building",
110
  "Elderly": "capacity building for seniors and aging services",
@@ -114,16 +64,33 @@ default_q = {
114
  "FTA 5310": "5310 Enhanced Mobility Seniors Individuals with Disabilities",
115
  }.get(preset, "capacity building")
116
 
 
117
  q = st.text_input("Search query", value=default_q)
118
- geo = st.multiselect("Geo filter", options=["US","MD","MA"], default=["US"])
119
- cats = st.multiselect("Category filter", options=["capacity_building","elderly","prison_ministry","evangelism","transportation","vehicle"], default=["capacity_building"])
 
 
 
 
 
 
 
120
  top_k = st.slider("Results", 5, 50, 15)
121
 
122
- col1, col2 = st.columns([1,1])
 
 
 
 
 
 
 
 
123
  with col1:
124
  if st.button("Search"):
125
  try:
126
- results = search(q, env, top_k=top_k, filters={"geo": geo, "categories": cats})
 
127
  st.session_state["results"] = results
128
  except Exception as e:
129
  st.error(str(e))
@@ -134,23 +101,29 @@ with col2:
134
  if not results:
135
  st.warning("No results to export. Run a search first.")
136
  else:
137
- os.makedirs(env["EXPORT_DIR"], exist_ok=True)
138
- out_path = os.path.join(env["EXPORT_DIR"], "results.csv")
139
  import pandas as pd
140
  pd.DataFrame(results).to_csv(out_path, index=False)
141
  st.success(f"Exported to {out_path}")
142
 
143
  st.markdown("---")
 
144
  results = st.session_state.get("results", [])
145
  if results:
 
146
  for r in results:
147
- st.markdown(f"### {r.get('title','(no title)')}")
148
- st.write(f"**Source:** {r.get('source','')} | **Geo:** {r.get('geo','')} | **Categories:** {', '.join(r.get('categories',[]))}")
149
- link = r.get('url','')
150
- if link and not link.startswith("http"):
 
 
 
 
 
151
  st.caption("Note: This item may display an ID or number instead of a full link. Open on Grants.gov if needed.")
152
- # ✅ Fixed: one line, no syntax error
153
- st.write(f"[Open Link]({link}) \nScore: {r.get('score', 0):.3f}")
154
  st.markdown("---")
155
  else:
156
  st.info("Enter a query and click Search.")
 
1
+ # app/ui_streamlit.py
2
  import os, json
 
 
 
 
3
  from pathlib import Path
4
+ import streamlit as st
 
5
 
6
  from app.main import get_env, ensure_index_exists
 
 
 
 
7
  from app.search import search
8
 
9
+ # Streamlit config should be the first Streamlit call
10
+ st.set_page_config(page_title="Grants Discovery RAG", layout="wide")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
+ # Environment + index
13
+ _env = get_env()
14
+ ensure_index_exists(_env)
15
 
16
+ # ---------- helpers ----------
17
  def _dedup_records(rows):
18
+ seen, out = set(), []
 
19
  for r in rows or []:
20
  k = r.get("id") or r.get("url") or r.get("title")
21
  if not k or k in seen:
 
23
  seen.add(k)
24
  out.append(r)
25
  return out
26
+ # ---------- end helpers ----------
27
 
28
+ # ---------- optional diagnostics ----------
29
+ with st.expander("Diagnostics (optional)", expanded=False):
30
+ idx = Path(_env["INDEX_DIR"])
 
 
 
31
  st.write("INDEX_DIR:", str(idx))
32
  st.write("faiss.index exists:", (idx / "faiss.index").exists())
33
  st.write("meta.json exists:", (idx / "meta.json").exists())
 
35
  try:
36
  meta = json.loads((idx / "meta.json").read_text())
37
  st.write("meta.json count:", len(meta))
38
+ st.write("meta head:", [{"id": m.get("id"), "title": m.get("title")} for m in meta[:2]])
 
 
39
  except Exception as e:
40
  st.error(f"Failed to read meta.json: {e!r}")
 
 
41
  try:
42
+ demo = search("transportation", _env, top_k=3, filters={})
43
  st.write("sample search('transportation') results:", len(demo))
44
  if demo:
45
  st.write(demo[:3])
46
  except Exception as e:
47
  st.error(f"search() raised: {e!r}")
48
+ # ---------- end diagnostics ----------
49
 
 
 
 
 
 
 
 
50
  st.title("Grants Discovery RAG (Capacity Building)")
51
 
52
+ preset = st.radio(
53
+ "Quick topic:",
54
+ ["General", "Elderly", "Prison Ministry", "Evangelism", "Vehicles/Transport", "FTA 5310"],
55
+ horizontal=True
56
+ )
57
+
 
 
 
 
 
58
  default_q = {
59
  "General": "capacity building",
60
  "Elderly": "capacity building for seniors and aging services",
 
64
  "FTA 5310": "5310 Enhanced Mobility Seniors Individuals with Disabilities",
65
  }.get(preset, "capacity building")
66
 
67
+ # --- controls ---
68
  q = st.text_input("Search query", value=default_q)
69
+
70
+ # No defaults -> no filtering unless the user selects something
71
+ geo = st.multiselect("Geo filter (optional)", options=["US", "MD", "MA"], default=[])
72
+ categories = st.multiselect(
73
+ "Category filter (optional)",
74
+ options=["capacity_building", "elderly", "prison_ministry", "evangelism", "transportation", "vehicle"],
75
+ default=[]
76
+ )
77
+
78
  top_k = st.slider("Results", 5, 50, 15)
79
 
80
+ # Build filters only when selected
81
+ filters = {}
82
+ if geo:
83
+ filters["geo"] = geo
84
+ if categories:
85
+ filters["categories"] = categories # <- use 'categories' key (not 'cats')
86
+
87
+ col1, col2 = st.columns([1, 1])
88
+
89
  with col1:
90
  if st.button("Search"):
91
  try:
92
+ results = search(q, _env, top_k=top_k, filters=filters)
93
+ results = _dedup_records(results)
94
  st.session_state["results"] = results
95
  except Exception as e:
96
  st.error(str(e))
 
101
  if not results:
102
  st.warning("No results to export. Run a search first.")
103
  else:
104
+ os.makedirs(_env["EXPORT_DIR"], exist_ok=True)
105
+ out_path = os.path.join(_env["EXPORT_DIR"], "results.csv")
106
  import pandas as pd
107
  pd.DataFrame(results).to_csv(out_path, index=False)
108
  st.success(f"Exported to {out_path}")
109
 
110
  st.markdown("---")
111
+
112
  results = st.session_state.get("results", [])
113
  if results:
114
+ st.caption(f"Results: {len(results)}")
115
  for r in results:
116
+ title = r.get("title", "(no title)")
117
+ url = r.get("url", "")
118
+ cats = r.get("categories") or r.get("cats") or []
119
+ geo_tags = r.get("geo") or []
120
+
121
+ st.markdown(f"### {title}")
122
+ st.write(f"**Source:** {r.get('source','')} | **Geo:** {', '.join(geo_tags) if isinstance(geo_tags, list) else geo_tags} | **Categories:** {', '.join(cats) if isinstance(cats, list) else cats}")
123
+
124
+ if url and not url.startswith("http"):
125
  st.caption("Note: This item may display an ID or number instead of a full link. Open on Grants.gov if needed.")
126
+ st.write(f"[Open Link]({url}) \nScore: {r.get('score', 0):.3f}")
 
127
  st.markdown("---")
128
  else:
129
  st.info("Enter a query and click Search.")