Gen. Overseer Lupo committed · 598f5cb

Clean initial commit for HF Spaces (exclude venv & binaries)
Files changed:
- .env +5 -0
- .env.example +5 -0
- .gitattributes +5 -0
- .gitignore +14 -0
- Makefile +29 -0
- README.md +34 -0
- app/__init__.py +0 -0
- app/ingest.py +146 -0
- app/main.py +77 -0
- app/search.py +37 -0
- app/sources/grantsgov_api.py +120 -0
- app/ui_streamlit.py +68 -0
- config/sources.yaml +44 -0
- config/v6.yaml +52 -0
- data/exports/results.csv +7 -0
- data/grants_fallback_sample.json +20 -0
- requirements.txt +10 -0
- run.sh +13 -0
.env
ADDED
@@ -0,0 +1,5 @@
+# Desktop paths
+DATA_DIR=/Users/gen.overseerlupo/Desktop/grants_rag_app/data
+DOCSTORE_DIR=/Users/gen.overseerlupo/Desktop/grants_rag_app/data/docstore
+INDEX_DIR=/Users/gen.overseerlupo/Desktop/grants_rag_app/data/index
+EXPORT_DIR=/Users/gen.overseerlupo/Desktop/grants_rag_app/data/exports
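These path variables are not exported into the shell; the app reads them with python-dotenv. A minimal sketch (not part of the commit) of how they are consumed, mirroring `get_env()` in `app/main.py` and the `__main__` block in `app/ingest.py`:

```python
# Sketch only: dotenv_values() parses .env into a plain dict of strings.
from dotenv import dotenv_values

env = dotenv_values(".env")
# e.g. env["INDEX_DIR"] == "/Users/gen.overseerlupo/Desktop/grants_rag_app/data/index"
print(env.get("INDEX_DIR"))
```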
.env.example
ADDED
@@ -0,0 +1,5 @@
+# Desktop paths
+DATA_DIR=/Users/gen.overseerlupo/Desktop/grants_rag_app/data
+DOCSTORE_DIR=/Users/gen.overseerlupo/Desktop/grants_rag_app/data/docstore
+INDEX_DIR=/Users/gen.overseerlupo/Desktop/grants_rag_app/data/index
+EXPORT_DIR=/Users/gen.overseerlupo/Desktop/grants_rag_app/data/exports
.gitattributes
ADDED
@@ -0,0 +1,5 @@
+assets/*.jpg filter=lfs diff=lfs merge=lfs -text
+assets/*.jpeg filter=lfs diff=lfs merge=lfs -text
+assets/*.png filter=lfs diff=lfs merge=lfs -text
+assets/*.gif filter=lfs diff=lfs merge=lfs -text
+assets/*.webp filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,14 @@
+.venv/
+__pycache__/
+*.pyc
+.DS_Store
+*.so
+*.dylib
+*.dll
+*.map
+*.bin
+*.pt
+*.pth
+*.npy
+data/index/
+data/docstore/
Makefile
ADDED
@@ -0,0 +1,29 @@
+.PHONY: ingest search export ui clean venv stop restart ps
+
+venv:
+	python3 -m venv .venv && . .venv/bin/activate && pip install -U pip wheel && pip install -r requirements.txt
+
+ingest:
+	python app/main.py ingest
+
+search:
+	python app/main.py search --q "capacity building"
+
+export:
+	python app/main.py export --q "capacity building" --out data/exports/results.csv
+
+ui:
+	streamlit run app/ui_streamlit.py
+
+ps:
+	ps aux | grep streamlit | grep -v grep || true
+
+stop:
+	- pkill -f "streamlit run app/ui_streamlit.py" || true
+	- pgrep -f "streamlit" | xargs kill -9 2>/dev/null || true
+
+restart: stop
+	streamlit run app/ui_streamlit.py
+
+clean:
+	rm -rf data/index data/docstore data/exports/*.csv
README.md
ADDED
@@ -0,0 +1,34 @@
+---
+title: Grants RAG Finder
+sdk: streamlit
+app_file: app/ui_streamlit.py
+---
+
+# Grants Discovery RAG App (v6.2 hotfix)
+
+
+- Desktop paths baked in
+- Grants.gov API + topic routes (General, Elderly, Prison, Evangelism, Vehicles, FTA 5310)
+- Makefile helpers: `make ps`, `make stop`, `make restart`
+- **Hotfix**: corrected Streamlit write line (single line, no syntax error)
+
+## One-Time Setup
+```bash
+cd ~/Desktop
+unzip ~/Downloads/grants_rag_app_desktop_v6_2.zip -d ~/Desktop/
+cd /Users/gen.overseerlupo/Desktop/grants_rag_app
+python3 -m venv .venv
+source .venv/bin/activate
+pip install -U pip wheel
+pip install -r requirements.txt
+cp .env.example .env
+open -a TextEdit .env config/sources.yaml
+```
+
+## Run
+```bash
+cd /Users/gen.overseerlupo/Desktop/grants_rag_app
+source .venv/bin/activate
+make ingest
+make ui
+```
app/__init__.py
ADDED
File without changes
app/ingest.py
ADDED
@@ -0,0 +1,146 @@
+import os, json
+from pathlib import Path
+from typing import Dict, List
+import yaml
+from sentence_transformers import SentenceTransformer
+import numpy as np
+from app.sources.grantsgov_api import load_fallback_json  # ← add this line
+
+
+def load_config(cfg_path: str) -> Dict:
+    with open(cfg_path, "r") as f:
+        return yaml.safe_load(f)
+
+def ensure_dirs(env: Dict):
+    Path(env["DOCSTORE_DIR"]).mkdir(parents=True, exist_ok=True)
+    Path(env["INDEX_DIR"]).mkdir(parents=True, exist_ok=True)
+
+def hash_id(text: str) -> str:
+    import hashlib
+    return hashlib.sha1(text.encode("utf-8")).hexdigest()[:16]
+
+def normalize_record(src_name: str, rec: Dict) -> Dict:
+    return {
+        "id": rec.get("id") or hash_id(rec.get("title","") + rec.get("url","")),
+        "source": src_name,
+        "title": rec.get("title","").strip(),
+        "summary": rec.get("summary","").strip(),
+        "url": rec.get("url","").strip(),
+        "deadline": rec.get("deadline","").strip(),
+        "eligibility": rec.get("eligibility","").strip(),
+        "agency": rec.get("agency","").strip(),
+        "geo": rec.get("geo","").strip(),
+        "categories": rec.get("categories", []),
+        "raw": rec
+    }
+
+def collect_from_grantsgov_api(src: Dict) -> List[Dict]:
+    # Import here so module runs cleanly
+    from app.sources.grantsgov_api import search_grants, map_to_records, load_fallback_json
+
+    # Read API config from YAML
+    api_cfg = src.get("api", {})
+    page_size = int(api_cfg.get("page_size", 100))
+    max_pages = int(api_cfg.get("max_pages", 5))
+    payload = api_cfg.get("payload", {}) or {}
+
+    # Try live API first (POST+JSON). If it fails, use local fallback_json (if provided).
+    try:
+        items = search_grants(src.get("url"), payload, page_size=page_size, max_pages=max_pages)
+    except Exception as e:
+        fb = src.get("fallback_json")
+        if fb:
+            print(f"[WARN] API failed ({e}); using fallback: {fb}")
+            items = load_fallback_json(fb)
+        else:
+            raise
+
+    # Map raw items to a basic schema (id/title/summary/agency/dates)
+    basic = map_to_records(items)
+
+    # Enrich with fields our normalize_record expects
+    geo = src.get("geo", "US")
+    categories = src.get("categories", [])
+    recs: List[Dict] = []
+    for b in basic:
+        raw = b.get("raw") or {}
+        # Try to find a URL if present in the raw payload
+        url = (
+            raw.get("opportunityUrl")
+            or raw.get("synopsisURL")
+            or raw.get("url")
+            or ""
+        )
+        deadline = b.get("closeDate") or raw.get("closeDate") or ""
+
+        rec = {
+            "id": b.get("id"),
+            "title": b.get("title") or "",
+            "summary": b.get("summary") or "",
+            "url": url,
+            "deadline": deadline,
+            "eligibility": raw.get("eligibility") or "",
+            "agency": b.get("agency") or raw.get("agency") or "",
+            "geo": geo,
+            "categories": categories,
+        }
+        recs.append(normalize_record(src["name"], rec))
+
+    return recs
+
+
+def save_docstore(recs: List[Dict], env: Dict):
+    ds_path = Path(env["DOCSTORE_DIR"]) / "docstore.jsonl"
+    with open(ds_path, "w") as f:
+        for r in recs:
+            f.write(json.dumps(r) + "\n")
+    return str(ds_path)
+
+def build_index(env: Dict):
+    ds_path = Path(env["DOCSTORE_DIR"]) / "docstore.jsonl"
+    if not ds_path.exists():
+        raise RuntimeError("Docstore not found. Run ingest first.")
+    model = SentenceTransformer("all-MiniLM-L6-v2")
+    texts = []
+    metas = []
+    with open(ds_path, "r") as f:
+        for line in f:
+            rec = json.loads(line)
+            text = " | ".join([rec.get("title",""), rec.get("summary",""), rec.get("eligibility",""), rec.get("agency","")])
+            texts.append(text)
+            metas.append({"id": rec["id"], "title": rec["title"], "url": rec["url"], "source": rec["source"], "geo": rec["geo"], "categories": rec["categories"]})
+    emb = model.encode(texts, convert_to_numpy=True, show_progress_bar=True, normalize_embeddings=True)
+
+    import faiss
+    dim = emb.shape[1]
+    index = faiss.IndexFlatIP(dim)
+    index.add(emb)
+
+    Path(env["INDEX_DIR"]).mkdir(parents=True, exist_ok=True)
+    faiss.write_index(index, str(Path(env["INDEX_DIR"]) / "faiss.index"))
+    with open(Path(env["INDEX_DIR"]) / "meta.json", "w") as f:
+        json.dump(metas, f)
+    return len(texts)
+
+def ingest(cfg_path: str, env: Dict):
+    cfg = load_config(cfg_path)
+    ensure_dirs(env)
+    all_recs = []
+    for src in cfg.get("sources", []):
+        if not src.get("enabled", False):
+            continue
+        if src["type"] == "grantsgov_api":
+            recs = collect_from_grantsgov_api(src)
+        else:
+            recs = []
+        all_recs.extend(recs)
+    path = save_docstore(all_recs, env)
+    n_indexed = build_index(env)
+    return path, n_indexed
+
+if __name__ == "__main__":
+    from dotenv import dotenv_values
+    env = dotenv_values(".env")
+    cfg_path = "config/v6.yaml"
+    p, n = ingest(cfg_path, env)
+    print(f"Ingested {n} records. Docstore at {p}")
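For orientation, a minimal sketch (not part of the commit) of the record shape `normalize_record()` writes to `docstore.jsonl`; the sample values are borrowed from `data/grants_fallback_sample.json`:

```python
# Sketch only: illustrates the docstore schema produced by normalize_record().
from app.ingest import normalize_record

rec = normalize_record("Grants.gov (API: capacity building - general)", {
    "id": "SAMPLE-002",
    "title": "Senior Services Capacity Grant",
    "summary": "Capacity building for organizations serving seniors.",
    "url": "",
    "deadline": "2025-10-01",
    "eligibility": "",
    "agency": "Sample City",
    "geo": "US",
    "categories": ["capacity_building"],
})
# rec keys: id, source, title, summary, url, deadline, eligibility,
# agency, geo, categories, raw (the original dict).
```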
app/main.py
ADDED
@@ -0,0 +1,77 @@
+import argparse, os, json
+from pathlib import Path
+from dotenv import dotenv_values
+import pandas as pd
+
+from app.ingest import ingest
+from app.search import search
+
+def get_env():
+    env = dotenv_values(".env")
+    if not env:
+        env = {
+            "DATA_DIR":"data",
+            "DOCSTORE_DIR":"data/docstore",
+            "INDEX_DIR":"data/index",
+            "EXPORT_DIR":"data/exports",
+        }
+    for k in ["DATA_DIR","DOCSTORE_DIR","INDEX_DIR","EXPORT_DIR"]:
+        os.makedirs(env[k], exist_ok=True)
+    return env
+
+def cmd_ingest(args):
+    env = get_env()
+    path, n = ingest("config/sources.yaml", env)
+    print(f"Ingest complete. {n} records. Docstore: {path}")
+
+def cmd_search(args):
+    env = get_env()
+    filters = {}
+    if args.geo:
+        filters["geo"] = args.geo.split(",")
+    if args.categories:
+        filters["categories"] = args.categories.split(",")
+    res = search(args.q, env, top_k=args.k, filters=filters)
+    for r in res:
+        print(f"- {r['title']} [{r['source']}] ({r['geo']}) score={r['score']:.3f}")
+        print(f"  {r['url']}")
+
+def cmd_export(args):
+    env = get_env()
+    filters = {}
+    if args.geo:
+        filters["geo"] = args.geo.split(",")
+    if args.categories:
+        filters["categories"] = args.categories.split(",")
+    res = search(args.q, env, top_k=args.k, filters=filters)
+    out = Path(env["EXPORT_DIR"]) / (args.out or "results.csv")
+    pd.DataFrame(res).to_csv(out, index=False)
+    print(f"Exported {len(res)} rows to {out}")
+
+if __name__ == "__main__":
+    p = argparse.ArgumentParser()
+    sub = p.add_subparsers(dest="cmd")
+
+    p_ing = sub.add_parser("ingest", help="Ingest sources and build index")
+    p_ing.set_defaults(func=cmd_ingest)
+
+    p_search = sub.add_parser("search", help="Search index")
+    p_search.add_argument("--q", required=True)
+    p_search.add_argument("--k", type=int, default=15)
+    p_search.add_argument("--geo", default="")
+    p_search.add_argument("--categories", default="")
+    p_search.set_defaults(func=cmd_search)
+
+    p_export = sub.add_parser("export", help="Export search results to CSV")
+    p_export.add_argument("--q", required=True)
+    p_export.add_argument("--k", type=int, default=50)
+    p_export.add_argument("--geo", default="")
+    p_export.add_argument("--categories", default="")
+    p_export.add_argument("--out", default="results.csv")
+    p_export.set_defaults(func=cmd_export)
+
+    args = p.parse_args()
+    if not args.cmd:
+        p.print_help()
+    else:
+        args.func(args)
app/search.py
ADDED
@@ -0,0 +1,37 @@
+import json
+from pathlib import Path
+from typing import List, Dict
+from sentence_transformers import SentenceTransformer
+import numpy as np
+
+def load_index(env: Dict):
+    import faiss, json
+    index_path = Path(env["INDEX_DIR"]) / "faiss.index"
+    meta_path = Path(env["INDEX_DIR"]) / "meta.json"
+    if not index_path.exists():
+        raise RuntimeError("Index not found. Run ingest first.")
+    index = faiss.read_index(str(index_path))
+    metas = json.load(open(meta_path))
+    return index, metas
+
+def embed(texts: List[str]):
+    model = SentenceTransformer("all-MiniLM-L6-v2")
+    return model.encode(texts, convert_to_numpy=True, normalize_embeddings=True)
+
+def search(q: str, env: Dict, top_k: int = 15, filters: Dict = None) -> List[Dict]:
+    index, metas = load_index(env)
+    qv = embed([q])
+    scores, idxs = index.search(qv, top_k)
+    results = []
+    for score, idx in zip(scores[0], idxs[0]):
+        if idx == -1: continue
+        m = metas[idx]
+        if filters:
+            if "geo" in filters and filters["geo"] and m.get("geo") not in filters["geo"]:
+                continue
+            if "categories" in filters and filters["categories"]:
+                if not set(filters["categories"]).intersection(set(m.get("categories",[]))):
+                    continue
+        m["score"] = float(score)
+        results.append(m)
+    return results
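`search()` can also be called directly; this is what `cmd_search()` in `app/main.py` and the Streamlit UI do. A minimal sketch (not part of the commit), assuming `make ingest` has already built `data/index/faiss.index` and that `.env` points at the same paths:

```python
# Sketch only: programmatic search, mirroring cmd_search() in app/main.py.
from dotenv import dotenv_values
from app.search import search

env = dotenv_values(".env")  # must contain INDEX_DIR
hits = search(
    "capacity building for seniors",
    env,
    top_k=5,
    filters={"geo": ["US"], "categories": ["capacity_building"]},
)
for h in hits:
    print(f"{h['score']:.3f}  {h['title']}  {h['url']}")
```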
app/sources/grantsgov_api.py
ADDED
@@ -0,0 +1,120 @@
+# app/sources/grantsgov_api.py
+import os
+import time
+import json
+import logging
+from typing import Dict, List, Tuple
+import requests
+
+log = logging.getLogger(__name__)
+
+DEFAULT_URL = "https://apply07.grants.gov/grantsws/rest/opportunities/search/"
+
+def _session():
+    s = requests.Session()
+    s.headers.update({
+        "Accept": "application/json",
+        "Content-Type": "application/json",
+        "User-Agent": "grants-rag-app/0.1 (+https://awakeningacts.org)"
+    })
+    return s
+
+def search_grants(
+    url: str,
+    payload: Dict,
+    page_size: int = 50,
+    max_pages: int = 5,
+    delay_sec: float = 0.4
+) -> List[Dict]:
+    """
+    Calls Grants.gov search API with POST+JSON.
+    Paginates using numRecords + startRecordNum (1-based).
+    Returns list of 'opportunity' dicts.
+    """
+    url = (url or DEFAULT_URL).rstrip("/") + "/"
+    sess = _session()
+    results: List[Dict] = []
+
+    start = 1
+    for page in range(max_pages):
+        body = dict(payload or {})
+        # Typical pagination keys accepted by the API
+        body.setdefault("oppStatuses", "posted")  # adjust if you want all statuses
+        body["numRecords"] = page_size
+        body["startRecordNum"] = start
+
+        try:
+            resp = sess.post(url, json=body, timeout=30)
+            # If servers briefly reject, a short retry helps
+            if resp.status_code in (502, 503, 504):
+                time.sleep(1.0)
+                resp = sess.post(url, json=body, timeout=30)
+            resp.raise_for_status()
+        except requests.HTTPError as e:
+            # Provide a helpful log and bubble up so caller can decide fallback
+            msg = f"Grants.gov search failed (status {getattr(resp,'status_code',None)}): {e}"
+            log.error(msg)
+            raise
+        except requests.RequestException as e:
+            log.error(f"Network error calling Grants.gov: {e}")
+            raise
+
+        data = resp.json() if resp.content else {}
+        # API typically returns 'opportunities' or 'oppHits' style structures.
+        items = (
+            data.get("opportunities")
+            or data.get("oppHits")
+            or data.get("searchResults")
+            or []
+        )
+
+        if not items:
+            break
+
+        results.extend(items)
+        # If fewer than a full page returned, we've reached the end.
+        if len(items) < page_size:
+            break
+
+        start += page_size
+        if delay_sec:
+            time.sleep(delay_sec)
+
+    return results
+
+
+def map_to_records(items: List[Dict]) -> List[Dict]:
+    """
+    Map raw Grants.gov items to your internal record schema used by the app.
+    Tweak keys below to match your indexer.
+    """
+    out = []
+    for it in items or []:
+        # These keys vary depending on API flavor; use .get(...) to be safe.
+        out.append({
+            "id": it.get("opportunityNumber") or it.get("opportunityId") or it.get("id"),
+            "title": it.get("opportunityTitle") or it.get("title"),
+            "summary": it.get("synopsis") or it.get("description") or it.get("summary"),
+            "agency": it.get("agency"),  # sometimes 'agencyCode' / 'agencyName'
+            "postedDate": it.get("postedDate") or it.get("openDate"),
+            "closeDate": it.get("closeDate") or it.get("closeDateExplanation"),
+            "raw": it,  # keep original for debugging
+        })
+    return out
+
+
+# ---------- Optional Local Fallback Utilities ----------
+
+def load_fallback_json(path: str) -> List[Dict]:
+    try:
+        with open(path, "r") as f:
+            data = json.load(f)
+        # allow either {"opportunities":[...]} or just [...]
+        if isinstance(data, dict):
+            data = data.get("opportunities") or data.get("items") or data.get("data") or []
+        if not isinstance(data, list):
+            return []
+        return data
+    except Exception as e:
+        log.error(f"Failed to load fallback json: {e}")
+        return []
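The client can be exercised on its own, mirroring `collect_from_grantsgov_api()` in `app/ingest.py`. A minimal sketch (not part of the commit) using the endpoint and payload from `config/v6.yaml`; the live response keys vary by API flavor, so the bundled sample is used if the call fails:

```python
# Sketch only: direct use of the Grants.gov client with the v6.yaml payload.
from app.sources.grantsgov_api import search_grants, map_to_records, load_fallback_json

payload = {"keyword": "capacity building", "oppStatuses": "posted"}
try:
    items = search_grants(
        "https://apply07.grants.gov/grantsws/rest/opportunities/search/",
        payload, page_size=25, max_pages=1,
    )
except Exception:
    # Offline/blocked: fall back to the sample JSON shipped in this commit.
    items = load_fallback_json("data/grants_fallback_sample.json")

for r in map_to_records(items):
    print(r["id"], r["title"], r["closeDate"])
```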
app/ui_streamlit.py
ADDED
@@ -0,0 +1,68 @@
+import os, json
+import streamlit as st
+from dotenv import dotenv_values
+from app.search import search
+
+st.set_page_config(page_title="Grants Discovery RAG", layout="wide")
+st.title("Grants Discovery RAG (Capacity Building)")
+
+env = dotenv_values(".env")
+if not env:
+    st.warning("No .env found. Using defaults.")
+    env = {
+        "DATA_DIR":"data",
+        "DOCSTORE_DIR":"data/docstore",
+        "INDEX_DIR":"data/index",
+        "EXPORT_DIR":"data/exports",
+    }
+
+preset = st.radio("Quick topic:", ["General", "Elderly", "Prison Ministry", "Evangelism", "Vehicles/Transport", "FTA 5310"], horizontal=True)
+default_q = {
+    "General": "capacity building",
+    "Elderly": "capacity building for seniors and aging services",
+    "Prison Ministry": "capacity building for reentry and prison ministry",
+    "Evangelism": "capacity building for faith and community outreach",
+    "Vehicles/Transport": "capacity building transportation vehicles vans buses mobility",
+    "FTA 5310": "5310 Enhanced Mobility Seniors Individuals with Disabilities",
+}.get(preset, "capacity building")
+
+q = st.text_input("Search query", value=default_q)
+geo = st.multiselect("Geo filter", options=["US","MD","MA"], default=["US"])
+cats = st.multiselect("Category filter", options=["capacity_building","elderly","prison_ministry","evangelism","transportation","vehicle"], default=["capacity_building"])
+top_k = st.slider("Results", 5, 50, 15)
+
+col1, col2 = st.columns([1,1])
+with col1:
+    if st.button("Search"):
+        try:
+            results = search(q, env, top_k=top_k, filters={"geo": geo, "categories": cats})
+            st.session_state["results"] = results
+        except Exception as e:
+            st.error(str(e))
+
+with col2:
+    if st.button("Export Results to CSV"):
+        results = st.session_state.get("results", [])
+        if not results:
+            st.warning("No results to export. Run a search first.")
+        else:
+            os.makedirs(env["EXPORT_DIR"], exist_ok=True)
+            out_path = os.path.join(env["EXPORT_DIR"], "results.csv")
+            import pandas as pd
+            pd.DataFrame(results).to_csv(out_path, index=False)
+            st.success(f"Exported to {out_path}")
+
+st.markdown("---")
+results = st.session_state.get("results", [])
+if results:
+    for r in results:
+        st.markdown(f"### {r.get('title','(no title)')}")
+        st.write(f"**Source:** {r.get('source','')} | **Geo:** {r.get('geo','')} | **Categories:** {', '.join(r.get('categories',[]))}")
+        link = r.get('url','')
+        if link and not link.startswith("http"):
+            st.caption("Note: This item may display an ID or number instead of a full link. Open on Grants.gov if needed.")
+        # ✅ Fixed: one line, no syntax error
+        st.write(f"[Open Link]({link}) \nScore: {r.get('score', 0):.3f}")
+        st.markdown("---")
+else:
+    st.info("Enter a query and click Search.")
config/sources.yaml
ADDED
@@ -0,0 +1,44 @@
+# Minimal, valid config — v6.2
+sources:
+  - name: "Grants.gov (API: capacity building - general)"
+    type: grantsgov_api
+    enabled: true
+    url: "https://www.grants.gov/grantsws/rest/opportunities/search/"
+    geo: "US"
+    categories: ["capacity_building"]
+    api:
+      page_size: 100
+      max_pages: 5
+      payload:
+        keyword: "capacity building"
+        opportunityStatuses: ["posted"]
+        sortBy: "openDate|desc"
+
+  - name: "Grants.gov (API: capacity building - vehicles/transportation)"
+    type: grantsgov_api
+    enabled: true
+    url: "https://www.grants.gov/grantsws/rest/opportunities/search/"
+    geo: "US"
+    categories: ["capacity_building", "transportation", "vehicle"]
+    api:
+      page_size: 100
+      max_pages: 5
+      payload:
+        keyword: "capacity building transportation vehicle bus van transit mobility"
+        opportunityStatuses: ["posted"]
+        sortBy: "openDate|desc"
+
+  - name: "Grants.gov (API: FTA Section 5310 - Enhanced Mobility)"
+    type: grantsgov_api
+    enabled: true
+    url: "https://www.grants.gov/grantsws/rest/opportunities/search/"
+    geo: "US"
+    categories: ["capacity_building", "elderly", "transportation", "vehicle"]
+    api:
+      page_size: 100
+      max_pages: 3
+      payload:
+        keyword: "5310 Enhanced Mobility Seniors Individuals with Disabilities"
+        opportunityStatuses: ["posted"]
+        sortBy: "openDate|desc"
+        agencyCodes: ["FTA"]
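For reference, a minimal sketch (not part of the commit) of how `app/ingest.py` consumes this file: `load_config()` parses it with `yaml.safe_load`, and each enabled source's `api.payload` becomes the POST body sent to the configured URL:

```python
# Sketch only: how ingest iterates the sources defined above.
import yaml

with open("config/sources.yaml") as f:
    cfg = yaml.safe_load(f)

for src in cfg.get("sources", []):
    if not src.get("enabled", False):
        continue
    api = src.get("api", {})
    print(src["name"], "->", api.get("payload", {}).get("keyword"))
```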
config/v6.yaml
ADDED
@@ -0,0 +1,52 @@
+# Minimal, valid config — v6.2
+sources:
+  - name: "Grants.gov (API: capacity building - general)"
+    type: grantsgov_api
+    enabled: true
+    url: "https://apply07.grants.gov/grantsws/rest/opportunities/search/"
+    fallback_json: data/grants_fallback_sample.json
+    geo: "US"
+    categories:
+      - capacity_building
+    api:
+      page_size: 25
+      max_pages: 1
+      payload:
+        keyword: "capacity building"
+        oppStatuses: "posted"
+
+  - name: "Grants.gov (API: capacity building - vehicles/transportation)"
+    type: grantsgov_api
+    enabled: true
+    url: "https://apply07.grants.gov/grantsws/rest/opportunities/search/"
+    fallback_json: data/grants_fallback_sample.json
+    geo: "US"
+    categories:
+      - capacity_building
+      - transportation
+      - vehicle
+    api:
+      page_size: 25
+      max_pages: 1
+      payload:
+        keyword: "capacity building transportation vehicle bus van transit mobility"
+        oppStatuses: "posted"
+
+  - name: "Grants.gov (API: FTA Section 5310 - Enhanced Mobility)"
+    type: grantsgov_api
+    enabled: true
+    url: "https://apply07.grants.gov/grantsws/rest/opportunities/search/"
+    fallback_json: data/grants_fallback_sample.json
+    geo: "US"
+    categories:
+      - capacity_building
+      - elderly
+      - transportation
+      - vehicle
+    api:
+      page_size: 25
+      max_pages: 1
+      payload:
+        keyword: "5310 Enhanced Mobility Seniors Individuals with Disabilities"
+        oppStatuses: "posted"
+        agencyCodes: ["FTA"]
data/exports/results.csv
ADDED
@@ -0,0 +1,7 @@
+id,title,url,source,geo,categories,score
+SAMPLE-002,Senior Services Capacity Grant,,Grants.gov (API: FTA Section 5310 - Enhanced Mobility),US,"['capacity_building', 'elderly', 'transportation', 'vehicle']",0.27466529607772827
+SAMPLE-002,Senior Services Capacity Grant,,Grants.gov (API: capacity building - vehicles/transportation),US,"['capacity_building', 'transportation', 'vehicle']",0.27466529607772827
+SAMPLE-002,Senior Services Capacity Grant,,Grants.gov (API: capacity building - general),US,['capacity_building'],0.27466529607772827
+SAMPLE-001,Community Outreach Mini-Grant,,Grants.gov (API: FTA Section 5310 - Enhanced Mobility),US,"['capacity_building', 'elderly', 'transportation', 'vehicle']",0.028130291029810905
+SAMPLE-001,Community Outreach Mini-Grant,,Grants.gov (API: capacity building - vehicles/transportation),US,"['capacity_building', 'transportation', 'vehicle']",0.028130291029810905
+SAMPLE-001,Community Outreach Mini-Grant,,Grants.gov (API: capacity building - general),US,['capacity_building'],0.028130291029810905
data/grants_fallback_sample.json
ADDED
@@ -0,0 +1,20 @@
+{
+  "opportunities": [
+    {
+      "opportunityNumber": "SAMPLE-001",
+      "opportunityTitle": "Community Outreach Mini-Grant",
+      "synopsis": "Supports local nonprofits delivering basic needs and outreach services.",
+      "agency": "Sample Foundation",
+      "postedDate": "2025-09-01",
+      "closeDate": "2025-10-15"
+    },
+    {
+      "opportunityNumber": "SAMPLE-002",
+      "opportunityTitle": "Senior Services Capacity Grant",
+      "synopsis": "Capacity building for organizations serving seniors (case management, benefits access).",
+      "agency": "Sample City",
+      "postedDate": "2025-08-25",
+      "closeDate": "2025-10-01"
+    }
+  ]
+}
requirements.txt
ADDED
@@ -0,0 +1,10 @@
+streamlit>=1.36
+pydantic>=2.7
+python-dotenv>=1.0
+pandas>=2.2
+numpy>=1.26
+faiss-cpu>=1.8
+sentence-transformers>=3.0
+requests>=2.32
+pyyaml>=6.0
+tqdm>=4.66
run.sh
ADDED
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+set -e
+cd "$(dirname "$0")"
+
+# Activate venv (create if missing)
+if [ ! -d ".venv" ]; then
+  python3 -m venv .venv
+fi
+source .venv/bin/activate
+
+# Make imports work and launch the UI
+export PYTHONPATH="$(pwd)"
+python -m streamlit run app/ui_streamlit.py