Spaces:
Sleeping
Sleeping
Commit
·
796e66c
1
Parent(s):
b363844
feat: add state-level HTML/PDF adapters and updated capacity filters
Browse files- .gitignore +7 -0
- app/ingest.py +294 -14
- config/sources.yaml +148 -1
- project-plan-rag.rtf +569 -0
.gitignore
CHANGED
@@ -1,3 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
.venv/
|
2 |
__pycache__/
|
3 |
*.pyc
|
|
|
1 |
+
|
2 |
+
venv/
|
3 |
+
*.pyc
|
4 |
+
__pycache__/
|
5 |
+
.DS_Store
|
6 |
+
Makefile.old
|
7 |
+
start-up-project.txt
|
8 |
.venv/
|
9 |
__pycache__/
|
10 |
*.pyc
|
app/ingest.py
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
from __future__ import annotations
|
3 |
import json
|
4 |
from pathlib import Path
|
5 |
-
from typing import Dict, List, Any
|
6 |
|
7 |
import yaml
|
8 |
import numpy as np
|
@@ -11,6 +11,12 @@ from sentence_transformers import SentenceTransformer
|
|
11 |
from app.paths import DOCSTORE_DIR, INDEX_DIR
|
12 |
from .normalize import normalize # ← central normalizer
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
# -------------------- Config --------------------
|
16 |
|
@@ -19,6 +25,65 @@ def load_config(cfg_path: str) -> Dict:
|
|
19 |
return yaml.safe_load(f)
|
20 |
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
# -------------------- Grants.gov collector --------------------
|
23 |
|
24 |
def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
|
@@ -39,6 +104,194 @@ def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
|
|
39 |
return [h for h in hits if isinstance(h, dict)]
|
40 |
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
# -------------------- Write docstore & build index --------------------
|
43 |
|
44 |
def _save_docstore(recs: List[Dict[str, Any]]) -> str:
|
@@ -55,7 +308,6 @@ def _build_index_from_docstore() -> int:
|
|
55 |
if not ds_path.exists():
|
56 |
raise RuntimeError("Docstore not found. Run ingest first.")
|
57 |
|
58 |
-
# Load records → texts + metas
|
59 |
texts: List[str] = []
|
60 |
metas: List[Dict[str, Any]] = []
|
61 |
with ds_path.open("r", encoding="utf-8") as f:
|
@@ -85,16 +337,15 @@ def _build_index_from_docstore() -> int:
|
|
85 |
print(f"[index] Rows loaded from docstore: {len(texts)}")
|
86 |
|
87 |
if not texts:
|
88 |
-
|
89 |
-
(INDEX_DIR).mkdir(parents=True, exist_ok=True)
|
90 |
(INDEX_DIR / "meta.json").write_text(json.dumps([], ensure_ascii=False))
|
91 |
print("[index] No texts to embed. Wrote empty meta.json.")
|
92 |
return 0
|
93 |
|
94 |
-
# Embed (CPU default;
|
95 |
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
96 |
model.max_seq_length = 256
|
97 |
-
batch = max(8, min(32, len(texts)))
|
98 |
emb = model.encode(
|
99 |
texts,
|
100 |
convert_to_numpy=True,
|
@@ -117,22 +368,32 @@ def _build_index_from_docstore() -> int:
|
|
117 |
return len(texts)
|
118 |
|
119 |
|
120 |
-
# --------------------
|
|
|
|
|
121 |
|
122 |
def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
|
123 |
"""
|
124 |
-
Reads config, fetches from enabled sources, normalizes
|
125 |
-
|
|
|
126 |
"""
|
127 |
cfg = load_config(cfg_path)
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
all_rows: List[Dict[str, Any]] = []
|
130 |
for entry in cfg.get("sources", []):
|
131 |
if not entry.get("enabled"):
|
132 |
continue
|
133 |
|
134 |
name = entry.get("name", "<source>")
|
135 |
-
geo
|
136 |
cats = entry.get("categories") or []
|
137 |
static = {"geo": geo, "categories": cats}
|
138 |
|
@@ -143,20 +404,37 @@ def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
|
|
143 |
raw_hits = _collect_from_grantsgov_api(entry)
|
144 |
rows = [normalize("grants_gov", h, static) for h in raw_hits]
|
145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
elif typ == "local_sample":
|
147 |
p = Path(entry["path"]).expanduser()
|
148 |
blob = json.loads(p.read_text(encoding="utf-8"))
|
149 |
items = blob.get("opportunities") or []
|
150 |
rows = [normalize("local_sample", op, static) for op in items]
|
151 |
|
152 |
-
|
153 |
-
|
154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
print(f"[collect] {name} → {len(rows)} rows")
|
157 |
all_rows.extend(rows)
|
158 |
|
159 |
-
# ---- DEDUPE (id → url → title) ----
|
160 |
seen, unique = set(), []
|
161 |
for r in all_rows:
|
162 |
key = r.get("id") or r.get("url") or r.get("title")
|
@@ -172,6 +450,8 @@ def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
|
|
172 |
return path, n
|
173 |
|
174 |
|
|
|
|
|
175 |
if __name__ == "__main__":
|
176 |
import argparse
|
177 |
ap = argparse.ArgumentParser()
|
|
|
2 |
from __future__ import annotations
|
3 |
import json
|
4 |
from pathlib import Path
|
5 |
+
from typing import Dict, List, Any, Tuple, Optional
|
6 |
|
7 |
import yaml
|
8 |
import numpy as np
|
|
|
11 |
from app.paths import DOCSTORE_DIR, INDEX_DIR
|
12 |
from .normalize import normalize # ← central normalizer
|
13 |
|
14 |
+
import re
|
15 |
+
import time
|
16 |
+
import hashlib
|
17 |
+
import requests
|
18 |
+
from bs4 import BeautifulSoup
|
19 |
+
|
20 |
|
21 |
# -------------------- Config --------------------
|
22 |
|
|
|
25 |
return yaml.safe_load(f)
|
26 |
|
27 |
|
28 |
+
# -------------------- Capacity / Geo Filters (config-driven) --------------------
|
29 |
+
# controls live in config/sources.yaml:
|
30 |
+
# filters:
|
31 |
+
# capacity_only: true
|
32 |
+
# pa_md_only: false
|
33 |
+
|
34 |
+
# Phrases that mark a record as capacity-building. Every pattern is compiled
# case-insensitively (re.I), so acronym patterns such as \bTA\b and \bCRM\b also
# match their lowercase forms ("ta", "crm").
_INCLUDE_PATTERNS = [re.compile(p, re.I) for p in [
    r"\bcapacity(?:[-\s]?building)?\b",
    r"\btechnical\s+assistance\b",
    r"\bTA\b",
    r"\borganizational\s+(capacity|effectiveness|development|readiness|stabilization)\b",
    r"\borganization(?:al)?\s+infrastructure\b",
    r"\bback[-\s]?office\b|\bbackbone\s+organization\b",
    r"\bgovernance\b|\bboard\s+development\b|\bboard\s+training\b",
    r"\bpre[-\s]?development\b|\bpredevelopment\b|\bplanning\s+grant\b",
    r"\bdata\s+systems?\b|\bCRM\b|\bcase\s+management\b",
    r"\b(staff|workforce)\s+capacity\b|\bhire\s+(?:staff|positions?)\b",
    r"\bscal(?:e|ing)\s+capacity\b|\bexpand\s+capacity\b",
    r"\bnonprofit\b|\bfaith[-\s]?based\b|\bcommunity[-\s]?based\b",
]]

# Phrases that veto a record even when an include pattern also matches
# (_is_capacity_building_text checks these first).
# The negative lookaheads "(?!.*word)" cancel the veto when "word" appears later
# on the SAME line: "." does not match a newline here (no re.S flag), and
# _doc_text_from_row joins the record fields with "\n".
_EXCLUDE_PATTERNS = [re.compile(p, re.I) for p in [
    r"\bteaching\s+assistant\b|\bTAs\b",
    r"\bbench\s+capacity\b|\bmanufacturing\s+capacity\b(?!.*organiz)",
    r"\bclinical\s+trial\b|\blaboratory\s+capacity\b(?!.*community)",
    r"\b(postsecondary|university|college)\b(?!.*community\s+partner)",
    r"\bconstruction\b(?!.*(admin|organiz|back[-\s]?office|governance|systems))",
]]

# Place names and abbreviations that hint at Pennsylvania or Maryland.
# Compiled with re.I, so the two-letter codes also match the standalone words
# "pa" / "md" in any casing.
# NOTE(review): several names (e.g. "Montgomery County", "Erie") presumably also
# occur outside PA/MD — treat a match as a hint, not proof.
_PA_MD_HINTS = re.compile(
    r"\b("
    r"Pennsylvania|PA\b|Harrisburg|Philadelphia|Allegheny|Montgomery County\b|Pittsburgh|Scranton|Erie|"
    r"Maryland|MD\b|Annapolis|Baltimore|Prince\s+George'?s|Howard County\b"
    r")\b",
    re.I,
)
|
64 |
+
|
65 |
+
def _doc_text_from_row(rec: Dict[str, Any]) -> str:
    """
    Flatten the filterable fields of a record into one newline-joined string.

    Field order: title, synopsis (falling back to summary), agency, eligibility,
    categories, geo. Missing or falsy values contribute an empty line. A list of
    categories is space-joined; any other categories value is used as-is.
    Leading and trailing whitespace of the final string is stripped.
    """
    cats = rec.get("categories")
    if isinstance(cats, list):
        cat_text = " ".join(cats)
    else:
        cat_text = cats or ""

    pieces = [
        rec.get("title") or "",
        rec.get("synopsis") or rec.get("summary") or "",
        rec.get("agency") or "",
        rec.get("eligibility") or "",
        cat_text,
        rec.get("geo") or "",
    ]
    return "\n".join(pieces).strip()
|
73 |
+
|
74 |
+
def _is_capacity_building_text(text: str) -> bool:
    """
    Return True when *text* reads like a capacity-building opportunity.

    Empty text is rejected. Any exclude-pattern hit rejects the text outright;
    otherwise at least one include-pattern hit is required.
    """
    if not text:
        return False
    # Exclusions win over inclusions, so check them first.
    for veto in _EXCLUDE_PATTERNS:
        if veto.search(text):
            return False
    for wanted in _INCLUDE_PATTERNS:
        if wanted.search(text):
            return True
    return False
|
80 |
+
|
81 |
+
def _is_pa_md_text(text: str) -> bool:
    """
    Return True when *text* contains at least one Pennsylvania/Maryland hint.

    Empty text never matches.
    """
    return bool(text) and _PA_MD_HINTS.search(text) is not None
|
85 |
+
|
86 |
+
|
87 |
# -------------------- Grants.gov collector --------------------
|
88 |
|
89 |
def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
|
|
|
104 |
return [h for h in hits if isinstance(h, dict)]
|
105 |
|
106 |
|
107 |
+
# -------------------- NEW: Generic HTML / PDF collectors --------------------
|
108 |
+
|
109 |
+
# Request headers sent by _http_get on every fetch.
_HTTP_HEADERS = {
    # Identifies this client to the remote server.
    "User-Agent": "grants-rag/1.0 (+https://example.local) requests",
    # Prefer HTML/XHTML, then XML, and accept anything else as a last resort.
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
}
|
113 |
+
|
114 |
+
def _http_get(url: str, timeout: int = 20) -> Optional[requests.Response]:
    """
    GET *url* with the shared request headers.

    Returns the response only when the status is exactly 200 and the body is
    non-empty. Returns None for any other status, an empty body, or when
    requests raises a RequestException.
    """
    try:
        resp = requests.get(url, headers=_HTTP_HEADERS, timeout=timeout)
        usable = resp.status_code == 200 and bool(resp.content)
    except requests.RequestException:
        return None
    return resp if usable else None
|
122 |
+
|
123 |
+
def _soup(html: str) -> BeautifulSoup:
    """
    Parse *html* into a BeautifulSoup tree.

    Tries lxml first, then html5lib, and finally the standard-library
    "html.parser", so parsing still works when the optional parsers are not
    installed (BeautifulSoup raises FeatureNotFound for a missing parser).
    """
    from bs4 import FeatureNotFound  # local import: bs4 is already a dependency of this module

    for parser in ("lxml", "html5lib"):
        try:
            return BeautifulSoup(html, parser)
        except FeatureNotFound:
            continue  # parser not installed; try the next one
    # html.parser ships with Python, so this always succeeds.
    return BeautifulSoup(html, "html.parser")
|
126 |
+
|
127 |
+
def _text_from_soup(s: BeautifulSoup, selectors: Optional[List[str]] = None) -> Tuple[str, str]:
    """
    Returns (title, text). Uses selectors if provided;
    falls back to common content containers, then to <body>.

    title is the stripped <title> string, or "" when the page has none.
    text is the text of every selected node, blocks separated by a blank line.
    A node is emitted only once, and a node nested inside another selected node
    is skipped, so overlapping selectors (e.g. "main" plus "article") do not
    duplicate content.
    """
    title = s.title.string.strip() if s.title and s.title.string else ""

    candidates = []
    if selectors:
        for css in selectors:
            candidates.extend(s.select(css) or [])
    if not candidates:
        for css in ("main", "article", "#content", ".content", "[role='main']"):
            candidates.extend(s.select(css) or [])
    if not candidates:
        candidates = [s.body] if s.body else []

    # Compare nodes by identity: tag equality compares markup, which would
    # wrongly merge distinct nodes that happen to have identical content.
    selected_ids = {id(node) for node in candidates if node}
    emitted_ids = set()

    parts: List[str] = []
    for node in candidates:
        if not node or id(node) in emitted_ids:
            continue
        emitted_ids.add(id(node))
        # The text of a nested node is already part of its selected ancestor.
        if any(id(ancestor) in selected_ids for ancestor in node.parents):
            continue
        txt = node.get_text(separator="\n", strip=True)
        if txt:
            parts.append(txt)

    body = "\n\n".join(parts).strip()
    return title, body
|
153 |
+
|
154 |
+
def _make_id(*fields: str) -> str:
    """
    Derive a stable hex id from the given fields.

    Each non-empty field is UTF-8 encoded (unencodable characters are dropped)
    and fed to a SHA-1 hash followed by a "|" separator; empty fields are
    skipped. Returns the 40-character hex digest.
    """
    digest = hashlib.sha1()
    for value in fields:
        if not value:
            continue
        digest.update(value.encode("utf-8", "ignore") + b"|")
    return digest.hexdigest()
|
161 |
+
|
162 |
+
def _normalize_web_record(
    source_name: str,
    url: str,
    title: str,
    body: str,
    static: Dict[str, Any],
    extra: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Build a record for a fetched web/PDF document, shaped like normalize()
    output so downstream stays unchanged.

    - id: extra["id"] when given, else a hash of the url plus the title
      (or the first 160 characters of the body when the title is empty).
    - title: the page title, else extra["title"], else the url.
    - synopsis: the body clipped to 2000 characters.
    - geo / categories come from *static*; the remaining optional fields
      (agency, eligibility, deadline, program_number, posted_date) from *extra*.
    """
    meta = extra or {}
    record_id = meta.get("id") or _make_id(url, title or body[:160])
    display_title = title or meta.get("title") or url

    record: Dict[str, Any] = {}
    record["id"] = record_id
    record["title"] = display_title
    record["synopsis"] = body[:2000]  # clip; embeddings use title+synopsis later
    record["summary"] = None
    record["url"] = url
    record["source"] = source_name
    record["geo"] = static.get("geo")
    record["categories"] = static.get("categories")
    record["agency"] = meta.get("agency", "")
    record["eligibility"] = meta.get("eligibility", "")
    record["deadline"] = meta.get("deadline")
    record["program_number"] = meta.get("program_number")
    record["posted_date"] = meta.get("posted_date")
    return record
|
189 |
+
|
190 |
+
def _collect_from_http_html(entry: Dict, source_name: str, static: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Fetch a landing page (and optionally the pages it links to) and return
    one normalized record per page.

    Supports types: 'web_page' and 'http_html'
    Config keys supported:
      - url (str)
      - parse: { follow_links: bool, link_selectors: [..], content_selectors: [..] }
        ('extract' is read instead when 'parse' is missing or empty)
      - crawl: { schedule: "...", max_depth: int }  # max_depth 0/None = only landing

    Returns [] when no url is configured or the landing page cannot be fetched.
    The landing page always yields a record; followed pages only when they have
    body text. Links are followed only when follow_links is true AND max_depth > 0.
    Relative hrefs are resolved against the page they appear on, fragments are
    dropped, and only http(s) targets are kept. Links whose path ends in ".pdf"
    are handed to _collect_from_http_pdf instead of being parsed as HTML.
    At most 40 links are taken per page and at most 200 records are returned.
    """
    from urllib.parse import urldefrag, urljoin, urlparse  # stdlib; local so this block is self-contained

    max_rows = 200           # hard cap on records per source
    max_links_per_page = 40  # polite cap

    url = entry.get("url")
    if not url:
        return []
    r = _http_get(url)
    if not r:
        return []

    s = _soup(r.text)
    parse = entry.get("parse", {}) or entry.get("extract", {}) or {}
    content_selectors = parse.get("content_selectors") or []
    title, body = _text_from_soup(s, content_selectors)

    rows = [_normalize_web_record(source_name, url, title, body, static, extra={"posted_date": None})]

    # follow links?
    follow = bool(parse.get("follow_links"))
    link_selectors = parse.get("link_selectors") or []
    crawl = entry.get("crawl", {}) or {}
    max_depth = int(crawl.get("max_depth", 0) or 0)
    if not (follow and max_depth > 0):
        return rows

    # Track both spellings of the landing URL so "#section" links back to it are skipped.
    visited = {url, urldefrag(url).url}

    def _enq_links(soup: BeautifulSoup, base_url: str) -> List[str]:
        """Return de-duplicated absolute http(s) links matched by link_selectors, in page order."""
        out, seen = [], set()
        for sel in link_selectors:
            for a in soup.select(sel) or []:
                href = a.get("href")
                if not href:
                    continue
                # Resolve relative hrefs ("/grants/x.html") and drop "#fragment" parts.
                absolute = urldefrag(urljoin(base_url, href)).url
                if urlparse(absolute).scheme not in ("http", "https"):
                    continue  # mailto:, javascript:, tel:, ...
                if absolute not in seen:
                    seen.add(absolute)
                    out.append(absolute)
        return out[:max_links_per_page]

    frontier = _enq_links(s, r.url or url)
    depth = 1
    while frontier and depth <= max_depth and len(rows) < max_rows:
        next_frontier: List[str] = []
        for link in frontier:
            if len(rows) >= max_rows:
                break  # enforce the cap inside a level, not only between levels
            if link in visited:
                continue
            visited.add(link)

            if urlparse(link).path.lower().endswith(".pdf"):
                # A PDF parsed as HTML yields garbage text; use the PDF adapter instead.
                rows.extend(_collect_from_http_pdf({"url": link, "name": entry.get("name")}, source_name, static))
                time.sleep(0.1)  # gentle
                continue

            rr = _http_get(link)
            if not rr:
                continue
            if "pdf" in (rr.headers.get("Content-Type") or "").lower():
                continue  # PDF served from a non-.pdf URL; not parseable as HTML
            ss = _soup(rr.text)
            t2, b2 = _text_from_soup(ss, content_selectors)
            if b2:
                rows.append(_normalize_web_record(source_name, link, t2, b2, static, extra={"posted_date": None}))
            if depth < max_depth:
                next_frontier.extend(_enq_links(ss, rr.url or link))
            time.sleep(0.1)  # gentle
        frontier = next_frontier
        depth += 1

    return rows[:max_rows]
|
259 |
+
|
260 |
+
def _collect_from_http_pdf(entry: Dict, source_name: str, static: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Download a single PDF and return it as one normalized record.

    type: 'http_pdf'
    keys:
      - url (single PDF fetch)
      - name (used as the record title; defaults to "PDF Document")

    Best-effort: returns [] (after printing the reason) when no url is
    configured, pdfminer cannot be imported, or text extraction fails; returns
    [] silently when the download fails or the PDF contains no text.
    'url_patterns' entries are not supported by this adapter and are reported
    rather than ignored silently.
    """
    import io  # stdlib; local so this block is self-contained

    url = entry.get("url")
    if not url:
        if entry.get("url_patterns"):
            print(f"[collect] {source_name}: 'url_patterns' is not supported by http_pdf; set 'url' to a single PDF")
        return []

    try:
        from pdfminer.high_level import extract_text  # lazy import: optional dependency
    except Exception as exc:  # a broken install can raise more than ImportError
        print(f"[collect] {source_name}: pdfminer unavailable, skipping PDF ({exc})")
        return []

    r = _http_get(url, timeout=40)
    if not r:
        return []

    try:
        # extract_text accepts a binary file object, so no temp file is needed.
        body = extract_text(io.BytesIO(r.content)) or ""
    except Exception as exc:  # one malformed PDF must not abort the whole ingest run
        print(f"[collect] {source_name}: could not extract text from {url} ({exc})")
        return []

    if not body.strip():
        return []
    title = entry.get("name") or "PDF Document"
    return [_normalize_web_record(source_name, url, title, body, static, extra={"posted_date": None})]
|
293 |
+
|
294 |
+
|
295 |
# -------------------- Write docstore & build index --------------------
|
296 |
|
297 |
def _save_docstore(recs: List[Dict[str, Any]]) -> str:
|
|
|
308 |
if not ds_path.exists():
|
309 |
raise RuntimeError("Docstore not found. Run ingest first.")
|
310 |
|
|
|
311 |
texts: List[str] = []
|
312 |
metas: List[Dict[str, Any]] = []
|
313 |
with ds_path.open("r", encoding="utf-8") as f:
|
|
|
337 |
print(f"[index] Rows loaded from docstore: {len(texts)}")
|
338 |
|
339 |
if not texts:
|
340 |
+
INDEX_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
341 |
(INDEX_DIR / "meta.json").write_text(json.dumps([], ensure_ascii=False))
|
342 |
print("[index] No texts to embed. Wrote empty meta.json.")
|
343 |
return 0
|
344 |
|
345 |
+
# Embed (CPU default; portable)
|
346 |
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
|
347 |
model.max_seq_length = 256
|
348 |
+
batch = max(8, min(32, len(texts)))
|
349 |
emb = model.encode(
|
350 |
texts,
|
351 |
convert_to_numpy=True,
|
|
|
368 |
return len(texts)
|
369 |
|
370 |
|
371 |
+
# -------------------- Public API: ingest --------------------
|
372 |
+
|
373 |
+
__all__ = ["ingest"]
|
374 |
|
375 |
def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
|
376 |
"""
|
377 |
+
Reads config, fetches from enabled sources via adapters, normalizes to a single schema,
|
378 |
+
applies filters (capacity / PA-MD), dedupes, writes docstore, and builds the FAISS index.
|
379 |
+
Returns (docstore_path, n_indexed).
|
380 |
"""
|
381 |
cfg = load_config(cfg_path)
|
382 |
|
383 |
+
# ---- Filters from config ----
|
384 |
+
f_cfg = (cfg or {}).get("filters", {}) or {}
|
385 |
+
capacity_only = bool(f_cfg.get("capacity_only", True))
|
386 |
+
pa_md_only = bool(f_cfg.get("pa_md_only", False))
|
387 |
+
print(f"[filters] capacity_only = {'TRUE' if capacity_only else 'FALSE'}")
|
388 |
+
print(f"[filters] pa_md_only = {'TRUE' if pa_md_only else 'FALSE'}")
|
389 |
+
|
390 |
all_rows: List[Dict[str, Any]] = []
|
391 |
for entry in cfg.get("sources", []):
|
392 |
if not entry.get("enabled"):
|
393 |
continue
|
394 |
|
395 |
name = entry.get("name", "<source>")
|
396 |
+
geo = entry.get("geo") or "US"
|
397 |
cats = entry.get("categories") or []
|
398 |
static = {"geo": geo, "categories": cats}
|
399 |
|
|
|
404 |
raw_hits = _collect_from_grantsgov_api(entry)
|
405 |
rows = [normalize("grants_gov", h, static) for h in raw_hits]
|
406 |
|
407 |
+
elif typ in ("web_page", "http_html"):
|
408 |
+
rows = _collect_from_http_html(entry, name, static)
|
409 |
+
|
410 |
+
elif typ == "http_pdf":
|
411 |
+
rows = _collect_from_http_pdf(entry, name, static)
|
412 |
+
|
413 |
elif typ == "local_sample":
|
414 |
p = Path(entry["path"]).expanduser()
|
415 |
blob = json.loads(p.read_text(encoding="utf-8"))
|
416 |
items = blob.get("opportunities") or []
|
417 |
rows = [normalize("local_sample", op, static) for op in items]
|
418 |
|
419 |
+
# Unknown types => skip silently
|
420 |
+
|
421 |
+
# ---- Apply capacity / geo filters BEFORE collecting ----
|
422 |
+
if rows and (capacity_only or pa_md_only):
|
423 |
+
filtered = []
|
424 |
+
for r in rows:
|
425 |
+
t = _doc_text_from_row(r)
|
426 |
+
if capacity_only and not _is_capacity_building_text(t):
|
427 |
+
continue
|
428 |
+
if pa_md_only and not _is_pa_md_text(t):
|
429 |
+
continue
|
430 |
+
filtered.append(r)
|
431 |
+
print(f"[filter] {name}: kept {len(filtered)}/{len(rows)} after filters")
|
432 |
+
rows = filtered
|
433 |
|
434 |
print(f"[collect] {name} → {len(rows)} rows")
|
435 |
all_rows.extend(rows)
|
436 |
|
437 |
+
# ---- DEDUPE (by id → url → title) ----
|
438 |
seen, unique = set(), []
|
439 |
for r in all_rows:
|
440 |
key = r.get("id") or r.get("url") or r.get("title")
|
|
|
450 |
return path, n
|
451 |
|
452 |
|
453 |
+
# -------------------- CLI --------------------
|
454 |
+
|
455 |
if __name__ == "__main__":
|
456 |
import argparse
|
457 |
ap = argparse.ArgumentParser()
|
config/sources.yaml
CHANGED
@@ -1,4 +1,8 @@
|
|
1 |
# Minimal, valid config — v6.3
|
|
|
|
|
|
|
|
|
2 |
sources:
|
3 |
# ---------- FEDERAL: Grants.gov (focused for buses/van/mobility & reentry) ----------
|
4 |
|
@@ -83,6 +87,8 @@ sources:
|
|
83 |
sortBy: "openDate|desc"
|
84 |
|
85 |
# ---------- STATE & METRO PASS-THROUGHS (FTA 5310 etc.) ----------
|
|
|
|
|
86 |
|
87 |
- name: "Maryland MTA — Grants (incl. 5310)"
|
88 |
type: web_page
|
@@ -156,8 +162,149 @@ sources:
|
|
156 |
mode: "article"
|
157 |
keep_links: true
|
158 |
|
159 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
- name: "State 5310 Listings (curated JSON)"
|
162 |
type: json_static
|
163 |
enabled: false # set to true once you generate the file below
|
|
|
1 |
# Minimal, valid config — v6.3
|
2 |
+
filters:
|
3 |
+
capacity_only: true # keep only capacity-building items
|
4 |
+
pa_md_only: false # set to true to restrict index to PA/MD
|
5 |
+
|
6 |
sources:
|
7 |
# ---------- FEDERAL: Grants.gov (focused for buses/van/mobility & reentry) ----------
|
8 |
|
|
|
87 |
sortBy: "openDate|desc"
|
88 |
|
89 |
# ---------- STATE & METRO PASS-THROUGHS (FTA 5310 etc.) ----------
|
90 |
+
# NOTE: These require the http_html/web_page/http_pdf adapters implemented in app/ingest.py.
|
91 |
+
# They are kept here (enabled) in case your runtime supports them; otherwise set enabled: false.
|
92 |
|
93 |
- name: "Maryland MTA — Grants (incl. 5310)"
|
94 |
type: web_page
|
|
|
162 |
mode: "article"
|
163 |
keep_links: true
|
164 |
|
165 |
+
# --- Pennsylvania: PCA (state arts) ---
|
166 |
+
- name: "PA Creative Industries – Capacity Building (landing)"
|
167 |
+
type: http_html
|
168 |
+
enabled: true
|
169 |
+
url: "https://www.pa.gov/agencies/coa/grants-and-loans/capacity-building-programs.html"
|
170 |
+
geo: "PA"
|
171 |
+
categories: ["capacity_building"]
|
172 |
+
parse:
|
173 |
+
follow_links: true
|
174 |
+
link_selectors:
|
175 |
+
- "a[href*='capacity']"
|
176 |
+
- "a[href*='strategies-for-success']"
|
177 |
+
- "a[href$='.pdf']"
|
178 |
+
content_selectors:
|
179 |
+
- "main"
|
180 |
+
- "article"
|
181 |
+
- ".content"
|
182 |
+
|
183 |
+
- name: "PA Creative Industries – Creative Sector Flex Fund"
|
184 |
+
type: http_html
|
185 |
+
enabled: true
|
186 |
+
url: "https://www.pa.gov/agencies/coa/grants-and-loans/creative-sector-flex-fund.html"
|
187 |
+
geo: "PA"
|
188 |
+
categories: ["capacity_building"]
|
189 |
+
parse:
|
190 |
+
follow_links: true
|
191 |
+
link_selectors:
|
192 |
+
- "a[href$='.pdf']"
|
193 |
+
- "a[href*='guidelines']"
|
194 |
+
- "a[href*='apply']"
|
195 |
+
content_selectors:
|
196 |
+
- "main"
|
197 |
+
- "article"
|
198 |
+
- ".content"
|
199 |
|
200 |
+
# --- Pennsylvania: PCCD (eGrants announcements & PDFs) ---
|
201 |
+
- name: "PCCD – Funding Announcements (eGrants)"
|
202 |
+
type: http_html
|
203 |
+
enabled: true
|
204 |
+
url: "https://www.pccd.pa.gov/Funding/Pages/default.aspx"
|
205 |
+
geo: "PA"
|
206 |
+
categories: ["capacity_building", "public_safety", "youth"]
|
207 |
+
parse:
|
208 |
+
follow_links: true
|
209 |
+
link_selectors:
|
210 |
+
- "a[href*='Funding-Announcement']"
|
211 |
+
- "a[href$='.pdf']"
|
212 |
+
- "a[href*='CJAB']"
|
213 |
+
- "a[href*='VIP']"
|
214 |
+
- "a[href*='CCVI']"
|
215 |
+
- "a[href*='BOOST']"
|
216 |
+
content_selectors:
|
217 |
+
- "main"
|
218 |
+
- "article"
|
219 |
+
- ".ms-rtestate-field"
|
220 |
+
|
221 |
+
- name: "PCCD – PDFs (deep fetch)"
|
222 |
+
type: http_pdf
|
223 |
+
enabled: true
|
224 |
+
url_patterns:
|
225 |
+
- "https://www.pccd.pa.gov/*/*.pdf"
|
226 |
+
geo: "PA"
|
227 |
+
categories: ["capacity_building"]
|
228 |
+
|
229 |
+
# --- Maryland: OneStop (statewide grant listings with 'capacity' search) ---
|
230 |
+
- name: "Maryland OneStop – Capacity search"
|
231 |
+
type: http_html
|
232 |
+
enabled: true
|
233 |
+
url: "https://onestop.md.gov/search?query=capacity"
|
234 |
+
geo: "MD"
|
235 |
+
categories: ["capacity_building"]
|
236 |
+
parse:
|
237 |
+
follow_links: true
|
238 |
+
link_selectors:
|
239 |
+
- "a[href*='/forms/']"
|
240 |
+
- "a[href*='/search/']"
|
241 |
+
content_selectors:
|
242 |
+
- "main"
|
243 |
+
- "article"
|
244 |
+
- "[role='main']"
|
245 |
+
|
246 |
+
# --- Maryland: DHCD (housing/community programs & press) ---
|
247 |
+
- name: "MD DHCD – Programs (grants & loans index)"
|
248 |
+
type: http_html
|
249 |
+
enabled: true
|
250 |
+
url: "https://dhcd.maryland.gov/Pages/Programs.aspx"
|
251 |
+
geo: "MD"
|
252 |
+
categories: ["capacity_building", "housing", "community_development"]
|
253 |
+
parse:
|
254 |
+
follow_links: true
|
255 |
+
link_selectors:
|
256 |
+
- "a[href*='Programs']"
|
257 |
+
- "a[href$='.pdf']"
|
258 |
+
- "a[href*='Trust']"
|
259 |
+
content_selectors:
|
260 |
+
- "#content"
|
261 |
+
- "main"
|
262 |
+
- "article"
|
263 |
+
|
264 |
+
- name: "MD DHCD – Press/Notices (watch for NOFOs)"
|
265 |
+
type: http_html
|
266 |
+
enabled: true
|
267 |
+
url: "https://dhcd.maryland.gov/Pages/PressReleases.aspx"
|
268 |
+
geo: "MD"
|
269 |
+
categories: ["capacity_building"]
|
270 |
+
parse:
|
271 |
+
follow_links: true
|
272 |
+
link_selectors:
|
273 |
+
- "a[href$='.pdf']"
|
274 |
+
- "a[href*='Notice']"
|
275 |
+
- "a[href*='Funding']"
|
276 |
+
content_selectors:
|
277 |
+
- "#content"
|
278 |
+
- "main"
|
279 |
+
- "article"
|
280 |
+
|
281 |
+
# --- Maryland: Chesapeake Bay Trust (recurring capacity-building RFPs) ---
|
282 |
+
- name: "Chesapeake Bay Trust – Capacity Building Initiative (CBI)"
|
283 |
+
type: http_html
|
284 |
+
enabled: true
|
285 |
+
url: "https://cbtrust.org/grants/capacity-building/"
|
286 |
+
geo: "MD"
|
287 |
+
categories: ["capacity_building", "environment", "community_health"]
|
288 |
+
parse:
|
289 |
+
follow_links: true
|
290 |
+
link_selectors:
|
291 |
+
- "a[href$='.pdf']"
|
292 |
+
- "a[href*='Request-for-Proposals']"
|
293 |
+
- "a[href*='RFP']"
|
294 |
+
content_selectors:
|
295 |
+
- "main"
|
296 |
+
- "article"
|
297 |
+
- ".entry-content"
|
298 |
+
|
299 |
+
- name: "CB Trust – PDFs (deep fetch)"
|
300 |
+
type: http_pdf
|
301 |
+
enabled: true
|
302 |
+
url_patterns:
|
303 |
+
- "https://cbtrust.org/*/*.pdf"
|
304 |
+
geo: "MD"
|
305 |
+
categories: ["capacity_building"]
|
306 |
+
|
307 |
+
# ---------- OPTIONAL: Curated JSON (enable after you generate it) ----------
|
308 |
- name: "State 5310 Listings (curated JSON)"
|
309 |
type: json_static
|
310 |
enabled: false # set to true once you generate the file below
|
project-plan-rag.rtf
ADDED
@@ -0,0 +1,569 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{\rtf1\ansi\ansicpg1252\cocoartf2822
|
2 |
+
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\froman\fcharset0 Times-Bold;\f1\froman\fcharset0 Times-Roman;\f2\fmodern\fcharset0 Courier;
|
3 |
+
\f3\froman\fcharset0 TimesNewRomanPSMT;\f4\fnil\fcharset0 AppleColorEmoji;\f5\froman\fcharset0 Times-Italic;
|
4 |
+
\f6\fnil\fcharset0 Menlo-Regular;\f7\fnil\fcharset0 HelveticaNeue;}
|
5 |
+
{\colortbl;\red255\green255\blue255;\red0\green0\blue0;\red0\green0\blue233;\red109\green109\blue109;
|
6 |
+
\red109\green109\blue109;\red0\green0\blue0;}
|
7 |
+
{\*\expandedcolortbl;;\cssrgb\c0\c0\c0;\cssrgb\c0\c0\c93333;\cssrgb\c50196\c50196\c50196;
|
8 |
+
\cssrgb\c50196\c50196\c50196;\cssrgb\c0\c0\c0\c84706;}
|
9 |
+
{\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid1\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid1}
|
10 |
+
{\list\listtemplateid2\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid101\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{circle\}}{\leveltext\leveltemplateid102\'01\uc0\u9702 ;}{\levelnumbers;}\fi-360\li1440\lin1440 }{\listname ;}\listid2}
|
11 |
+
{\list\listtemplateid3\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid201\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid3}
|
12 |
+
{\list\listtemplateid4\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid301\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{circle\}}{\leveltext\leveltemplateid302\'01\uc0\u9702 ;}{\levelnumbers;}\fi-360\li1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{square\}}{\leveltext\leveltemplateid303\'01\uc0\u9642 ;}{\levelnumbers;}\fi-360\li2160\lin2160 }{\listname ;}\listid4}
|
13 |
+
{\list\listtemplateid5\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid401\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid5}
|
14 |
+
{\list\listtemplateid6\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid501\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listname ;}\listid6}}
|
15 |
+
{\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}{\listoverride\listid2\listoverridecount0\ls2}{\listoverride\listid3\listoverridecount0\ls3}{\listoverride\listid4\listoverridecount0\ls4}{\listoverride\listid5\listoverridecount0\ls5}{\listoverride\listid6\listoverridecount0\ls6}}
|
16 |
+
\margl1440\margr1440\vieww31340\viewh19300\viewkind0
|
17 |
+
\deftab720
|
18 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
19 |
+
|
20 |
+
\f0\b\fs36 \cf0 \expnd0\expndtw0\kerning0
|
21 |
+
Best practices & features to include\
|
22 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
23 |
+
|
24 |
+
\f1\b0\fs24 \cf0 From the literature + what you\'92ve built already, here are features that improve quality & usability. {\field{\*\fldinst{HYPERLINK "https://www.funraise.org/blog/grant-management-software-for-nonprofits?utm_source=chatgpt.com"}}{\fldrslt \cf3 \ul \ulc3 NetSuite+3Funraise+3Fluxx+3}}\
|
25 |
+
|
26 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrt\brdrnil \trbrdrl\brdrnil \trbrdrr\brdrnil
|
27 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
28 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
29 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
30 |
+
\pard\intbl\itap1\pardeftab720\qc\partightenfactor0
|
31 |
+
|
32 |
+
\f0\b \cf0 Feature\cell
|
33 |
+
\pard\intbl\itap1\pardeftab720\qc\partightenfactor0
|
34 |
+
\cf0 Why it matters\cell
|
35 |
+
\pard\intbl\itap1\pardeftab720\qc\partightenfactor0
|
36 |
+
\cf0 How to implement / what to watch out for\cell \row
|
37 |
+
|
38 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
39 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
40 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
41 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
42 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
43 |
+
\cf0 Opportunity filters
|
44 |
+
\f1\b0 (keywords, geography, type, capacity-building etc.)\cell
|
45 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
46 |
+
\cf0 Helps users narrow to what matters, reduces overload.\cell
|
47 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
48 |
+
\cf0 You have keyword filters already. Also include date, state, amount range, \'93open vs closed\'94 status.\cell \row
|
49 |
+
|
50 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
51 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
52 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
53 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
54 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
55 |
+
|
56 |
+
\f0\b \cf0 Deadline alerts / reminders
|
57 |
+
\f1\b0 \cell
|
58 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
59 |
+
\cf0 Prevents missing grants because a deadline slipped by unnoticed.\cell
|
60 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
61 |
+
\cf0 Track
|
62 |
+
\f2\fs26 deadline
|
63 |
+
\f1\fs24 (if parsed), then show upcoming ones. Allow export to calendar or reminders.\cell \row
|
64 |
+
|
65 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
66 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
67 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
68 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
69 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
70 |
+
|
71 |
+
\f0\b \cf0 Document / PDF fetching + parsing
|
72 |
+
\f1\b0 \cell
|
73 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
74 |
+
\cf0 Many state sources have PDFs, RFPs etc. Users want details, not just summary.\cell
|
75 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
76 |
+
\cf0 Use PDF adapter + follow-links. Flag PDFs clearly in UI.\cell \row
|
77 |
+
|
78 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
79 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
80 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
81 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
82 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
83 |
+
|
84 |
+
\f0\b \cf0 Staleness / expiration detection
|
85 |
+
\f1\b0 \cell
|
86 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
87 |
+
\cf0 Opportunities with expired deadlines clutter feeds.\cell
|
88 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
89 |
+
\cf0 Tag items with \'93deadline passed\'94 or \'93pending\'94 etc. Maybe auto-hide old ones after some time.\cell \row
|
90 |
+
|
91 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
92 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
93 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
94 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
95 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
96 |
+
|
97 |
+
\f0\b \cf0 User feedback / manual review / save items
|
98 |
+
\f1\b0 \cell
|
99 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
100 |
+
\cf0 Users can flag false positives, save promising ones.\cell
|
101 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
102 |
+
\cf0 Add \'93bookmark / save\'94 or \'93dismiss\'94 features. Could feed into machine learning or heuristics over time.\cell \row
|
103 |
+
|
104 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
105 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
106 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
107 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
108 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
109 |
+
|
110 |
+
\f0\b \cf0 Dashboard / Analytics
|
111 |
+
\f1\b0 \cell
|
112 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
113 |
+
\cf0 Helps see grant volume by type, deadlines, states, etc.\cell
|
114 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
115 |
+
\cf0 Simple charts like # grants by month, # capacity-building grants vs total, etc.\cell \row
|
116 |
+
|
117 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
118 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
119 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
120 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
121 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
122 |
+
|
123 |
+
\f0\b \cf0 Good defaults / simple UI
|
124 |
+
\f1\b0 \cell
|
125 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
126 |
+
\cf0 Avoid cognitive overload \'97 show only essentials, allow advanced filtering if needed.\cell
|
127 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
128 |
+
\cf0 E.g. show title, deadline, source, eligibility; hide long descriptions by default.\cell \row
|
129 |
+
|
130 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
|
131 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
132 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
133 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
134 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
135 |
+
|
136 |
+
\f0\b \cf0 Mobile-friendly / responsive
|
137 |
+
\f1\b0 \cell
|
138 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
139 |
+
\cf0 Many will check on phones or tablets.\cell
|
140 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
141 |
+
\cf0 If web UI, ensure collapsible fields, simple menus.\cell \row
|
142 |
+
|
143 |
+
\itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrt\brdrnil \trbrdrr\brdrnil
|
144 |
+
\clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
|
145 |
+
\clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
|
146 |
+
\clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
|
147 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
148 |
+
|
149 |
+
\f0\b \cf0 Integrations
|
150 |
+
\f1\b0 \cell
|
151 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
152 |
+
\cf0 E.g. calendar, Slack/email alerts.\cell
|
153 |
+
\pard\intbl\itap1\pardeftab720\partightenfactor0
|
154 |
+
\cf0 Helps push notifications rather than only manual checking.\cell \lastrow\row
|
155 |
+
\pard\pardeftab720\partightenfactor0
|
156 |
+
\cf4 \
|
157 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
158 |
+
|
159 |
+
\f0\b\fs36 \cf0 What to avoid / limit to keep usability\
|
160 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
161 |
+
|
162 |
+
\f1\b0\fs24 \cf0 These things often add data/noise or complexity, so either avoid or gate them behind \'93advanced\'94 toggles.\
|
163 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
164 |
+
\ls1\ilvl0
|
165 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
166 |
+
Over-broad scraping
|
167 |
+
\f1\b0 : pulling every \'93policy\'94, \'93program info\'94, \'93newsletter\'94, etc. just because capacity keywords appear once.\
|
168 |
+
\ls1\ilvl0
|
169 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
170 |
+
Too much automation without oversight
|
171 |
+
\f1\b0 : e.g. deadlines that silently go missing when PDF text parsing fails.\
|
172 |
+
\ls1\ilvl0
|
173 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
174 |
+
Overwhelming metadata fields
|
175 |
+
\f1\b0 : fields like \'93agency budget history\'94, \'93application score weights\'94, etc., unless users request them.\
|
176 |
+
\ls1\ilvl0
|
177 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
178 |
+
Frequent UI changes or too many fields
|
179 |
+
\f1\b0 \'97 keep interface consistent.\
|
180 |
+
\ls1\ilvl0
|
181 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
182 |
+
Large batch of false positives
|
183 |
+
\f1\b0 \'97 this ruins trust quickly.\
|
184 |
+
\pard\pardeftab720\partightenfactor0
|
185 |
+
\cf4 \
|
186 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
187 |
+
|
188 |
+
\f0\b\fs36 \cf0 What to build next in your tool\
|
189 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
190 |
+
|
191 |
+
\f1\b0\fs24 \cf0 Here are prioritized improvements/next features for your tool to make it more powerful while preserving clarity:\
|
192 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
193 |
+
\ls2\ilvl0
|
194 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 1 }\expnd0\expndtw0\kerning0
|
195 |
+
Deadline extraction / detection
|
196 |
+
\f1\b0 \
|
197 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
198 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
199 |
+
\f3 \uc0\u9702
|
200 |
+
\f1 }\expnd0\expndtw0\kerning0
|
201 |
+
From Grants.gov API: likely available.\
|
202 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
203 |
+
\f3 \uc0\u9702
|
204 |
+
\f1 }\expnd0\expndtw0\kerning0
|
205 |
+
From scraped state pages / PDFs: attempt to parse \'93deadline\'94, \'93closing date\'94. If missing, mark as \'93TBD\'94.\
|
206 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
207 |
+
\f3 \uc0\u9702
|
208 |
+
\f1 }\expnd0\expndtw0\kerning0
|
209 |
+
UI: highlight upcoming deadlines (\'93Due in next 30 days\'94).\
|
210 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
211 |
+
\ls2\ilvl0
|
212 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 2 }\expnd0\expndtw0\kerning0
|
213 |
+
\'93Open / Active\'94 flag
|
214 |
+
\f1\b0 \
|
215 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
216 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
217 |
+
\f3 \uc0\u9702
|
218 |
+
\f1 }\expnd0\expndtw0\kerning0
|
219 |
+
If RFPs are active or you can detect \'93application now open\'94 in text.\
|
220 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
221 |
+
\f3 \uc0\u9702
|
222 |
+
\f1 }\expnd0\expndtw0\kerning0
|
223 |
+
If not, mark as \'93reference / program\'94 so users know it\'92s structural info, not a live call.\
|
224 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
225 |
+
\ls2\ilvl0
|
226 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 3 }\expnd0\expndtw0\kerning0
|
227 |
+
Bookmark / dismiss / feedback
|
228 |
+
\f1\b0 \
|
229 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
230 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
231 |
+
\f3 \uc0\u9702
|
232 |
+
\f1 }\expnd0\expndtw0\kerning0
|
233 |
+
Users should be able to mark \'93this is useful\'94 or \'93not relevant\'94 to train future filtering.\
|
234 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
235 |
+
\f3 \uc0\u9702
|
236 |
+
\f1 }\expnd0\expndtw0\kerning0
|
237 |
+
Possibly store local tags (e.g. \'93my state\'94, \'93my priority\'94).\
|
238 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
239 |
+
\ls2\ilvl0
|
240 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 4 }\expnd0\expndtw0\kerning0
|
241 |
+
Improved link / PDF following
|
242 |
+
\f1\b0 \
|
243 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
244 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
245 |
+
\f3 \uc0\u9702
|
246 |
+
\f1 }\expnd0\expndtw0\kerning0
|
247 |
+
As suggested earlier.\
|
248 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
249 |
+
\f3 \uc0\u9702
|
250 |
+
\f1 }\expnd0\expndtw0\kerning0
|
251 |
+
Make sure link selectors are fine-tuned for each state source.\
|
252 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
253 |
+
\ls2\ilvl0
|
254 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 5 }\expnd0\expndtw0\kerning0
|
255 |
+
Expired / historical items archive
|
256 |
+
\f1\b0 \
|
257 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
258 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
259 |
+
\f3 \uc0\u9702
|
260 |
+
\f1 }\expnd0\expndtw0\kerning0
|
261 |
+
Hide automatically unless explicitly requested. Keeps main view clean.\
|
262 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
263 |
+
\ls2\ilvl0
|
264 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 6 }\expnd0\expndtw0\kerning0
|
265 |
+
Search + sort + filter UI in front end
|
266 |
+
\f1\b0 \
|
267 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
268 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
269 |
+
\f3 \uc0\u9702
|
270 |
+
\f1 }\expnd0\expndtw0\kerning0
|
271 |
+
Filters: State, source type (federal/state), capacity vs other, keyword.\
|
272 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
273 |
+
\f3 \uc0\u9702
|
274 |
+
\f1 }\expnd0\expndtw0\kerning0
|
275 |
+
Sort by deadline, date posted, amount (if available).\
|
276 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
277 |
+
\ls2\ilvl0
|
278 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 7 }\expnd0\expndtw0\kerning0
|
279 |
+
Notifications / reminders
|
280 |
+
\f1\b0 \
|
281 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
282 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
283 |
+
\f3 \uc0\u9702
|
284 |
+
\f1 }\expnd0\expndtw0\kerning0
|
285 |
+
Let users get alerts (email / Slack / calendar) for items that match their saved filters and have upcoming deadlines.\
|
286 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
287 |
+
\ls2\ilvl0
|
288 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 8 }\expnd0\expndtw0\kerning0
|
289 |
+
Confidence score or matching hint
|
290 |
+
\f1\b0 \
|
291 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
292 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
293 |
+
\f3 \uc0\u9702
|
294 |
+
\f1 }\expnd0\expndtw0\kerning0
|
295 |
+
For scraped items, display \'93match strength\'94 (how many capacity keywords matched, whether in title vs body). Helps users see which items are likely relevant.\
|
296 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
297 |
+
\ls2\ilvl0
|
298 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 9 }\expnd0\expndtw0\kerning0
|
299 |
+
Performance / scheduling
|
300 |
+
\f1\b0 \
|
301 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
302 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
303 |
+
\f3 \uc0\u9702
|
304 |
+
\f1 }\expnd0\expndtw0\kerning0
|
305 |
+
Check feeds regularly, avoid stale caches.\
|
306 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
307 |
+
\f3 \uc0\u9702
|
308 |
+
\f1 }\expnd0\expndtw0\kerning0
|
309 |
+
Maybe incremental ingest (only new items) rather than full crawl all the time.\
|
310 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
311 |
+
\ls2\ilvl0
|
312 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 10 }\expnd0\expndtw0\kerning0
|
313 |
+
Testing & feedback
|
314 |
+
\f1\b0 \
|
315 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
316 |
+
\ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
|
317 |
+
\f3 \uc0\u9702
|
318 |
+
\f1 }\expnd0\expndtw0\kerning0
|
319 |
+
Ask actual users (your team) to test beta versions and tell you what\'92s too much / too little.\
|
320 |
+
\ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
|
321 |
+
\f3 \uc0\u9702
|
322 |
+
\f1 }\expnd0\expndtw0\kerning0
|
323 |
+
Adjust based on real use.\
|
324 |
+
\pard\pardeftab720\partightenfactor0
|
325 |
+
\cf4 \
|
326 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
327 |
+
|
328 |
+
\f0\b\fs36 \cf0 Your roadmap can look like this\
|
329 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
330 |
+
|
331 |
+
\f1\b0\fs24 \cf0 Here\'92s a potential plan for the next few sprints:\
|
332 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
333 |
+
\ls3\ilvl0
|
334 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
335 |
+
Sprint 1
|
336 |
+
\f1\b0 (now): Add deadline extraction + \'93active\'94 flag + improved link following for state sources.\
|
337 |
+
\ls3\ilvl0
|
338 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
339 |
+
Sprint 2
|
340 |
+
\f1\b0 : Bookmark/dismiss features + advanced filtering UI + sort by deadline.\
|
341 |
+
\ls3\ilvl0
|
342 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
343 |
+
Sprint 3
|
344 |
+
\f1\b0 : Notifications/reminders + mobile UI polish + confidence scoring.\
|
345 |
+
\ls3\ilvl0
|
346 |
+
\f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
347 |
+
Sprint 4
|
348 |
+
\f1\b0 : Incorporate JS-rendered sources (Playwright) + archive/historical mode.\
|
349 |
+
\pard\tx720\pardeftab720\sa240\partightenfactor0
|
350 |
+
\cf0 \
|
351 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
352 |
+
\cf0 \outl0\strokewidth0 \strokec2 Yes\'97exactly. We can treat the roadmap I outlined earlier as
|
353 |
+
\f0\b four sprints
|
354 |
+
\f1\b0 .\uc0\u8232 Here\'92s a clear
|
355 |
+
\f0\b Sprint 1 plan
|
356 |
+
\f1\b0 (2\'963 weeks is typical) so we can start delivering value quickly while keeping the other sprints in view.\
|
357 |
+
\pard\pardeftab720\partightenfactor0
|
358 |
+
\cf5 \strokec5 \
|
359 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
360 |
+
|
361 |
+
\f4\fs36 \cf0 \strokec2 \uc0\u55356 \u57281
|
362 |
+
\f0\b Sprint 1 \'96 \'93Actionable Deadlines & Better State Data\'94\
|
363 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
364 |
+
|
365 |
+
\fs24 \cf0 Goal:
|
366 |
+
\f1\b0 Surface
|
367 |
+
\f5\i current
|
368 |
+
\f1\i0 and
|
369 |
+
\f5\i timely
|
370 |
+
\f1\i0 grant opportunities\'97especially from PA & MD\'97without flooding users.\
|
371 |
+
\pard\pardeftab720\sa280\partightenfactor0
|
372 |
+
|
373 |
+
\f0\b\fs28 \cf0 Deliverables\
|
374 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
375 |
+
\ls4\ilvl0
|
376 |
+
\fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 1 }\expnd0\expndtw0\kerning0
|
377 |
+
\outl0\strokewidth0 \strokec2 Deadline Extraction & \'93Active\'94 Flag
|
378 |
+
\f1\b0 \
|
379 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
380 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
381 |
+
\f3 \uc0\u9702
|
382 |
+
\f1 }\expnd0\expndtw0\kerning0
|
383 |
+
\outl0\strokewidth0 \strokec2 Parse
|
384 |
+
\f2\fs26 deadline
|
385 |
+
\f1\fs24 /
|
386 |
+
\f2\fs26 closing date
|
387 |
+
\f1\fs24 text from Grants.gov (API gives it directly).\
|
388 |
+
\ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
389 |
+
\f3 \uc0\u9702
|
390 |
+
\f1 }\expnd0\expndtw0\kerning0
|
391 |
+
\outl0\strokewidth0 \strokec2 Add a lightweight NLP/regex extractor for state HTML/PDFs (\'93Deadline\'94, \'93Applications due\'94, etc.).\
|
392 |
+
\ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
393 |
+
\f3 \uc0\u9702
|
394 |
+
\f1 }\expnd0\expndtw0\kerning0
|
395 |
+
\outl0\strokewidth0 \strokec2 In the index, add fields:
|
396 |
+
\f2\fs26 deadline_date
|
397 |
+
\f1\fs24 ,
|
398 |
+
\f2\fs26 is_active
|
399 |
+
\f1\fs24 (true if deadline >= today or marked open/TBD).\
|
400 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
401 |
+
\ls4\ilvl0
|
402 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 2 }\expnd0\expndtw0\kerning0
|
403 |
+
\outl0\strokewidth0 \strokec2 UI Updates in the Dashboard
|
404 |
+
\f1\b0 \
|
405 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
406 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
407 |
+
\f3 \uc0\u9702
|
408 |
+
\f1 }\expnd0\expndtw0\kerning0
|
409 |
+
\outl0\strokewidth0 \strokec2 Show deadlines and days-to-close (already in the mock).\
|
410 |
+
\ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
411 |
+
\f3 \uc0\u9702
|
412 |
+
\f1 }\expnd0\expndtw0\kerning0
|
413 |
+
\outl0\strokewidth0 \strokec2 Add a small
|
414 |
+
\f0\b \'93Active Only\'94
|
415 |
+
\f1\b0 toggle to hide expired or guideline-only records.\
|
416 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
417 |
+
\ls4\ilvl0
|
418 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 3 }\expnd0\expndtw0\kerning0
|
419 |
+
\outl0\strokewidth0 \strokec2 Link-Following Enhancements
|
420 |
+
\f1\b0 \
|
421 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
422 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
423 |
+
\f3 \uc0\u9702
|
424 |
+
\f1 }\expnd0\expndtw0\kerning0
|
425 |
+
\outl0\strokewidth0 \strokec2 Update YAML for
|
426 |
+
\f0\b MD DHCD Press
|
427 |
+
\f1\b0 ,
|
428 |
+
\f0\b MTA
|
429 |
+
\f1\b0 ,
|
430 |
+
\f0\b PennDOT
|
431 |
+
\f1\b0 , etc. to:\
|
432 |
+
\pard\tx1660\tx2160\pardeftab720\li2160\fi-2160\sa240\partightenfactor0
|
433 |
+
\ls4\ilvl2
|
434 |
+
\f2\fs26 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
435 |
+
\f6 \uc0\u9642
|
436 |
+
\f2 }\expnd0\expndtw0\kerning0
|
437 |
+
\outl0\strokewidth0 \strokec2 parse.follow_links: true
|
438 |
+
\f1\fs24 \
|
439 |
+
\ls4\ilvl2
|
440 |
+
\f2\fs26 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
441 |
+
\f6 \uc0\u9642
|
442 |
+
\f2 }\expnd0\expndtw0\kerning0
|
443 |
+
\outl0\strokewidth0 \strokec2 crawl.max_depth: 1
|
444 |
+
\f1\fs24 \
|
445 |
+
\ls4\ilvl2
|
446 |
+
\f2\fs26 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
447 |
+
\f6 \uc0\u9642
|
448 |
+
\f2 }\expnd0\expndtw0\kerning0
|
449 |
+
\outl0\strokewidth0 \strokec2 link_selectors
|
450 |
+
\f1\fs24 for PDFs / NOFO pages\
|
451 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
452 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
453 |
+
\f3 \uc0\u9702
|
454 |
+
\f1 }\expnd0\expndtw0\kerning0
|
455 |
+
\outl0\strokewidth0 \strokec2 Ensure adapter fetches and normalizes these linked pages.\
|
456 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
457 |
+
\ls4\ilvl0
|
458 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 4 }\expnd0\expndtw0\kerning0
|
459 |
+
\outl0\strokewidth0 \strokec2 Improved Capacity Filter
|
460 |
+
\f1\b0 \
|
461 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
462 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
463 |
+
\f3 \uc0\u9702
|
464 |
+
\f1 }\expnd0\expndtw0\kerning0
|
465 |
+
\outl0\strokewidth0 \strokec2 Add a few transportation-related capacity phrases (\'93service capacity\'94, \'93provider capacity\'94) to capture more relevant 5310-type opportunities.\
|
466 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
467 |
+
\ls4\ilvl0
|
468 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 5 }\expnd0\expndtw0\kerning0
|
469 |
+
\outl0\strokewidth0 \strokec2 Basic Quality Checks
|
470 |
+
\f1\b0 \
|
471 |
+
\pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
|
472 |
+
\ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
473 |
+
\f3 \uc0\u9702
|
474 |
+
\f1 }\expnd0\expndtw0\kerning0
|
475 |
+
\outl0\strokewidth0 \strokec2 Logging: show counts of items skipped due to expired deadlines or missing text.\
|
476 |
+
\ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
|
477 |
+
\f3 \uc0\u9702
|
478 |
+
\f1 }\expnd0\expndtw0\kerning0
|
479 |
+
\outl0\strokewidth0 \strokec2 Quick manual review with your team to confirm that the new records are genuinely grant opportunities.\
|
480 |
+
\pard\pardeftab720\partightenfactor0
|
481 |
+
\cf5 \strokec5 \
|
482 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
483 |
+
|
484 |
+
\f4\fs36 \cf0 \strokec2 \uc0\u55357 \u56621
|
485 |
+
\f0\b Sprint 2\'964 Preview\
|
486 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
487 |
+
\ls5\ilvl0
|
488 |
+
\fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
489 |
+
\outl0\strokewidth0 \strokec2 Sprint 2:
|
490 |
+
\f1\b0 Bookmarks & Dismiss, Advanced Filtering UI, Sorting by deadline/confidence.\
|
491 |
+
\ls5\ilvl0
|
492 |
+
\f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
493 |
+
\outl0\strokewidth0 \strokec2 Sprint 3:
|
494 |
+
\f1\b0 Notifications (email/Slack/ICS calendar) + mobile UI polish + confidence scoring display.\
|
495 |
+
\ls5\ilvl0
|
496 |
+
\f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
|
497 |
+
\outl0\strokewidth0 \strokec2 Sprint 4:
|
498 |
+
\f1\b0 Full
|
499 |
+
\f0\b JS-rendered
|
500 |
+
\f1\b0 source support (Playwright for MD OneStop etc.) and historical archive mode.\
|
501 |
+
\pard\pardeftab720\partightenfactor0
|
502 |
+
\cf5 \strokec5 \
|
503 |
+
\pard\pardeftab720\sa298\partightenfactor0
|
504 |
+
|
505 |
+
\f0\b\fs36 \cf0 \strokec2 Next Steps to Kick Off Sprint 1\
|
506 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
507 |
+
\ls6\ilvl0
|
508 |
+
\fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 1 }\expnd0\expndtw0\kerning0
|
509 |
+
\outl0\strokewidth0 \strokec2 Confirm dev environment
|
510 |
+
\f1\b0 \'96 you already have
|
511 |
+
\f2\fs26 grants-rag
|
512 |
+
\f1\fs24 running locally.\
|
513 |
+
\ls6\ilvl0
|
514 |
+
\f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 2 }\expnd0\expndtw0\kerning0
|
515 |
+
\outl0\strokewidth0 \strokec2 Create a new branch
|
516 |
+
\f1\b0 :\uc0\u8232
|
517 |
+
\f2\fs26 \uc0\u8232 \u8232 \u8232 git checkout -b sprint1-deadline-active-flag\
|
518 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0
|
519 |
+
\ls6\ilvl0\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 3 }\expnd0\expndtw0\kerning0
|
520 |
+
\outl0\strokewidth0 \strokec2 \uc0\u8232 \u8232
|
521 |
+
\f1\fs24 \
|
522 |
+
\pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
|
523 |
+
\ls6\ilvl0
|
524 |
+
\f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 4 }\expnd0\expndtw0\kerning0
|
525 |
+
\outl0\strokewidth0 \strokec2 Add extraction utilities
|
526 |
+
\f1\b0 \uc0\u8232
|
527 |
+
\f5\i Regex for state HTML/PDFs
|
528 |
+
\f1\i0 and update
|
529 |
+
\f2\fs26 _normalize_web_record
|
530 |
+
\f1\fs24 to accept
|
531 |
+
\f2\fs26 deadline_date
|
532 |
+
\f1\fs24 and
|
533 |
+
\f2\fs26 is_active
|
534 |
+
\f1\fs24 .\
|
535 |
+
\ls6\ilvl0
|
536 |
+
\f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 5 }\expnd0\expndtw0\kerning0
|
537 |
+
\outl0\strokewidth0 \strokec2 UI
|
538 |
+
\f1\b0 : enable the
|
539 |
+
\f0\b Active Only
|
540 |
+
\f1\b0 toggle (mock is already designed for it).\
|
541 |
+
\pard\pardeftab720\partightenfactor0
|
542 |
+
\cf5 \strokec5 \
|
543 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
544 |
+
|
545 |
+
\f0\b \cf0 \strokec2 Timeline suggestion:
|
546 |
+
\f1\b0 2 weeks dev + 1 week QA / user feedback.\uc0\u8232 Once Sprint 1 is merged, we\'92ll have a tool that surfaces only
|
547 |
+
\f5\i active
|
548 |
+
\f1\i0 capacity-building grants with clear deadlines\'97exactly the balance of \'93rich data without overload\'94 we\'92re after.\
|
549 |
+
\pard\pardeftab720\partightenfactor0
|
550 |
+
\cf0 \
|
551 |
+
\
|
552 |
+
\
|
553 |
+
\
|
554 |
+
\
|
555 |
+
\
|
556 |
+
\
|
557 |
+
\pard\pardeftab720\sa240\partightenfactor0
|
558 |
+
\cf0 \
|
559 |
+
\pard\pardeftab720\partightenfactor0
|
560 |
+
\cf0 \
|
561 |
+
\pard\pardeftab720\qc\partightenfactor0
|
562 |
+
|
563 |
+
\f7\fs22 \cf6 \strokec6 \
|
564 |
+
\pard\pardeftab720\partightenfactor0
|
565 |
+
|
566 |
+
\f1\fs24 \cf0 \strokec2 \
|
567 |
+
\pard\tx720\pardeftab720\sa240\partightenfactor0
|
568 |
+
\cf0 \outl0\strokewidth0 \
|
569 |
+
}
|