michaellupo74 committed on
Commit
796e66c
·
1 Parent(s): b363844

feat: add state-level HTML/PDF adapters and updated capacity filters

Browse files
Files changed (4) hide show
  1. .gitignore +7 -0
  2. app/ingest.py +294 -14
  3. config/sources.yaml +148 -1
  4. project-plan-rag.rtf +569 -0
.gitignore CHANGED
@@ -1,3 +1,10 @@
 
 
 
 
 
 
 
1
  .venv/
2
  __pycache__/
3
  *.pyc
 
1
+
2
+ venv/
3
+ *.pyc
4
+ __pycache__/
5
+ .DS_Store
6
+ Makefile.old
7
+ start-up-project.txt
8
  .venv/
9
  __pycache__/
10
  *.pyc
app/ingest.py CHANGED
@@ -2,7 +2,7 @@
2
  from __future__ import annotations
3
  import json
4
  from pathlib import Path
5
- from typing import Dict, List, Any
6
 
7
  import yaml
8
  import numpy as np
@@ -11,6 +11,12 @@ from sentence_transformers import SentenceTransformer
11
  from app.paths import DOCSTORE_DIR, INDEX_DIR
12
  from .normalize import normalize # ← central normalizer
13
 
 
 
 
 
 
 
14
 
15
  # -------------------- Config --------------------
16
 
@@ -19,6 +25,65 @@ def load_config(cfg_path: str) -> Dict:
19
  return yaml.safe_load(f)
20
 
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # -------------------- Grants.gov collector --------------------
23
 
24
  def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
@@ -39,6 +104,194 @@ def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
39
  return [h for h in hits if isinstance(h, dict)]
40
 
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  # -------------------- Write docstore & build index --------------------
43
 
44
  def _save_docstore(recs: List[Dict[str, Any]]) -> str:
@@ -55,7 +308,6 @@ def _build_index_from_docstore() -> int:
55
  if not ds_path.exists():
56
  raise RuntimeError("Docstore not found. Run ingest first.")
57
 
58
- # Load records → texts + metas
59
  texts: List[str] = []
60
  metas: List[Dict[str, Any]] = []
61
  with ds_path.open("r", encoding="utf-8") as f:
@@ -85,16 +337,15 @@ def _build_index_from_docstore() -> int:
85
  print(f"[index] Rows loaded from docstore: {len(texts)}")
86
 
87
  if not texts:
88
- # Write an empty index file so downstream UI can still boot gracefully
89
- (INDEX_DIR).mkdir(parents=True, exist_ok=True)
90
  (INDEX_DIR / "meta.json").write_text(json.dumps([], ensure_ascii=False))
91
  print("[index] No texts to embed. Wrote empty meta.json.")
92
  return 0
93
 
94
- # Embed (CPU default; keeps it portable)
95
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
96
  model.max_seq_length = 256
97
- batch = max(8, min(32, len(texts))) # sensible batch size for small corpora
98
  emb = model.encode(
99
  texts,
100
  convert_to_numpy=True,
@@ -117,22 +368,32 @@ def _build_index_from_docstore() -> int:
117
  return len(texts)
118
 
119
 
120
- # -------------------- Ingest main --------------------
 
 
121
 
122
  def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
123
  """
124
- Reads config, fetches from enabled sources, normalizes with a single map,
125
- attaches categories/geo consistently, DEDUPEs, and builds the index.
 
126
  """
127
  cfg = load_config(cfg_path)
128
 
 
 
 
 
 
 
 
129
  all_rows: List[Dict[str, Any]] = []
130
  for entry in cfg.get("sources", []):
131
  if not entry.get("enabled"):
132
  continue
133
 
134
  name = entry.get("name", "<source>")
135
- geo = entry.get("geo") or "US"
136
  cats = entry.get("categories") or []
137
  static = {"geo": geo, "categories": cats}
138
 
@@ -143,20 +404,37 @@ def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
143
  raw_hits = _collect_from_grantsgov_api(entry)
144
  rows = [normalize("grants_gov", h, static) for h in raw_hits]
145
 
 
 
 
 
 
 
146
  elif typ == "local_sample":
147
  p = Path(entry["path"]).expanduser()
148
  blob = json.loads(p.read_text(encoding="utf-8"))
149
  items = blob.get("opportunities") or []
150
  rows = [normalize("local_sample", op, static) for op in items]
151
 
152
- else:
153
- # Future adapters (doj_ojp, state_md, web_page, json_static, …)
154
- rows = []
 
 
 
 
 
 
 
 
 
 
 
155
 
156
  print(f"[collect] {name} → {len(rows)} rows")
157
  all_rows.extend(rows)
158
 
159
- # ---- DEDUPE (id → url → title) ----
160
  seen, unique = set(), []
161
  for r in all_rows:
162
  key = r.get("id") or r.get("url") or r.get("title")
@@ -172,6 +450,8 @@ def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
172
  return path, n
173
 
174
 
 
 
175
  if __name__ == "__main__":
176
  import argparse
177
  ap = argparse.ArgumentParser()
 
2
  from __future__ import annotations
3
  import json
4
  from pathlib import Path
5
+ from typing import Dict, List, Any, Tuple, Optional
6
 
7
  import yaml
8
  import numpy as np
 
11
  from app.paths import DOCSTORE_DIR, INDEX_DIR
12
  from .normalize import normalize # ← central normalizer
13
 
14
+ import re
15
+ import time
16
+ import hashlib
17
+ import requests
18
+ from bs4 import BeautifulSoup
19
+
20
 
21
  # -------------------- Config --------------------
22
 
 
25
  return yaml.safe_load(f)
26
 
27
 
28
+ # -------------------- Capacity / Geo Filters (config-driven) --------------------
29
+ # controls live in config/sources.yaml:
30
+ # filters:
31
+ # capacity_only: true
32
+ # pa_md_only: false
33
+
34
+ _INCLUDE_PATTERNS = [re.compile(p, re.I) for p in [
35
+ r"\bcapacity(?:[-\s]?building)?\b",
36
+ r"\btechnical\s+assistance\b",
37
+ r"\bTA\b",
38
+ r"\borganizational\s+(capacity|effectiveness|development|readiness|stabilization)\b",
39
+ r"\borganization(?:al)?\s+infrastructure\b",
40
+ r"\bback[-\s]?office\b|\bbackbone\s+organization\b",
41
+ r"\bgovernance\b|\bboard\s+development\b|\bboard\s+training\b",
42
+ r"\bpre[-\s]?development\b|\bpredevelopment\b|\bplanning\s+grant\b",
43
+ r"\bdata\s+systems?\b|\bCRM\b|\bcase\s+management\b",
44
+ r"\b(staff|workforce)\s+capacity\b|\bhire\s+(?:staff|positions?)\b",
45
+ r"\bscal(?:e|ing)\s+capacity\b|\bexpand\s+capacity\b",
46
+ r"\bnonprofit\b|\bfaith[-\s]?based\b|\bcommunity[-\s]?based\b",
47
+ ]]
48
+
49
+ _EXCLUDE_PATTERNS = [re.compile(p, re.I) for p in [
50
+ r"\bteaching\s+assistant\b|\bTAs\b",
51
+ r"\bbench\s+capacity\b|\bmanufacturing\s+capacity\b(?!.*organiz)",
52
+ r"\bclinical\s+trial\b|\blaboratory\s+capacity\b(?!.*community)",
53
+ r"\b(postsecondary|university|college)\b(?!.*community\s+partner)",
54
+ r"\bconstruction\b(?!.*(admin|organiz|back[-\s]?office|governance|systems))",
55
+ ]]
56
+
57
+ _PA_MD_HINTS = re.compile(
58
+ r"\b("
59
+ r"Pennsylvania|PA\b|Harrisburg|Philadelphia|Allegheny|Montgomery County\b|Pittsburgh|Scranton|Erie|"
60
+ r"Maryland|MD\b|Annapolis|Baltimore|Prince\s+George'?s|Howard County\b"
61
+ r")\b",
62
+ re.I,
63
+ )
64
+
65
+ def _doc_text_from_row(rec: Dict[str, Any]) -> str:
66
+ title = rec.get("title") or ""
67
+ synopsis = rec.get("synopsis") or rec.get("summary") or ""
68
+ agency = rec.get("agency") or ""
69
+ eligibility = rec.get("eligibility") or ""
70
+ categories = " ".join(rec.get("categories") or []) if isinstance(rec.get("categories"), list) else (rec.get("categories") or "")
71
+ geo = rec.get("geo") or ""
72
+ return "\n".join([title, synopsis, agency, eligibility, categories, geo]).strip()
73
+
74
+ def _is_capacity_building_text(text: str) -> bool:
75
+ if not text:
76
+ return False
77
+ if any(p.search(text) for p in _EXCLUDE_PATTERNS):
78
+ return False
79
+ return any(p.search(text) for p in _INCLUDE_PATTERNS)
80
+
81
+ def _is_pa_md_text(text: str) -> bool:
82
+ if not text:
83
+ return False
84
+ return bool(_PA_MD_HINTS.search(text))
85
+
86
+
87
  # -------------------- Grants.gov collector --------------------
88
 
89
  def _collect_from_grantsgov_api(src: Dict) -> List[Dict[str, Any]]:
 
104
  return [h for h in hits if isinstance(h, dict)]
105
 
106
 
107
+ # -------------------- NEW: Generic HTML / PDF collectors --------------------
108
+
109
# Headers sent with every outbound request made by the HTML / PDF collectors.
_HTTP_HEADERS = {
    "User-Agent": "grants-rag/1.0 (+https://example.local) requests",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
}


def _http_get(url: str, timeout: int = 20) -> Optional[requests.Response]:
    """GET *url* and return the response only when it is usable.

    Args:
        url: Absolute URL to fetch.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        The response when the status is 200 and the body is non-empty,
        otherwise ``None``. Failures are reported on stdout rather than
        swallowed, so a source that yields zero rows can be told apart from
        a source that could not be reached.
    """
    try:
        resp = requests.get(url, headers=_HTTP_HEADERS, timeout=timeout)
    except requests.RequestException as exc:
        print(f"[http] GET {url} failed: {exc}")
        return None

    if resp.status_code != 200:
        print(f"[http] GET {url} returned HTTP {resp.status_code}")
        return None
    if not resp.content:
        print(f"[http] GET {url} returned an empty body")
        return None
    return resp
122
+
123
def _soup(html: str) -> BeautifulSoup:
    """Parse *html* into a BeautifulSoup tree.

    Prefers the ``lxml`` parser and falls back to the standard-library
    ``html.parser`` when lxml is not installed, instead of raising
    ``FeatureNotFound``.
    """
    from bs4 import FeatureNotFound  # raised when the requested parser is missing

    try:
        return BeautifulSoup(html, "lxml")
    except FeatureNotFound:
        return BeautifulSoup(html, "html.parser")
126
+
127
def _text_from_soup(s: BeautifulSoup, selectors: Optional[List[str]] = None) -> Tuple[str, str]:
    """Extract ``(title, text)`` from a parsed page.

    Args:
        s: Parsed document.
        selectors: Optional CSS selectors naming the content containers. When
            none of them match, a set of common containers is tried, and
            finally ``<body>``.

    Returns:
        ``(title, body)`` where *title* is the stripped ``<title>`` string
        (or ``""``) and *body* is the text of the selected containers joined
        by blank lines.
    """
    title = s.title.string.strip() if s.title and s.title.string else ""

    candidates = []
    for css in selectors or ():
        candidates.extend(s.select(css) or [])
    if not candidates:
        for css in ("main", "article", "#content", ".content", "[role='main']"):
            candidates.extend(s.select(css) or [])
    if not candidates and s.body:
        candidates = [s.body]

    # Overlapping selectors (e.g. "main" plus an "article" nested inside it)
    # would otherwise emit the same text twice. Keep only the outermost
    # matches, each once. Identity is used because Tag equality compares markup.
    candidate_ids = {id(node) for node in candidates if node is not None}
    emitted = set()
    parts: List[str] = []
    for node in candidates:
        if node is None or id(node) in emitted:
            continue
        if any(id(ancestor) in candidate_ids for ancestor in node.parents):
            continue  # already covered by a selected ancestor
        emitted.add(id(node))
        txt = node.get_text(separator="\n", strip=True)
        if txt:
            parts.append(txt)

    body = "\n\n".join(parts).strip()
    return title, body
153
+
154
+ def _make_id(*fields: str) -> str:
155
+ h = hashlib.sha1()
156
+ for f in fields:
157
+ if f:
158
+ h.update(f.encode("utf-8", "ignore"))
159
+ h.update(b"|")
160
+ return h.hexdigest()
161
+
162
+ def _normalize_web_record(
163
+ source_name: str,
164
+ url: str,
165
+ title: str,
166
+ body: str,
167
+ static: Dict[str, Any],
168
+ extra: Optional[Dict[str, Any]] = None,
169
+ ) -> Dict[str, Any]:
170
+ """
171
+ Produce a record shaped like normalize() output so downstream stays unchanged.
172
+ """
173
+ rec = {
174
+ "id": (extra or {}).get("id") or _make_id(url, title or body[:160]),
175
+ "title": title or (extra.get("title") if extra else "") or url,
176
+ "synopsis": body[:2000], # clip; embeddings use title+synopsis later
177
+ "summary": None,
178
+ "url": url,
179
+ "source": source_name,
180
+ "geo": static.get("geo"),
181
+ "categories": static.get("categories"),
182
+ "agency": (extra or {}).get("agency", ""),
183
+ "eligibility": (extra or {}).get("eligibility", ""),
184
+ "deadline": (extra or {}).get("deadline"),
185
+ "program_number": (extra or {}).get("program_number"),
186
+ "posted_date": (extra or {}).get("posted_date"),
187
+ }
188
+ return rec
189
+
190
+ def _collect_from_http_html(entry: Dict, source_name: str, static: Dict[str, Any]) -> List[Dict[str, Any]]:
191
+ """
192
+ Supports types: 'web_page' and 'http_html'
193
+ Config keys supported:
194
+ - url (str)
195
+ - parse: { follow_links: bool, link_selectors: [..], content_selectors: [..] }
196
+ - crawl: { schedule: "...", max_depth: int } # max_depth 0/None = only landing
197
+ """
198
+ url = entry.get("url")
199
+ if not url:
200
+ return []
201
+ r = _http_get(url)
202
+ if not r:
203
+ return []
204
+
205
+ s = _soup(r.text)
206
+ parse = entry.get("parse", {}) or entry.get("extract", {}) or {}
207
+ content_selectors = parse.get("content_selectors") or []
208
+ title, body = _text_from_soup(s, content_selectors)
209
+
210
+ rows = []
211
+ rows.append(_normalize_web_record(source_name, url, title, body, static, extra={"posted_date": None}))
212
+
213
+ # follow links?
214
+ follow = bool(parse.get("follow_links"))
215
+ link_selectors = parse.get("link_selectors") or []
216
+ crawl = entry.get("crawl", {}) or {}
217
+ max_depth = int(crawl.get("max_depth", 0) or 0)
218
+ visited = set([url])
219
+
220
+ def _enq_links(soup: BeautifulSoup) -> List[str]:
221
+ if link_selectors:
222
+ links = []
223
+ for sel in link_selectors:
224
+ for a in soup.select(sel) or []:
225
+ href = a.get("href")
226
+ if href and href.startswith("http"):
227
+ links.append(href)
228
+ out, seen = [], set()
229
+ for h in links:
230
+ if h not in seen:
231
+ out.append(h)
232
+ seen.add(h)
233
+ return out[:40] # polite cap
234
+ return []
235
+
236
+ if follow and max_depth > 0:
237
+ frontier = _enq_links(s)
238
+ depth = 1
239
+ while frontier and depth <= max_depth and len(rows) < 200:
240
+ next_frontier = []
241
+ for link in frontier:
242
+ if link in visited:
243
+ continue
244
+ visited.add(link)
245
+ rr = _http_get(link)
246
+ if not rr:
247
+ continue
248
+ ss = _soup(rr.text)
249
+ t2, b2 = _text_from_soup(ss, content_selectors)
250
+ if b2:
251
+ rows.append(_normalize_web_record(source_name, link, t2, b2, static, extra={"posted_date": None}))
252
+ if depth < max_depth:
253
+ next_frontier.extend(_enq_links(ss))
254
+ time.sleep(0.1) # gentle
255
+ frontier = next_frontier
256
+ depth += 1
257
+
258
+ return rows
259
+
260
+ def _collect_from_http_pdf(entry: Dict, source_name: str, static: Dict[str, Any]) -> List[Dict[str, Any]]:
261
+ """
262
+ type: 'http_pdf'
263
+ keys:
264
+ - url (single PDF fetch)
265
+ """
266
+ url = entry.get("url")
267
+ if not url:
268
+ return []
269
+
270
+ try:
271
+ from pdfminer.high_level import extract_text # lazy import
272
+ except Exception:
273
+ return []
274
+
275
+ rows = []
276
+ r = _http_get(url, timeout=40)
277
+ if not r:
278
+ return rows
279
+ tmp = DOCSTORE_DIR / (hashlib.sha1(url.encode("utf-8")).hexdigest() + ".pdf")
280
+ try:
281
+ DOCSTORE_DIR.mkdir(parents=True, exist_ok=True)
282
+ tmp.write_bytes(r.content)
283
+ body = extract_text(str(tmp)) or ""
284
+ finally:
285
+ try:
286
+ tmp.unlink(missing_ok=True)
287
+ except Exception:
288
+ pass
289
+ title = entry.get("name") or "PDF Document"
290
+ if body.strip():
291
+ rows.append(_normalize_web_record(source_name, url, title, body, static, extra={"posted_date": None}))
292
+ return rows
293
+
294
+
295
  # -------------------- Write docstore & build index --------------------
296
 
297
  def _save_docstore(recs: List[Dict[str, Any]]) -> str:
 
308
  if not ds_path.exists():
309
  raise RuntimeError("Docstore not found. Run ingest first.")
310
 
 
311
  texts: List[str] = []
312
  metas: List[Dict[str, Any]] = []
313
  with ds_path.open("r", encoding="utf-8") as f:
 
337
  print(f"[index] Rows loaded from docstore: {len(texts)}")
338
 
339
  if not texts:
340
+ INDEX_DIR.mkdir(parents=True, exist_ok=True)
 
341
  (INDEX_DIR / "meta.json").write_text(json.dumps([], ensure_ascii=False))
342
  print("[index] No texts to embed. Wrote empty meta.json.")
343
  return 0
344
 
345
+ # Embed (CPU default; portable)
346
  model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
347
  model.max_seq_length = 256
348
+ batch = max(8, min(32, len(texts)))
349
  emb = model.encode(
350
  texts,
351
  convert_to_numpy=True,
 
368
  return len(texts)
369
 
370
 
371
+ # -------------------- Public API: ingest --------------------
372
+
373
+ __all__ = ["ingest"]
374
 
375
  def ingest(cfg_path: str = "config/sources.yaml", env: Dict | None = None):
376
  """
377
+ Reads config, fetches from enabled sources via adapters, normalizes to a single schema,
378
+ applies filters (capacity / PA-MD), dedupes, writes docstore, and builds the FAISS index.
379
+ Returns (docstore_path, n_indexed).
380
  """
381
  cfg = load_config(cfg_path)
382
 
383
+ # ---- Filters from config ----
384
+ f_cfg = (cfg or {}).get("filters", {}) or {}
385
+ capacity_only = bool(f_cfg.get("capacity_only", True))
386
+ pa_md_only = bool(f_cfg.get("pa_md_only", False))
387
+ print(f"[filters] capacity_only = {'TRUE' if capacity_only else 'FALSE'}")
388
+ print(f"[filters] pa_md_only = {'TRUE' if pa_md_only else 'FALSE'}")
389
+
390
  all_rows: List[Dict[str, Any]] = []
391
  for entry in cfg.get("sources", []):
392
  if not entry.get("enabled"):
393
  continue
394
 
395
  name = entry.get("name", "<source>")
396
+ geo = entry.get("geo") or "US"
397
  cats = entry.get("categories") or []
398
  static = {"geo": geo, "categories": cats}
399
 
 
404
  raw_hits = _collect_from_grantsgov_api(entry)
405
  rows = [normalize("grants_gov", h, static) for h in raw_hits]
406
 
407
+ elif typ in ("web_page", "http_html"):
408
+ rows = _collect_from_http_html(entry, name, static)
409
+
410
+ elif typ == "http_pdf":
411
+ rows = _collect_from_http_pdf(entry, name, static)
412
+
413
  elif typ == "local_sample":
414
  p = Path(entry["path"]).expanduser()
415
  blob = json.loads(p.read_text(encoding="utf-8"))
416
  items = blob.get("opportunities") or []
417
  rows = [normalize("local_sample", op, static) for op in items]
418
 
419
+ # Unknown types => skip silently
420
+
421
+ # ---- Apply capacity / geo filters BEFORE collecting ----
422
+ if rows and (capacity_only or pa_md_only):
423
+ filtered = []
424
+ for r in rows:
425
+ t = _doc_text_from_row(r)
426
+ if capacity_only and not _is_capacity_building_text(t):
427
+ continue
428
+ if pa_md_only and not _is_pa_md_text(t):
429
+ continue
430
+ filtered.append(r)
431
+ print(f"[filter] {name}: kept {len(filtered)}/{len(rows)} after filters")
432
+ rows = filtered
433
 
434
  print(f"[collect] {name} → {len(rows)} rows")
435
  all_rows.extend(rows)
436
 
437
+ # ---- DEDUPE (by id → url → title) ----
438
  seen, unique = set(), []
439
  for r in all_rows:
440
  key = r.get("id") or r.get("url") or r.get("title")
 
450
  return path, n
451
 
452
 
453
+ # -------------------- CLI --------------------
454
+
455
  if __name__ == "__main__":
456
  import argparse
457
  ap = argparse.ArgumentParser()
config/sources.yaml CHANGED
@@ -1,4 +1,8 @@
1
  # Minimal, valid config — v6.3
 
 
 
 
2
  sources:
3
  # ---------- FEDERAL: Grants.gov (focused for buses/van/mobility & reentry) ----------
4
 
@@ -83,6 +87,8 @@ sources:
83
  sortBy: "openDate|desc"
84
 
85
  # ---------- STATE & METRO PASS-THROUGHS (FTA 5310 etc.) ----------
 
 
86
 
87
  - name: "Maryland MTA — Grants (incl. 5310)"
88
  type: web_page
@@ -156,8 +162,149 @@ sources:
156
  mode: "article"
157
  keep_links: true
158
 
159
- # ---------- OPTIONAL: Curated JSON (enable after you generate it) ----------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  - name: "State 5310 Listings (curated JSON)"
162
  type: json_static
163
  enabled: false # set to true once you generate the file below
 
1
  # Minimal, valid config — v6.3
2
+ filters:
3
+ capacity_only: true # keep only capacity-building items
4
+ pa_md_only: false # set to true to restrict index to PA/MD
5
+
6
  sources:
7
  # ---------- FEDERAL: Grants.gov (focused for buses/van/mobility & reentry) ----------
8
 
 
87
  sortBy: "openDate|desc"
88
 
89
  # ---------- STATE & METRO PASS-THROUGHS (FTA 5310 etc.) ----------
90
+ # NOTE: These sources use the http_html / web_page / http_pdf adapters implemented in app/ingest.py.
91
+ # They are enabled by default; set enabled: false on any source you do not want fetched.
92
 
93
  - name: "Maryland MTA — Grants (incl. 5310)"
94
  type: web_page
 
162
  mode: "article"
163
  keep_links: true
164
 
165
+ # --- Pennsylvania: PCA (state arts) ---
166
+ - name: "PA Creative Industries – Capacity Building (landing)"
167
+ type: http_html
168
+ enabled: true
169
+ url: "https://www.pa.gov/agencies/coa/grants-and-loans/capacity-building-programs.html"
170
+ geo: "PA"
171
+ categories: ["capacity_building"]
172
+ parse:
173
+ follow_links: true
174
+ link_selectors:
175
+ - "a[href*='capacity']"
176
+ - "a[href*='strategies-for-success']"
177
+ - "a[href$='.pdf']"
178
+ content_selectors:
179
+ - "main"
180
+ - "article"
181
+ - ".content"
182
+
183
+ - name: "PA Creative Industries – Creative Sector Flex Fund"
184
+ type: http_html
185
+ enabled: true
186
+ url: "https://www.pa.gov/agencies/coa/grants-and-loans/creative-sector-flex-fund.html"
187
+ geo: "PA"
188
+ categories: ["capacity_building"]
189
+ parse:
190
+ follow_links: true
191
+ link_selectors:
192
+ - "a[href$='.pdf']"
193
+ - "a[href*='guidelines']"
194
+ - "a[href*='apply']"
195
+ content_selectors:
196
+ - "main"
197
+ - "article"
198
+ - ".content"
199
 
200
+ # --- Pennsylvania: PCCD (eGrants announcements & PDFs) ---
201
+ - name: "PCCD – Funding Announcements (eGrants)"
202
+ type: http_html
203
+ enabled: true
204
+ url: "https://www.pccd.pa.gov/Funding/Pages/default.aspx"
205
+ geo: "PA"
206
+ categories: ["capacity_building", "public_safety", "youth"]
207
+ parse:
208
+ follow_links: true
209
+ link_selectors:
210
+ - "a[href*='Funding-Announcement']"
211
+ - "a[href$='.pdf']"
212
+ - "a[href*='CJAB']"
213
+ - "a[href*='VIP']"
214
+ - "a[href*='CCVI']"
215
+ - "a[href*='BOOST']"
216
+ content_selectors:
217
+ - "main"
218
+ - "article"
219
+ - ".ms-rtestate-field"
220
+
221
+ - name: "PCCD – PDFs (deep fetch)"
222
+ type: http_pdf
223
+ enabled: true
224
+ url_patterns:
225
+ - "https://www.pccd.pa.gov/*/*.pdf"
226
+ geo: "PA"
227
+ categories: ["capacity_building"]
228
+
229
+ # --- Maryland: OneStop (statewide grant listings with 'capacity' search) ---
230
+ - name: "Maryland OneStop – Capacity search"
231
+ type: http_html
232
+ enabled: true
233
+ url: "https://onestop.md.gov/search?query=capacity"
234
+ geo: "MD"
235
+ categories: ["capacity_building"]
236
+ parse:
237
+ follow_links: true
238
+ link_selectors:
239
+ - "a[href*='/forms/']"
240
+ - "a[href*='/search/']"
241
+ content_selectors:
242
+ - "main"
243
+ - "article"
244
+ - "[role='main']"
245
+
246
+ # --- Maryland: DHCD (housing/community programs & press) ---
247
+ - name: "MD DHCD – Programs (grants & loans index)"
248
+ type: http_html
249
+ enabled: true
250
+ url: "https://dhcd.maryland.gov/Pages/Programs.aspx"
251
+ geo: "MD"
252
+ categories: ["capacity_building", "housing", "community_development"]
253
+ parse:
254
+ follow_links: true
255
+ link_selectors:
256
+ - "a[href*='Programs']"
257
+ - "a[href$='.pdf']"
258
+ - "a[href*='Trust']"
259
+ content_selectors:
260
+ - "#content"
261
+ - "main"
262
+ - "article"
263
+
264
+ - name: "MD DHCD – Press/Notices (watch for NOFOs)"
265
+ type: http_html
266
+ enabled: true
267
+ url: "https://dhcd.maryland.gov/Pages/PressReleases.aspx"
268
+ geo: "MD"
269
+ categories: ["capacity_building"]
270
+ parse:
271
+ follow_links: true
272
+ link_selectors:
273
+ - "a[href$='.pdf']"
274
+ - "a[href*='Notice']"
275
+ - "a[href*='Funding']"
276
+ content_selectors:
277
+ - "#content"
278
+ - "main"
279
+ - "article"
280
+
281
+ # --- Maryland: Chesapeake Bay Trust (recurring capacity-building RFPs) ---
282
+ - name: "Chesapeake Bay Trust – Capacity Building Initiative (CBI)"
283
+ type: http_html
284
+ enabled: true
285
+ url: "https://cbtrust.org/grants/capacity-building/"
286
+ geo: "MD"
287
+ categories: ["capacity_building", "environment", "community_health"]
288
+ parse:
289
+ follow_links: true
290
+ link_selectors:
291
+ - "a[href$='.pdf']"
292
+ - "a[href*='Request-for-Proposals']"
293
+ - "a[href*='RFP']"
294
+ content_selectors:
295
+ - "main"
296
+ - "article"
297
+ - ".entry-content"
298
+
299
+ - name: "CB Trust – PDFs (deep fetch)"
300
+ type: http_pdf
301
+ enabled: true
302
+ url_patterns:
303
+ - "https://cbtrust.org/*/*.pdf"
304
+ geo: "MD"
305
+ categories: ["capacity_building"]
306
+
307
+ # ---------- OPTIONAL: Curated JSON (enable after you generate it) ----------
308
  - name: "State 5310 Listings (curated JSON)"
309
  type: json_static
310
  enabled: false # set to true once you generate the file below
project-plan-rag.rtf ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {\rtf1\ansi\ansicpg1252\cocoartf2822
2
+ \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\froman\fcharset0 Times-Bold;\f1\froman\fcharset0 Times-Roman;\f2\fmodern\fcharset0 Courier;
3
+ \f3\froman\fcharset0 TimesNewRomanPSMT;\f4\fnil\fcharset0 AppleColorEmoji;\f5\froman\fcharset0 Times-Italic;
4
+ \f6\fnil\fcharset0 Menlo-Regular;\f7\fnil\fcharset0 HelveticaNeue;}
5
+ {\colortbl;\red255\green255\blue255;\red0\green0\blue0;\red0\green0\blue233;\red109\green109\blue109;
6
+ \red109\green109\blue109;\red0\green0\blue0;}
7
+ {\*\expandedcolortbl;;\cssrgb\c0\c0\c0;\cssrgb\c0\c0\c93333;\cssrgb\c50196\c50196\c50196;
8
+ \cssrgb\c50196\c50196\c50196;\cssrgb\c0\c0\c0\c84706;}
9
+ {\*\listtable{\list\listtemplateid1\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid1\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid1}
10
+ {\list\listtemplateid2\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid101\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{circle\}}{\leveltext\leveltemplateid102\'01\uc0\u9702 ;}{\levelnumbers;}\fi-360\li1440\lin1440 }{\listname ;}\listid2}
11
+ {\list\listtemplateid3\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid201\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid3}
12
+ {\list\listtemplateid4\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid301\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{circle\}}{\leveltext\leveltemplateid302\'01\uc0\u9702 ;}{\levelnumbers;}\fi-360\li1440\lin1440 }{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{square\}}{\leveltext\leveltemplateid303\'01\uc0\u9642 ;}{\levelnumbers;}\fi-360\li2160\lin2160 }{\listname ;}\listid4}
13
+ {\list\listtemplateid5\listhybrid{\listlevel\levelnfc23\levelnfcn23\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{disc\}}{\leveltext\leveltemplateid401\'01\uc0\u8226 ;}{\levelnumbers;}\fi-360\li720\lin720 }{\listname ;}\listid5}
14
+ {\list\listtemplateid6\listhybrid{\listlevel\levelnfc0\levelnfcn0\leveljc0\leveljcn0\levelfollow0\levelstartat1\levelspace360\levelindent0{\*\levelmarker \{decimal\}}{\leveltext\leveltemplateid501\'01\'00;}{\levelnumbers\'01;}\fi-360\li720\lin720 }{\listname ;}\listid6}}
15
+ {\*\listoverridetable{\listoverride\listid1\listoverridecount0\ls1}{\listoverride\listid2\listoverridecount0\ls2}{\listoverride\listid3\listoverridecount0\ls3}{\listoverride\listid4\listoverridecount0\ls4}{\listoverride\listid5\listoverridecount0\ls5}{\listoverride\listid6\listoverridecount0\ls6}}
16
+ \margl1440\margr1440\vieww31340\viewh19300\viewkind0
17
+ \deftab720
18
+ \pard\pardeftab720\sa298\partightenfactor0
19
+
20
+ \f0\b\fs36 \cf0 \expnd0\expndtw0\kerning0
21
+ Best practices & features to include\
22
+ \pard\pardeftab720\sa240\partightenfactor0
23
+
24
+ \f1\b0\fs24 \cf0 From the literature + what you\'92ve built already, here are features that improve quality & usability. {\field{\*\fldinst{HYPERLINK "https://www.funraise.org/blog/grant-management-software-for-nonprofits?utm_source=chatgpt.com"}}{\fldrslt \cf3 \ul \ulc3 NetSuite+3Funraise+3Fluxx+3}}\
25
+
26
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrt\brdrnil \trbrdrl\brdrnil \trbrdrr\brdrnil
27
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
28
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
29
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
30
+ \pard\intbl\itap1\pardeftab720\qc\partightenfactor0
31
+
32
+ \f0\b \cf0 Feature\cell
33
+ \pard\intbl\itap1\pardeftab720\qc\partightenfactor0
34
+ \cf0 Why it matters\cell
35
+ \pard\intbl\itap1\pardeftab720\qc\partightenfactor0
36
+ \cf0 How to implement / what to watch out for\cell \row
37
+
38
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
39
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
40
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
41
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
42
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
43
+ \cf0 Opportunity filters
44
+ \f1\b0 (keywords, geography, type, capacity-building etc.)\cell
45
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
46
+ \cf0 Helps users narrow to what matters, reduces overload.\cell
47
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
48
+ \cf0 You have keyword filters already. Also include date, state, amount range, \'93open vs closed\'94 status.\cell \row
49
+
50
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
51
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
52
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
53
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
54
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
55
+
56
+ \f0\b \cf0 Deadline alerts / reminders
57
+ \f1\b0 \cell
58
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
59
+ \cf0 Prevents missed grants when deadlines slip by unnoticed.\cell
60
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
61
+ \cf0 Track
62
+ \f2\fs26 deadline
63
+ \f1\fs24 (if parsed), then show upcoming ones. Allow export to calendar or reminders.\cell \row
64
+
65
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
66
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
67
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
68
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
69
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
70
+
71
+ \f0\b \cf0 Document / PDF fetching + parsing
72
+ \f1\b0 \cell
73
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
74
+ \cf0 Many state sources have PDFs, RFPs etc. Users want details, not just summary.\cell
75
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
76
+ \cf0 Use PDF adapter + follow-links. Flag PDFs clearly in UI.\cell \row
77
+
78
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
79
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
80
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
81
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
82
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
83
+
84
+ \f0\b \cf0 Staleness / expiration detection
85
+ \f1\b0 \cell
86
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
87
+ \cf0 Opportunities with expired deadlines clutter feeds.\cell
88
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
89
+ \cf0 Tag items with \'93deadline passed\'94 or \'93pending\'94 etc. Maybe auto-hide old ones after some time.\cell \row
90
+
91
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
92
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
93
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
94
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
95
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
96
+
97
+ \f0\b \cf0 User feedback / manual review / save items
98
+ \f1\b0 \cell
99
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
100
+ \cf0 Users can flag false positives, save promising ones.\cell
101
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
102
+ \cf0 Add \'93bookmark / save\'94 or \'93dismiss\'94 features. Could feed into machine learning or heuristics over time.\cell \row
103
+
104
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
105
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
106
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
107
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
108
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
109
+
110
+ \f0\b \cf0 Dashboard / Analytics
111
+ \f1\b0 \cell
112
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
113
+ \cf0 Helps see grant volume by type, deadlines, states, etc.\cell
114
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
115
+ \cf0 Simple charts like # grants by month, # capacity-building grants vs total, etc.\cell \row
116
+
117
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
118
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
119
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
120
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
121
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
122
+
123
+ \f0\b \cf0 Good defaults / simple UI
124
+ \f1\b0 \cell
125
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
126
+ \cf0 Avoid cognitive overload \'97 show only essentials, allow advanced filtering if needed.\cell
127
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
128
+ \cf0 E.g. show title, deadline, source, eligibility; hide long descriptions by default.\cell \row
129
+
130
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrr\brdrnil
131
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
132
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
133
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
134
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
135
+
136
+ \f0\b \cf0 Mobile-friendly / responsive
137
+ \f1\b0 \cell
138
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
139
+ \cf0 Many will check on phones or tablets.\cell
140
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
141
+ \cf0 If the UI is web-based, ensure collapsible fields and simple menus.\cell \row
142
+
143
+ \itap1\trowd \taflags0 \trgaph108\trleft-108 \trbrdrl\brdrnil \trbrdrt\brdrnil \trbrdrr\brdrnil
144
+ \clvertalc \clshdrawnil \clwWidth4256\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx2880
145
+ \clvertalc \clshdrawnil \clwWidth5037\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx5760
146
+ \clvertalc \clshdrawnil \clwWidth6106\clftsWidth3 \clmart10 \clmarl10 \clmarb10 \clmarr10 \clbrdrt\brdrnil \clbrdrl\brdrnil \clbrdrb\brdrnil \clbrdrr\brdrnil \clpadt20 \clpadl20 \clpadb20 \clpadr20 \gaph\cellx8640
147
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
148
+
149
+ \f0\b \cf0 Integrations
150
+ \f1\b0 \cell
151
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
152
+ \cf0 E.g. calendar, Slack/email alerts.\cell
153
+ \pard\intbl\itap1\pardeftab720\partightenfactor0
154
+ \cf0 Helps push notifications rather than only manual checking.\cell \lastrow\row
155
+ \pard\pardeftab720\partightenfactor0
156
+ \cf4 \
157
+ \pard\pardeftab720\sa298\partightenfactor0
158
+
159
+ \f0\b\fs36 \cf0 What to avoid / limit to keep usability\
160
+ \pard\pardeftab720\sa240\partightenfactor0
161
+
162
+ \f1\b0\fs24 \cf0 These things often add data/noise or complexity, so either avoid or gate them behind \'93advanced\'94 toggles.\
163
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
164
+ \ls1\ilvl0
165
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
166
+ Over-broad scraping
167
+ \f1\b0 : pulling every \'93policy\'94, \'93program info\'94, \'93newsletter\'94, etc. just because capacity keywords appear once.\
168
+ \ls1\ilvl0
169
+ \f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
170
+ Too much automation without oversight
171
+ \f1\b0 : e.g. hidden deadlines if PDF text parsing fails.\
172
+ \ls1\ilvl0
173
+ \f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
174
+ Overwhelming metadata fields
175
+ \f1\b0 : fields like \'93agency budget history\'94, \'93application score weights\'94, etc., unless users request them.\
176
+ \ls1\ilvl0
177
+ \f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
178
+ Frequent UI changes or too many fields
179
+ \f1\b0 \'97 keep interface consistent.\
180
+ \ls1\ilvl0
181
+ \f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
182
+ Large batch of false positives
183
+ \f1\b0 \'97 this ruins trust quickly.\
184
+ \pard\pardeftab720\partightenfactor0
185
+ \cf4 \
186
+ \pard\pardeftab720\sa298\partightenfactor0
187
+
188
+ \f0\b\fs36 \cf0 What to build next in your tool\
189
+ \pard\pardeftab720\sa240\partightenfactor0
190
+
191
+ \f1\b0\fs24 \cf0 Here are prioritized improvements/next features for your tool to make it more powerful while preserving clarity:\
192
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
193
+ \ls2\ilvl0
194
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 1 }\expnd0\expndtw0\kerning0
195
+ Deadline extraction / detection
196
+ \f1\b0 \
197
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
198
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
199
+ \f3 \uc0\u9702
200
+ \f1 }\expnd0\expndtw0\kerning0
201
+ From Grants.gov API: likely available.\
202
+ \ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
203
+ \f3 \uc0\u9702
204
+ \f1 }\expnd0\expndtw0\kerning0
205
+ From scraped state pages / PDFs: attempt to parse \'93deadline\'94, \'93closing date\'94. If missing, mark as \'93TBD\'94.\
206
+ \ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
207
+ \f3 \uc0\u9702
208
+ \f1 }\expnd0\expndtw0\kerning0
209
+ UI: highlight upcoming deadlines (\'93Due in next 30 days\'94).\
210
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
211
+ \ls2\ilvl0
212
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 2 }\expnd0\expndtw0\kerning0
213
+ \'93Open / Active\'94 flag
214
+ \f1\b0 \
215
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
216
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
217
+ \f3 \uc0\u9702
218
+ \f1 }\expnd0\expndtw0\kerning0
219
+ Mark an item as active if its RFP is open or you can detect \'93application now open\'94 in the text.\
220
+ \ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
221
+ \f3 \uc0\u9702
222
+ \f1 }\expnd0\expndtw0\kerning0
223
+ If not, mark as \'93reference / program\'94 so users know it\'92s structural info, not a live call.\
224
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
225
+ \ls2\ilvl0
226
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 3 }\expnd0\expndtw0\kerning0
227
+ Bookmark / dismiss / feedback
228
+ \f1\b0 \
229
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
230
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
231
+ \f3 \uc0\u9702
232
+ \f1 }\expnd0\expndtw0\kerning0
233
+ Users should be able to mark \'93this is useful\'94 or \'93not relevant\'94 to train future filtering.\
234
+ \ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
235
+ \f3 \uc0\u9702
236
+ \f1 }\expnd0\expndtw0\kerning0
237
+ Possibly store local tags (e.g. \'93my state\'94, \'93my priority\'94).\
238
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
239
+ \ls2\ilvl0
240
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 4 }\expnd0\expndtw0\kerning0
241
+ Improved link / PDF following
242
+ \f1\b0 \
243
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
244
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
245
+ \f3 \uc0\u9702
246
+ \f1 }\expnd0\expndtw0\kerning0
247
+ As suggested earlier.\
248
+ \ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
249
+ \f3 \uc0\u9702
250
+ \f1 }\expnd0\expndtw0\kerning0
251
+ Make sure link selectors are fine-tuned for each state source.\
252
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
253
+ \ls2\ilvl0
254
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 5 }\expnd0\expndtw0\kerning0
255
+ Expired / historical items archive
256
+ \f1\b0 \
257
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
258
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
259
+ \f3 \uc0\u9702
260
+ \f1 }\expnd0\expndtw0\kerning0
261
+ Hide automatically unless explicitly requested. Keeps main view clean.\
262
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
263
+ \ls2\ilvl0
264
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 6 }\expnd0\expndtw0\kerning0
265
+ Search + sort + filter UI in front end
266
+ \f1\b0 \
267
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
268
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
269
+ \f3 \uc0\u9702
270
+ \f1 }\expnd0\expndtw0\kerning0
271
+ Filters: State, source type (federal/state), capacity vs other, keyword.\
272
+ \ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
273
+ \f3 \uc0\u9702
274
+ \f1 }\expnd0\expndtw0\kerning0
275
+ Sort by deadline, date posted, amount (if available).\
276
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
277
+ \ls2\ilvl0
278
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 7 }\expnd0\expndtw0\kerning0
279
+ Notifications / reminders
280
+ \f1\b0 \
281
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
282
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
283
+ \f3 \uc0\u9702
284
+ \f1 }\expnd0\expndtw0\kerning0
285
+ Let users get alerts (email / Slack / calendar) for items that match their saved filters and have upcoming deadlines.\
286
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
287
+ \ls2\ilvl0
288
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 8 }\expnd0\expndtw0\kerning0
289
+ Confidence score or matching hint
290
+ \f1\b0 \
291
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
292
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
293
+ \f3 \uc0\u9702
294
+ \f1 }\expnd0\expndtw0\kerning0
295
+ For scraped items, display \'93match strength\'94 (how many capacity keywords matched, whether in title vs body). Helps users see which items are likely relevant.\
296
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
297
+ \ls2\ilvl0
298
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 9 }\expnd0\expndtw0\kerning0
299
+ Performance / scheduling
300
+ \f1\b0 \
301
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
302
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
303
+ \f3 \uc0\u9702
304
+ \f1 }\expnd0\expndtw0\kerning0
305
+ Check feeds regularly, avoid stale caches.\
306
+ \ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
307
+ \f3 \uc0\u9702
308
+ \f1 }\expnd0\expndtw0\kerning0
309
+ Maybe incremental ingest (only new items) rather than full crawl all the time.\
310
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
311
+ \ls2\ilvl0
312
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext 10 }\expnd0\expndtw0\kerning0
313
+ Testing & feedback
314
+ \f1\b0 \
315
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
316
+ \ls2\ilvl1\cf0 \kerning1\expnd0\expndtw0 {\listtext
317
+ \f3 \uc0\u9702
318
+ \f1 }\expnd0\expndtw0\kerning0
319
+ Ask actual users (your team) to test beta versions and tell you what\'92s too much / too little.\
320
+ \ls2\ilvl1\kerning1\expnd0\expndtw0 {\listtext
321
+ \f3 \uc0\u9702
322
+ \f1 }\expnd0\expndtw0\kerning0
323
+ Adjust based on real use.\
324
+ \pard\pardeftab720\partightenfactor0
325
+ \cf4 \
326
+ \pard\pardeftab720\sa298\partightenfactor0
327
+
328
+ \f0\b\fs36 \cf0 Your roadmap can look like this\
329
+ \pard\pardeftab720\sa240\partightenfactor0
330
+
331
+ \f1\b0\fs24 \cf0 Here\'92s a potential plan for the next few sprints:\
332
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
333
+ \ls3\ilvl0
334
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
335
+ Sprint 1
336
+ \f1\b0 (now): Add deadline extraction + \'93active\'94 flag + improved link following for state sources.\
337
+ \ls3\ilvl0
338
+ \f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
339
+ Sprint 2
340
+ \f1\b0 : Bookmark/dismiss features + advanced filtering UI + sort by deadline.\
341
+ \ls3\ilvl0
342
+ \f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
343
+ Sprint 3
344
+ \f1\b0 : Notifications/reminders + mobile UI polish + confidence scoring.\
345
+ \ls3\ilvl0
346
+ \f0\b \kerning1\expnd0\expndtw0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
347
+ Sprint 4
348
+ \f1\b0 : Incorporate JS-rendered sources (Playwright) + archive/historical mode.\
349
+ \pard\tx720\pardeftab720\sa240\partightenfactor0
350
+ \cf0 \
351
+ \pard\pardeftab720\sa240\partightenfactor0
352
+ \cf0 \outl0\strokewidth0 \strokec2 Yes\'97exactly. We can treat the roadmap I outlined earlier as
353
+ \f0\b four sprints
354
+ \f1\b0 .\uc0\u8232 Here\'92s a clear
355
+ \f0\b Sprint 1 plan
356
+ \f1\b0 (2\'963 weeks is typical) so we can start delivering value quickly while keeping the other sprints in view.\
357
+ \pard\pardeftab720\partightenfactor0
358
+ \cf5 \strokec5 \
359
+ \pard\pardeftab720\sa298\partightenfactor0
360
+
361
+ \f4\fs36 \cf0 \strokec2 \uc0\u55356 \u57281
362
+ \f0\b Sprint 1 \'96 \'93Actionable Deadlines & Better State Data\'94\
363
+ \pard\pardeftab720\sa240\partightenfactor0
364
+
365
+ \fs24 \cf0 Goal:
366
+ \f1\b0 Surface
367
+ \f5\i current
368
+ \f1\i0 and
369
+ \f5\i timely
370
+ \f1\i0 grant opportunities\'97especially from PA & MD\'97without flooding users.\
371
+ \pard\pardeftab720\sa280\partightenfactor0
372
+
373
+ \f0\b\fs28 \cf0 Deliverables\
374
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
375
+ \ls4\ilvl0
376
+ \fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 1 }\expnd0\expndtw0\kerning0
377
+ \outl0\strokewidth0 \strokec2 Deadline Extraction & \'93Active\'94 Flag
378
+ \f1\b0 \
379
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
380
+ \ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
381
+ \f3 \uc0\u9702
382
+ \f1 }\expnd0\expndtw0\kerning0
383
+ \outl0\strokewidth0 \strokec2 Parse
384
+ \f2\fs26 deadline
385
+ \f1\fs24 /
386
+ \f2\fs26 closing date
387
+ \f1\fs24 text from Grants.gov (API gives it directly).\
388
+ \ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
389
+ \f3 \uc0\u9702
390
+ \f1 }\expnd0\expndtw0\kerning0
391
+ \outl0\strokewidth0 \strokec2 Add a lightweight NLP/regex extractor for state HTML/PDFs (\'93Deadline\'94, \'93Applications due\'94, etc.).\
392
+ \ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
393
+ \f3 \uc0\u9702
394
+ \f1 }\expnd0\expndtw0\kerning0
395
+ \outl0\strokewidth0 \strokec2 In the index, add fields:
396
+ \f2\fs26 deadline_date
397
+ \f1\fs24 ,
398
+ \f2\fs26 is_active
399
+ \f1\fs24 (true if deadline >= today or marked open/TBD).\
400
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
401
+ \ls4\ilvl0
402
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 2 }\expnd0\expndtw0\kerning0
403
+ \outl0\strokewidth0 \strokec2 UI Updates in the Dashboard
404
+ \f1\b0 \
405
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
406
+ \ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
407
+ \f3 \uc0\u9702
408
+ \f1 }\expnd0\expndtw0\kerning0
409
+ \outl0\strokewidth0 \strokec2 Show deadlines and days-to-close (already in the mock).\
410
+ \ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
411
+ \f3 \uc0\u9702
412
+ \f1 }\expnd0\expndtw0\kerning0
413
+ \outl0\strokewidth0 \strokec2 Add a small
414
+ \f0\b \'93Active Only\'94
415
+ \f1\b0 toggle to hide expired or guideline-only records.\
416
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
417
+ \ls4\ilvl0
418
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 3 }\expnd0\expndtw0\kerning0
419
+ \outl0\strokewidth0 \strokec2 Link-Following Enhancements
420
+ \f1\b0 \
421
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
422
+ \ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
423
+ \f3 \uc0\u9702
424
+ \f1 }\expnd0\expndtw0\kerning0
425
+ \outl0\strokewidth0 \strokec2 Update YAML for
426
+ \f0\b MD DHCD Press
427
+ \f1\b0 ,
428
+ \f0\b MTA
429
+ \f1\b0 ,
430
+ \f0\b PennDOT
431
+ \f1\b0 , etc. to:\
432
+ \pard\tx1660\tx2160\pardeftab720\li2160\fi-2160\sa240\partightenfactor0
433
+ \ls4\ilvl2
434
+ \f2\fs26 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
435
+ \f6 \uc0\u9642
436
+ \f2 }\expnd0\expndtw0\kerning0
437
+ \outl0\strokewidth0 \strokec2 parse.follow_links: true
438
+ \f1\fs24 \
439
+ \ls4\ilvl2
440
+ \f2\fs26 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
441
+ \f6 \uc0\u9642
442
+ \f2 }\expnd0\expndtw0\kerning0
443
+ \outl0\strokewidth0 \strokec2 crawl.max_depth: 1
444
+ \f1\fs24 \
445
+ \ls4\ilvl2
446
+ \f2\fs26 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
447
+ \f6 \uc0\u9642
448
+ \f2 }\expnd0\expndtw0\kerning0
449
+ \outl0\strokewidth0 \strokec2 link_selectors
450
+ \f1\fs24 for PDFs / NOFO pages\
451
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
452
+ \ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
453
+ \f3 \uc0\u9702
454
+ \f1 }\expnd0\expndtw0\kerning0
455
+ \outl0\strokewidth0 \strokec2 Ensure adapter fetches and normalizes these linked pages.\
456
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
457
+ \ls4\ilvl0
458
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 4 }\expnd0\expndtw0\kerning0
459
+ \outl0\strokewidth0 \strokec2 Improved Capacity Filter
460
+ \f1\b0 \
461
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
462
+ \ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
463
+ \f3 \uc0\u9702
464
+ \f1 }\expnd0\expndtw0\kerning0
465
+ \outl0\strokewidth0 \strokec2 Add a few transportation-related capacity phrases (\'93service capacity\'94, \'93provider capacity\'94) to capture more relevant 5310-type opportunities.\
466
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
467
+ \ls4\ilvl0
468
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 5 }\expnd0\expndtw0\kerning0
469
+ \outl0\strokewidth0 \strokec2 Basic Quality Checks
470
+ \f1\b0 \
471
+ \pard\tx940\tx1440\pardeftab720\li1440\fi-1440\sa240\partightenfactor0
472
+ \ls4\ilvl1\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
473
+ \f3 \uc0\u9702
474
+ \f1 }\expnd0\expndtw0\kerning0
475
+ \outl0\strokewidth0 \strokec2 Logging: show counts of items skipped due to expired deadlines or missing text.\
476
+ \ls4\ilvl1\kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext
477
+ \f3 \uc0\u9702
478
+ \f1 }\expnd0\expndtw0\kerning0
479
+ \outl0\strokewidth0 \strokec2 Quick manual review with your team to confirm that the new records are genuinely grant opportunities.\
480
+ \pard\pardeftab720\partightenfactor0
481
+ \cf5 \strokec5 \
482
+ \pard\pardeftab720\sa298\partightenfactor0
483
+
484
+ \f4\fs36 \cf0 \strokec2 \uc0\u55357 \u56621
485
+ \f0\b Sprint 2\'964 Preview\
486
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
487
+ \ls5\ilvl0
488
+ \fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
489
+ \outl0\strokewidth0 \strokec2 Sprint 2:
490
+ \f1\b0 Bookmarks & Dismiss, Advanced Filtering UI, Sorting by deadline/confidence.\
491
+ \ls5\ilvl0
492
+ \f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
493
+ \outl0\strokewidth0 \strokec2 Sprint 3:
494
+ \f1\b0 Notifications (email/Slack/ICS calendar) + mobile UI polish + confidence scoring display.\
495
+ \ls5\ilvl0
496
+ \f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext \uc0\u8226 }\expnd0\expndtw0\kerning0
497
+ \outl0\strokewidth0 \strokec2 Sprint 4:
498
+ \f1\b0 Full
499
+ \f0\b JS-rendered
500
+ \f1\b0 source support (Playwright for MD OneStop etc.) and historical archive mode.\
501
+ \pard\pardeftab720\partightenfactor0
502
+ \cf5 \strokec5 \
503
+ \pard\pardeftab720\sa298\partightenfactor0
504
+
505
+ \f0\b\fs36 \cf0 \strokec2 Next Steps to Kick Off Sprint 1\
506
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
507
+ \ls6\ilvl0
508
+ \fs24 \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 1 }\expnd0\expndtw0\kerning0
509
+ \outl0\strokewidth0 \strokec2 Confirm dev environment
510
+ \f1\b0 \'96 you already have
511
+ \f2\fs26 grants-rag
512
+ \f1\fs24 running locally.\
513
+ \ls6\ilvl0
514
+ \f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 2 }\expnd0\expndtw0\kerning0
515
+ \outl0\strokewidth0 \strokec2 Create a new branch
516
+ \f1\b0 :\uc0\u8232
517
+ \f2\fs26 \uc0\u8232 \u8232 \u8232 git checkout -b sprint1-deadline-active-flag\
518
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\partightenfactor0
519
+ \ls6\ilvl0\cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 3 }\expnd0\expndtw0\kerning0
520
+ \outl0\strokewidth0 \strokec2 \uc0\u8232 \u8232
521
+ \f1\fs24 \
522
+ \pard\tx220\tx720\pardeftab720\li720\fi-720\sa240\partightenfactor0
523
+ \ls6\ilvl0
524
+ \f0\b \cf0 \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 4 }\expnd0\expndtw0\kerning0
525
+ \outl0\strokewidth0 \strokec2 Add extraction utilities
526
+ \f1\b0 \uc0\u8232
527
+ \f5\i Regex for state HTML/PDFs
528
+ \f1\i0 and update
529
+ \f2\fs26 _normalize_web_record
530
+ \f1\fs24 to accept
531
+ \f2\fs26 deadline_date
532
+ \f1\fs24 and
533
+ \f2\fs26 is_active
534
+ \f1\fs24 .\
535
+ \ls6\ilvl0
536
+ \f0\b \kerning1\expnd0\expndtw0 \outl0\strokewidth0 {\listtext 5 }\expnd0\expndtw0\kerning0
537
+ \outl0\strokewidth0 \strokec2 UI
538
+ \f1\b0 : enable the
539
+ \f0\b Active Only
540
+ \f1\b0 toggle (mock is already designed for it).\
541
+ \pard\pardeftab720\partightenfactor0
542
+ \cf5 \strokec5 \
543
+ \pard\pardeftab720\sa240\partightenfactor0
544
+
545
+ \f0\b \cf0 \strokec2 Timeline suggestion:
546
+ \f1\b0 2 weeks dev + 1 week QA / user feedback.\uc0\u8232 Once Sprint 1 is merged, we\'92ll have a tool that surfaces only
547
+ \f5\i active
548
+ \f1\i0 capacity-building grants with clear deadlines\'97exactly the balance of \'93rich data without overload\'94 we\'92re after.\
549
+ \pard\pardeftab720\partightenfactor0
550
+ \cf0 \
551
+ \
552
+ \
553
+ \
554
+ \
555
+ \
556
+ \
557
+ \pard\pardeftab720\sa240\partightenfactor0
558
+ \cf0 \
559
+ \pard\pardeftab720\partightenfactor0
560
+ \cf0 \
561
+ \pard\pardeftab720\qc\partightenfactor0
562
+
563
+ \f7\fs22 \cf6 \strokec6 \
564
+ \pard\pardeftab720\partightenfactor0
565
+
566
+ \f1\fs24 \cf0 \strokec2 ChatGPT can make mistakes. Check important info.\
567
+ \pard\tx720\pardeftab720\sa240\partightenfactor0
568
+ \cf0 \outl0\strokewidth0 \
569
+ }