Spaces:
Runtime error
Runtime error
| # app/main.py | |
| import argparse, os, json | |
| from pathlib import Path | |
| from dotenv import dotenv_values | |
| import pandas as pd | |
| from app.ingest import ingest | |
| from app.search import search | |
| from app.paths import DATA_DIR, DOCSTORE_DIR, INDEX_DIR, EXPORT_DIR # ← canonical paths | |
def get_env():
    """
    Build the runtime configuration mapping.

    Sources are layered from lowest to highest priority:
    defaults taken from app.paths, then the local ``.env`` file (if any),
    then the real process environment. Every directory named by the four
    ``*_DIR`` keys is created when it does not exist yet.

    Returns the merged mapping.
    """
    # Later unpacking wins, so process variables (Space secrets / shell
    # exports) override whatever .env provided.
    merged = {**(dotenv_values(".env") or {}), **os.environ}

    # Fallback locations, applied only for keys nobody set explicitly.
    path_defaults = {
        "DATA_DIR": DATA_DIR,
        "DOCSTORE_DIR": DOCSTORE_DIR,
        "INDEX_DIR": INDEX_DIR,
        "EXPORT_DIR": EXPORT_DIR,
    }
    for name, fallback in path_defaults.items():
        merged.setdefault(name, str(fallback))

    # Optional UI/debug flag, off unless configured.
    merged.setdefault("SHOW_DEV", "0")

    # Create the working directories up front so later steps can write to them.
    for name in path_defaults:
        Path(merged[name]).mkdir(parents=True, exist_ok=True)

    return merged
def ensure_index_exists(env: dict):
    """
    Make sure a FAISS index is present in env['INDEX_DIR'].

    The index is treated as built only when both ``faiss.index`` and
    ``meta.json`` exist in that directory. If either is missing,
    ``ingest("config/sources.yaml", env)`` is run and its result reported
    on stdout.

    Args:
        env: configuration mapping; must contain the key ``INDEX_DIR``.

    Returns:
        None.
    """
    index_dir = Path(env["INDEX_DIR"])
    required = (index_dir / "faiss.index", index_dir / "meta.json")
    if all(artifact.exists() for artifact in required):
        return  # already built

    # The ellipsis below was mis-encoded in the original message; restored here.
    print("Index not found. Building now via ingest() …")
    # Per the original notes, ingest reads the config and writes
    # index/meta/docstore. If your ingest needs API keys, set them in
    # Space Settings → Variables.
    docstore_path, record_count = ingest("config/sources.yaml", env)
    print(f"Ingest complete. {record_count} records. Docstore: {docstore_path}")
def cmd_ingest(_args):
    """
    Handle the ``ingest`` sub-command.

    Runs ingest() on config/sources.yaml with the environment from
    get_env() and prints the record count and docstore location.
    The parsed CLI arguments are accepted but not used.
    """
    docstore_path, record_count = ingest("config/sources.yaml", get_env())
    print(f"Ingest complete. {record_count} records. Docstore: {docstore_path}")
def cmd_search(args):
    """
    Handle the ``search`` sub-command: query the index and print the hits.

    Args:
        args: parsed CLI namespace providing ``q`` (query text), ``k``
            (number of results), and the comma-separated ``geo`` and
            ``categories`` filter strings.

    The index is built first if it does not exist yet. Each hit is printed
    as a title/source/geo/score line followed by its URL.
    """
    env = get_env()
    ensure_index_exists(env)

    # Split the comma-separated filters, trimming whitespace and dropping
    # empty tokens so "US, CA" or a trailing comma cannot produce filter
    # values such as " CA" or "".
    filters = {}
    for key in ("geo", "categories"):
        raw = getattr(args, key) or ""
        values = [token.strip() for token in raw.split(",") if token.strip()]
        if values:
            filters[key] = values

    for hit in search(args.q, env, top_k=args.k, filters=filters):
        geo = hit.get("geo")
        if isinstance(geo, list):
            # str() guards against non-string entries in the list
            geo = ",".join(map(str, geo))
        # A hit may carry score=None; the :.3f format needs a number
        score = hit.get("score") or 0
        print(
            f"- {hit.get('title','(no title)')} [{hit.get('source','')}] "
            f"({geo}) score={score:.3f}"
        )
        print(f" {hit.get('url','')}")
def cmd_export(args):
    """
    Handle the ``export`` sub-command: run a search and write the hits to CSV.

    Args:
        args: parsed CLI namespace providing ``q`` (query text), ``k``
            (number of results), the comma-separated ``geo`` and
            ``categories`` filter strings, and ``out`` (file name relative
            to env['EXPORT_DIR']; falls back to ``results.csv`` when empty).

    The index is built first if it does not exist yet. The destination
    path and row count are printed after the file is written.
    """
    env = get_env()
    ensure_index_exists(env)

    # Split the comma-separated filters, trimming whitespace and dropping
    # empty tokens so "US, CA" or a trailing comma cannot produce filter
    # values such as " CA" or "".
    filters = {}
    for key in ("geo", "categories"):
        raw = getattr(args, key) or ""
        values = [token.strip() for token in raw.split(",") if token.strip()]
        if values:
            filters[key] = values

    rows = search(args.q, env, top_k=args.k, filters=filters)

    destination = Path(env["EXPORT_DIR"]) / (args.out or "results.csv")
    # --out may name a nested path (e.g. "runs/today.csv"); create the
    # parent so to_csv does not fail on a missing directory.
    destination.parent.mkdir(parents=True, exist_ok=True)
    pd.DataFrame(rows).to_csv(destination, index=False)
    print(f"Exported {len(rows)} rows to {destination}")
if __name__ == "__main__":
    # Command-line entry point: one sub-command per operation, each bound
    # to its handler through the parser's ``func`` default.
    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest="cmd")

    ingest_cmd = commands.add_parser("ingest", help="Ingest sources and build index")
    ingest_cmd.set_defaults(func=cmd_ingest)

    # "search" and "export" take the same query/filter options; they differ
    # only in the default --k and in export's extra --out option.
    query_commands = (
        ("search", "Search index", cmd_search, 15),
        ("export", "Export search results to CSV", cmd_export, 50),
    )
    for name, help_text, handler, default_k in query_commands:
        query_cmd = commands.add_parser(name, help=help_text)
        query_cmd.add_argument("--q", required=True)
        query_cmd.add_argument("--k", type=int, default=default_k)
        query_cmd.add_argument("--geo", default="")
        query_cmd.add_argument("--categories", default="")
        if name == "export":
            query_cmd.add_argument("--out", default="results.csv")
        query_cmd.set_defaults(func=handler)

    cli_args = parser.parse_args()
    if cli_args.cmd:
        cli_args.func(cli_args)
    else:
        # No sub-command given: show usage instead of failing.
        parser.print_help()