# Pipeline lock data: for each stage, the command that is run plus the
# md5 checksum and byte size of every dependency and output.
# NOTE(review): this looks like a tool-generated DVC lock file (dvc.lock);
# if so, hand edits and comments may be overwritten when it is regenerated
# -- confirm before relying on them.
schema: '2.0'
stages:
  # Stage "subset": runs src/subset_data.py on the raw grants CSV and
  # writes data/processed/wellcome_grant_descriptions.csv.
  subset:
    cmd: python src/subset_data.py
    deps:
    - path: data/raw/Wellcome-grants-awarded-1-October-2005-to-04-05-2022.csv
      md5: 5c0d0e532709648b61625e7e130dfaa4
      size: 31028261
    - path: src/subset_data.py
      md5: f4cffd497cb8341cf05728e89cbb0871
      size: 1008
    # Parameter value recorded for this stage, keyed by the parameters file.
    # presumably n_docs is the number of documents kept -- verify against
    # src/subset_data.py
    params:
      params.yaml:
        n_docs: 500
    outs:
    - path: data/processed/wellcome_grant_descriptions.csv
      md5: 18dd6a7611d7f53b1067def7ba075cba
      size: 644736
  # Stage "entities": runs src/process_docs.py; its first dependency is the
  # output of the "subset" stage (same path, md5 and size).
  entities:
    cmd: python src/process_docs.py
    deps:
    - path: data/processed/wellcome_grant_descriptions.csv
      md5: 18dd6a7611d7f53b1067def7ba075cba
      size: 644736
    - path: src/process_docs.py
      md5: 1f570b1aa0f44b0bb131317c305deff5
      size: 1309
    outs:
    - path: data/processed/entities.jsonl
      md5: 26846cdd657a516281b24c376a93f018
      size: 214902