import chug
task_cfg = chug.DataTaskDocReadCfg(
page_sampling='all',
)
data_cfg = chug.DataCfg(
source='pixparse/pdfa-eng-wds',
split='train',
batch_size=None,
format='hfids',
num_workers=0,
)
data_loader = chug.create_loader(
data_cfg,
task_cfg,
)
sample = next(iter(data_loader))