# Description: Preload the dataset to cache_dir
# Copyright (C) 2024 Ronan Le Meillat
# License: Apache License 2.0
from datasets import load_dataset

# Identifier of the dataset handed to `load_dataset`.
dataset_id = "eltorio/ROCO-radiology"
# Directory handed to `load_dataset` as its `cache_dir` argument.
cache_dir = "/workspace/data"

# Load the "train" split with the cache directory set explicitly. Per the
# file header, the point of this script is to get the dataset into
# `cache_dir` ahead of time; the returned object is kept only as a
# module-level name.
train_dataset = load_dataset(dataset_id, split="train", cache_dir=cache_dir)