test-distillation / data.yaml
algo2217's picture
Upload final model (step 78) and all checkpoints at 2025-07-16T19:21:53.266656
f22c128 verified
raw
history blame contribute delete
790 Bytes
# Data/evaluation-task settings. Every value below is a scalar; there are no
# nested YAML mappings in this file.

# Task name; the same name appears as the 'task' entry inside task_config.
task: pile10k
# task_config is ONE single-quoted YAML string, not a mapping. The doubled ''
# is YAML's escape for a literal single quote, so the decoded value reads like
# a Python dict repr (it contains None/True, so it is not valid JSON).
# Things visible in that string:
#   - 'training_split' and 'test_split' are both 'train'; 'validation_split' is None.
#   - 'doc_to_text' and 'doc_to_target' both use '{{contents}}', while
#     'doc_to_decontamination_query' uses '{{page}}'.
#     NOTE(review): confirm the dataset really has a `page` field.
#   - 'dataset_kwargs' sets 'trust_remote_code': True.
# NOTE(review): the key names resemble an lm-evaluation-harness task config;
# confirm which loader consumes this string and how it parses it.
# Do not add comments inside the quoted value; they would become part of the string.
task_config: '{''task'': ''pile10k'', ''dataset_path'': ''timaeus/dsir-pile-10k'',
''output_type'': ''loglikelihood_rolling'', ''training_split'': ''train'', ''test_split'':
''train'', ''validation_split'': None, ''doc_to_target'': ''{{contents}}'', ''doc_to_text'':
''{{contents}}'', ''process_docs'': None, ''process_results'': None, ''should_decontaminate'':
True, ''doc_to_decontamination_query'': ''{{page}}'', ''metric_list'': [{''metric'':
''word_perplexity''}, {''metric'': ''byte_perplexity''}, {''metric'': ''bits_per_byte''}],
''metadata'': {''version'': 1.0}, ''dataset_kwargs'': {''trust_remote_code'': True}}'
# Presumably names which key of task_config selects the split to load
# ('training_split' maps to 'train' above). TODO confirm against the consumer.
dataset_split: training_split
# Relative path; what it is resolved against is not visible here.
include_path: shared/aether/config/tasks/
# NOTE(review): the path contains a doubled slash ('.//data/'). This looks like
# the result of joining './' with '/data/'; verify it resolves as intended.
save_path: .//data/
# NOTE(review): this is the quoted string 'False', not a YAML boolean. A consumer
# that tests truthiness instead of parsing the text would see a non-empty
# (truthy) string. Confirm how this value is read.
force_reload: 'False'
truncate: auto
# Empty string.
prefix: ''