| task: pile10k | |
| task_config: '{''task'': ''pile10k'', ''dataset_path'': ''timaeus/dsir-pile-10k'', | |
| ''output_type'': ''loglikelihood_rolling'', ''training_split'': ''train'', ''test_split'': | |
| ''train'', ''validation_split'': None, ''doc_to_target'': ''{{contents}}'', ''doc_to_text'': | |
| ''{{contents}}'', ''process_docs'': None, ''process_results'': None, ''should_decontaminate'': | |
| True, ''doc_to_decontamination_query'': ''{{page}}'', ''metric_list'': [{''metric'': | |
| ''word_perplexity''}, {''metric'': ''byte_perplexity''}, {''metric'': ''bits_per_byte''}], | |
| ''metadata'': {''version'': 1.0}, ''dataset_kwargs'': {''trust_remote_code'': True}}' | |
| dataset_split: training_split | |
| include_path: shared/aether/config/tasks/ | |
| save_path: .//data/ | |
| force_reload: 'False' | |
| truncate: auto | |
| prefix: '' | |