Spaces:
Runtime error
Runtime error
{
  "dataset": {
    "name": "George-API/cognitive-data",
    "split": "train",
    "column_mapping": {
      "text": "conversations",
      "id": "id"
    },
    "processing": {
      "sort_by_id": true,
      "maintain_paper_order": true,
      "max_seq_length": 2048
    }
  },
  "data_formatting": {
    "chat_template": "phi",
    "roles": {
      "system": "System: {content}\n\n",
      "human": "Human: {content}\n\n",
      "assistant": "Assistant: {content}\n\n"
    },
    "metadata_handling": {
      "include_paper_id": true,
      "include_chunk_number": true,
      "metadata_format": "Paper ID: {paper_id} | Chunk: {chunk_number}"
    }
  },
  "data_loading": {
    "batch_size": 16,
    "shuffle": false,
    "drop_last": false,
    "num_workers": 2,
    "pin_memory": false
  },
  "validation": {
    "log_samples": 3,
    "log_interval": 50,
    "metrics": ["processed", "skipped", "avg_tokens", "unique_papers"]
  }
}