---
license: apache-2.0
---

```json
{
  "method": "orpo",
  "dataset": "autoredteam",
  "model": "togethercomputer/RedPajama-INCITE-Base-3B-v1",
  "tokenizer": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
  "train_data_path": "/leonardo_work/EUHPC_E03_068/datasets/working/autoredteam_helpfulness_v1-train.jsonl",
  "test_data_path": "/leonardo_work/EUHPC_E03_068/datasets/working/autoredteam_helpfulness_v1-test.jsonl",
  "lr": 0.01,
  "train_batch_size": 600,
  "eval_batch_size": 600,
  "num_epochs": 1,
  "seed": 42,
  "eval_only": false,
  "evaluation_size": null,
  "gradient_accumulation_steps": 5,
  "checkpoint_path": null,
  "experiment_name": "RedPajama3b_v1-autoredteam_helpfulness-train",
  "experiment_group": "results",
  "reference_model": "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
  "context_length": 1024,
  "train_summarization": "",
  "dpo_beta": 0.1,
  "orpo_beta": 0.1,
  "kl_coef": 0.0,
  "reward_model": "",
  "bestofn_size": 4,
  "train_reward_model": ""
}
```