# NOTE(review): this file was recovered from a garbled markdown-table dump
# (`| key: value | |` on every line). Nesting below is reconstructed from the
# section headers; the top-level placement of `now:` is an assumption — confirm
# against the original config.
model:
  name: meta-llama/Llama-3.2-1B
  learning_rate: 1.2e-05
  ppo_epochs: 4
  init_kl_coef: 0.3
  target: 3
  cliprange: 0.2
  cliprange_value: 0.3
  vf_coef: 0.15
  adap_kl_ctrl: true
  use_score_norm: true
  ratio_threshold: 10.0
  batch_size: 64
  mini_batch_size: 8
  forward_batch_size: 2
  gradient_accumulation_steps: 8
  reward_model: s-nlp/roberta_toxicity_classifier
  use_raw_logits: true
generation:
  min_length: 5
  max_new_tokens: 64
  output_min_length: 15
  output_max_length: 20
  do_sample: true
  top_k: 0.0
  top_p: 0.85
now: 2025-09-22_18-11-21
training:
  num_train_epochs: 100
  save_freq: 20
  eval_freq: 20
  seed: 42
  fast_start: true
dataset:
  name: allenai/real-toxicity-prompts
  toxicity_threshold: 0.8
  filter_metric: profanity
  input_min_text_length: 15
  input_max_text_length: 20
  test_size: 0.1
  original_dataset_path: null
  detoxified_dataset_path: null
output:
  push_to_hub: true
  push_checkpoints_to_hub: true
  checkpoint_push_freq: 20
  organization: null
  repository_name: llama-3-2-1b-detox_v1f_SCALE8_round5
  private: false
wandb:
  project: irl_llms
  entity: null
  name: Llama-3.2-1B-2025-09-22_18-11-21
irl:
  posterior_dir: re_irl_min_stratified_plots/round_5
  global_norm_dir: re_irl_min_stratified_plots
  base_model_name: null
  use_round: 5
  sample_theta_each_step: true
  n_samples: 100
  feature_max_length: 256
  feature_batch_size: 16
  use_platt: false
  platt_a: 1.0
  platt_b: 0.0
  features_on_cpu: false
  reward_scale: 8
  reward_clip: 4