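# Run configuration for detoxifying Pythia language models, in two stages:
# `rlhf` (PPO fine-tuning against a toxicity classifier) and `irl`
# (recovering a reward model from original vs. detoxified generations).
# NOTE: the comments in this file are annotations that assume TRL-style PPO
# and Hydra/OmegaConf semantics based on the field names.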
rlhf:
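  # PPO policy and optimizer settings; the field names appear to follow
  # TRL's PPOConfig: init_kl_coef/target/adap_kl_ctrl control the adaptive
  # KL penalty toward the reference model, cliprange/cliprange_value/vf_coef
  # the clipped PPO objective, and ratio_threshold skips batches whose
  # importance ratio explodes.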
  model:
    name: EleutherAI/pythia-160M
    learning_rate: 1.0e-06
    ppo_epochs: 4
    init_kl_coef: 0.3
    target: 4
    cliprange: 0.2
    cliprange_value: 0.2
    vf_coef: 0.1
    adap_kl_ctrl: true
    use_score_norm: true
    ratio_threshold: 10.0
    batch_size: 64
    mini_batch_size: 2
    forward_batch_size: 2
    gradient_accumulation_steps: 16
    reward_model: facebook/roberta-hate-speech-dynabench-r4-target
    use_raw_logits: true
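  # Sampling settings: top_k 0.0 and top_p 1.0 disable both filters, i.e.
  # pure ancestral sampling; continuations are 15-20 new tokens long.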
  generation:
    min_length: 5
    output_min_length: 15
    output_max_length: 20
    do_sample: true
    top_k: 0.0
    top_p: 1.0
  training:
    num_train_epochs: 100
    save_freq: 20
    eval_freq: 20
    seed: 42
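  # RealToxicityPrompts, presumably filtered to prompts scoring above
  # toxicity_threshold and truncated to 15-20 tokens.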
  dataset:
    name: allenai/real-toxicity-prompts
    toxicity_threshold: 0.3
    input_min_text_length: 15
    input_max_text_length: 20
    test_size: 0.1
  output:
    push_to_hub: true
    organization: null
    repository_name: pythia-160m-detox
  wandb:
    project: irl_llms
    entity: null
    name: null
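# IRL stage: fit a reward model that separates generations of the original
# model from those of its detoxified counterpart.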
irl:
  mode: train
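  # Paired sample generation: both models presumably complete the same
  # prompts. hub_token is read from the HF_TOKEN environment variable via
  # the OmegaConf `oc.env` resolver, defaulting to null.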
  dataset:
    original_model_name: EleutherAI/pythia-70M
    detoxified_model_name: ajagota71/pythia-70m-detox-epoch-100
    original_dataset_path: null
    detoxified_dataset_path: null
    cache_dir: ${hydra:runtime.cwd}/datasets
    num_samples: 1000
    max_new_tokens: 30
    batch_size: 16
    temperature: 0.7
    top_p: 1.0
    seed: ${seed}
    use_cached: false
    toxicity_threshold: 0.3
    push_to_hub: false
    hub_org: null
    hub_token: ${oc.env:HF_TOKEN,null}
    private: false
    use_half_precision: null
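  # Reward-model backbone; with num_unfrozen_layers: 1, presumably only the
  # topmost layer is trained.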
  model:
    reward_model_base: null
    use_half_precision: null
    num_unfrozen_layers: 1
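  # Max-margin IRL: assumed to train the reward model so that detoxified
  # samples out-score original samples by at least `margin`.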
  training:
    irl_method: max_margin
    learning_rate: 1.0e-05
    epochs: 20
    batch_size: 4
    eval_interval: 5
    max_length: 512
    train_test_split: 0.8
    grad_clip: 1.0
    weight_decay: 0.01
    margin: 0.1
    temperature: 0.1
    adam_epsilon: 1.0e-08
    seed: ${seed}
    include_prompt: true
  output:
    repo_name_prefix: irl-reward-model
    base_dir: ${hydra:runtime.cwd}/outputs/irl
    save_checkpoints: true
    push_to_hub: false
    hub_org: ajagota71
    private: false
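  # The learned reward is evaluated against the toxicity classifier that
  # provided the RLHF reward signal (the "true" reward model).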
  evaluation:
    true_reward_model: facebook/roberta-hate-speech-dynabench-r4-target
  logging:
    project_name: irl-detoxification
    use_wandb: true
    wandb_mode: online
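# Top-level run settings; output_dir is timestamped per run via Hydra's
# `now` resolver.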
now: 2025-05-16_10-56-50
seed: 42
output_dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d_%H-%M-%S}
mode: train
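# The blocks below appear to be the composed settings for this run (a
# larger-batch PPO configuration than the `rlhf` defaults above).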
model:
  name: EleutherAI/pythia-160M
  learning_rate: 1.0e-05
  batch_size: 128
  mini_batch_size: 8
  forward_batch_size: 8
  gradient_accumulation_steps: 8
  reward_model: facebook/roberta-hate-speech-dynabench-r4-target
  use_raw_logits: true
  ppo_epochs: 4
  init_kl_coef: 0.2
  target: 6
  cliprange: 0.2
  cliprange_value: 0.2
  vf_coef: 0.1
  adap_kl_ctrl: true
  use_score_norm: true
  ratio_threshold: 10.0
generation:
  min_length: 5
  output_min_length: 15
  output_max_length: 20
  do_sample: true
  top_k: 0.0
  top_p: 1.0
training:
  num_train_epochs: 200
  save_freq: 50
  eval_freq: 20
  seed: 42
dataset:
  name: allenai/real-toxicity-prompts
  toxicity_threshold: 0.3
  input_min_text_length: 15
  input_max_text_length: 20
  test_size: 0.1
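# Hub publishing: presumably the final model plus intermediate checkpoints
# every checkpoint_push_freq epochs, pushed to
# ajagota71/pythia-160m-fb-detox.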
output:
  push_to_hub: true
  push_checkpoints_to_hub: true
  checkpoint_push_freq: 20
  organization: ajagota71
  repository_name: pythia-160m-fb-detox
  private: false
wandb:
  project: irl_llms
  entity: null
  name: pythia-160M-2025-05-16_10-56-50
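# Assuming a Hydra entry point consumes this config, any value can be
# overridden from the command line, e.g. (hypothetical script name):
#   python train.py model.learning_rate=5.0e-06 training.save_freq=20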