---
# RLHF stage: PPO fine-tuning of the policy model against a toxicity reward.
# Field names mirror TRL's PPOConfig — NOTE(review): confirm against the trainer code.
rlhf:
  model:
    # Policy model to fine-tune, plus PPO hyperparameters.
    name: EleutherAI/pythia-70M
    learning_rate: 1.0e-05
    ppo_epochs: 4
    init_kl_coef: 0.2  # initial KL-penalty coefficient
    target: 6  # target KL for the adaptive controller (used when adap_kl_ctrl is true)
    cliprange: 0.2  # PPO policy clipping range
    cliprange_value: 0.2  # value-function clipping range
    vf_coef: 0.1  # value-loss weight
    adap_kl_ctrl: true  # enable adaptive KL control
    use_score_norm: true  # normalize reward scores before PPO updates
    ratio_threshold: 10.0  # presumably skips batches whose prob ratio exceeds this — verify
    # NOTE(review): TRL expects batch_size == mini_batch_size * gradient_accumulation_steps,
    # but 8 * 8 = 64 != 128 here — confirm the trainer accepts this split.
    batch_size: 128
    mini_batch_size: 8
    forward_batch_size: 8
    gradient_accumulation_steps: 8
    # Toxicity classifier used as the reward signal.
    reward_model: facebook/roberta-hate-speech-dynabench-r4-target
    use_raw_logits: true  # presumably use classifier logits (not probabilities) as reward — verify
    generation:
      # Sampling settings for rollout generation.
      min_length: 5
      output_min_length: 15  # sampled continuation length bounds (tokens — TODO confirm units)
      output_max_length: 20
      do_sample: true
      # NOTE(review): top_k is conventionally an integer; 0.0 is a float here —
      # confirm the consumer casts it (0 usually means "disabled").
      top_k: 0.0
      top_p: 1.0  # 1.0 disables nucleus filtering
  training:
    num_train_epochs: 100
    save_freq: 20  # checkpoint interval (epochs or steps — TODO confirm)
    eval_freq: 20
    seed: 42
  dataset:
    name: allenai/real-toxicity-prompts
    toxicity_threshold: 0.3  # filter cutoff — confirm direction (>= vs <) in the data loader
    input_min_text_length: 15  # prompt length bounds (tokens — TODO confirm units)
    input_max_text_length: 20
    test_size: 0.1  # held-out fraction
  output:
    push_to_hub: true
    organization: null  # null → presumably defaults to the authenticated user
    repository_name: pythia-70m-detox
  wandb:
    project: irl_llms
    entity: null  # null → W&B default entity
    name: null  # null → presumably auto-generated run name
# IRL stage: recover a reward model from paired generations of the original
# and detoxified policies.
irl:
  mode: train
  dataset:
    # The two policies whose generations form the training pairs.
    original_model_name: EleutherAI/pythia-70M
    detoxified_model_name: ajagota71/pythia-70m-detox-epoch-100
    # null → presumably generate datasets rather than load from disk — verify.
    original_dataset_path: null
    detoxified_dataset_path: null
    cache_dir: ${hydra:runtime.cwd}/datasets  # Hydra resolver: original working dir at launch
    num_samples: 1000
    max_new_tokens: 30
    batch_size: 16  # generation batch size
    temperature: 0.7
    top_p: 1.0  # 1.0 disables nucleus filtering
    seed: ${seed}  # interpolates the top-level seed
    use_cached: false
    toxicity_threshold: 0.3
    push_to_hub: false
    hub_org: null
    hub_token: ${oc.env:HF_TOKEN,null}  # OmegaConf env resolver; null fallback if unset
    private: false
    use_half_precision: null  # null → presumably auto-detect — verify
  model:
    # Reward model to train; null base → presumably derived from a default — verify.
    reward_model_base: null
    use_half_precision: null
    num_unfrozen_layers: 1  # fine-tune only the top layer(s) of the base model
  training:
    irl_method: max_margin  # IRL objective; margin/temperature below apply to it
    learning_rate: 1.0e-05
    epochs: 20
    batch_size: 4
    eval_interval: 5  # evaluate every N epochs (TODO confirm units)
    max_length: 512  # tokenizer truncation length
    train_test_split: 0.8  # train fraction
    grad_clip: 1.0
    weight_decay: 0.01
    margin: 0.1
    temperature: 0.1
    adam_epsilon: 1.0e-08
    seed: ${seed}  # interpolates the top-level seed
    include_prompt: true  # score prompt+continuation rather than continuation only — verify
  output:
    repo_name_prefix: irl-reward-model
    base_dir: ${hydra:runtime.cwd}/outputs/irl
    save_checkpoints: true
    push_to_hub: false
    hub_org: ajagota71
    private: false
  evaluation:
    # Ground-truth reward model the recovered one is compared against.
    true_reward_model: facebook/roberta-hate-speech-dynabench-r4-target
  logging:
    project_name: irl-detoxification
    use_wandb: true
    wandb_mode: online
# Run-level settings. `now` is a literal timestamp frozen into this config;
# note output_dir uses Hydra's ${now:...} RESOLVER (evaluated at run time),
# not this key — the two can differ.
now: 2025-05-16_09-29-11
seed: 42  # referenced elsewhere via ${seed}
output_dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d_%H-%M-%S}
mode: train
# Active PPO model config for this run.
# NOTE(review): near-duplicate of rlhf.model above — confirm which one the
# trainer actually reads, and consider keeping a single source of truth.
model:
  name: EleutherAI/pythia-70M
  learning_rate: 1.0e-05
  # NOTE(review): TRL expects batch_size == mini_batch_size * gradient_accumulation_steps,
  # but 8 * 8 = 64 != 128 here — confirm the trainer accepts this split.
  batch_size: 128
  mini_batch_size: 8
  forward_batch_size: 8
  gradient_accumulation_steps: 8
  # Toxicity classifier used as the reward signal.
  reward_model: facebook/roberta-hate-speech-dynabench-r4-target
  use_raw_logits: true  # presumably use classifier logits (not probabilities) as reward — verify
  ppo_epochs: 4
  init_kl_coef: 0.2  # initial KL-penalty coefficient
  target: 6  # target KL for the adaptive controller (used when adap_kl_ctrl is true)
  cliprange: 0.2  # PPO policy clipping range
  cliprange_value: 0.2  # value-function clipping range
  vf_coef: 0.1  # value-loss weight
  adap_kl_ctrl: true  # enable adaptive KL control
  use_score_norm: true  # normalize reward scores before PPO updates
  ratio_threshold: 10.0  # presumably skips batches whose prob ratio exceeds this — verify
  generation:
    # Sampling settings for rollout generation.
    min_length: 5
    output_min_length: 15  # sampled continuation length bounds (tokens — TODO confirm units)
    output_max_length: 20
    do_sample: true
    # NOTE(review): top_k is conventionally an integer; 0.0 is a float here —
    # confirm the consumer casts it (0 usually means "disabled").
    top_k: 0.0
    top_p: 1.0  # 1.0 disables nucleus filtering
# Training schedule for this run (overrides differ from rlhf.training above:
# 200 epochs / save every 50 vs 100 / 20 there).
training:
  num_train_epochs: 200
  save_freq: 50  # checkpoint interval (epochs or steps — TODO confirm)
  eval_freq: 20
  seed: 42
# Prompt dataset for PPO rollouts.
dataset:
  name: allenai/real-toxicity-prompts
  toxicity_threshold: 0.3  # filter cutoff — confirm direction (>= vs <) in the data loader
  input_min_text_length: 15  # prompt length bounds (tokens — TODO confirm units)
  input_max_text_length: 20
  test_size: 0.1  # held-out fraction
# Hugging Face Hub publishing settings for the trained policy.
output:
  push_to_hub: true
  push_checkpoints_to_hub: true  # also push intermediate checkpoints
  checkpoint_push_freq: 20  # push every N save intervals (TODO confirm units)
  organization: ajagota71
  repository_name: pythia-70m-fb-detox
  private: false
# Weights & Biases logging for this run.
wandb:
  project: irl_llms
  entity: null  # null → W&B default entity
  name: pythia-70M-2025-05-16_09-29-11  # run name; embeds the launch timestamp