# NOTE(review): the three lines below were web-page residue from a Hugging Face
# file view (avatar caption, commit message, commit hash) — not part of the
# config. Kept as comments for provenance; `---` starts the YAML document.
# ajagota71's picture
# Checkpoint after epoch 20
# 478e626 verified
---
# RLHF (PPO) detoxification run settings for the small pythia-160M experiment.
# NOTE(review): original paste had lost all indentation; hierarchy reconstructed
# from standard Hydra layout — confirm against the generating config.
rlhf:
  # Policy model and PPO optimizer hyperparameters.
  model:
    name: EleutherAI/pythia-160M
    learning_rate: 1.0e-06
    ppo_epochs: 4
    init_kl_coef: 0.3          # initial KL penalty coefficient
    target: 4                  # adaptive-KL target (used when adap_kl_ctrl is true)
    cliprange: 0.2
    cliprange_value: 0.2
    vf_coef: 0.1
    adap_kl_ctrl: true
    use_score_norm: true
    ratio_threshold: 10.0      # skip batches with extreme importance ratios
    batch_size: 64
    mini_batch_size: 2
    forward_batch_size: 2
    gradient_accumulation_steps: 16
    reward_model: facebook/roberta-hate-speech-dynabench-r4-target
    use_raw_logits: true       # score with raw classifier logits, not probabilities
  # Sampling parameters for rollout generation.
  generation:
    min_length: 5
    output_min_length: 15
    output_max_length: 20
    do_sample: true
    top_k: 0.0                 # 0 disables top-k filtering
    top_p: 1.0
  training:
    num_train_epochs: 100
    save_freq: 20
    eval_freq: 20
    seed: 42
  dataset:
    name: allenai/real-toxicity-prompts
    toxicity_threshold: 0.3    # keep only prompts above this toxicity score
    input_min_text_length: 15
    input_max_text_length: 20
    test_size: 0.1
  # Hugging Face Hub publishing options.
  output:
    push_to_hub: true
    organization: null         # null -> push under the authenticated user
    repository_name: pythia-160m-detox
  # Weights & Biases logging.
  wandb:
    project: irl_llms
    entity: null
    name: null
# Inverse-RL reward-model recovery settings: learn a reward model that
# separates generations of the original vs. detoxified policy.
# NOTE(review): original paste had lost all indentation; hierarchy reconstructed
# from standard Hydra layout — confirm against the generating config.
irl:
  mode: train
  # Paired-generation dataset construction (original vs. detoxified model).
  dataset:
    original_model_name: EleutherAI/pythia-70M
    detoxified_model_name: ajagota71/pythia-70m-detox-epoch-100
    original_dataset_path: null      # null -> generate instead of loading
    detoxified_dataset_path: null
    cache_dir: ${hydra:runtime.cwd}/datasets
    num_samples: 1000
    max_new_tokens: 30
    batch_size: 16
    temperature: 0.7
    top_p: 1.0
    seed: ${seed}                    # interpolates the top-level seed
    use_cached: false
    toxicity_threshold: 0.3
    push_to_hub: false
    hub_org: null
    hub_token: ${oc.env:HF_TOKEN,null}  # read from environment; never hard-code
    private: false
    use_half_precision: null
  # Reward model architecture options.
  model:
    reward_model_base: null          # null -> presumably derived from the policy base; verify
    use_half_precision: null
    num_unfrozen_layers: 1
  # IRL optimization hyperparameters.
  training:
    irl_method: max_margin
    learning_rate: 1.0e-05
    epochs: 20
    batch_size: 4
    eval_interval: 5
    max_length: 512
    train_test_split: 0.8
    grad_clip: 1.0
    weight_decay: 0.01
    margin: 0.1                      # max-margin objective margin
    temperature: 0.1
    adam_epsilon: 1.0e-08
    seed: ${seed}
    include_prompt: true
  output:
    repo_name_prefix: irl-reward-model
    base_dir: ${hydra:runtime.cwd}/outputs/irl
    save_checkpoints: true
    push_to_hub: false
    hub_org: ajagota71
    private: false
  evaluation:
    # Ground-truth reward model the recovered reward is compared against.
    true_reward_model: facebook/roberta-hate-speech-dynabench-r4-target
  logging:
    project_name: irl-detoxification
    use_wandb: true
    wandb_mode: online
# Top-level (active run) configuration — a second PPO detox run with a larger
# batch and its own output repository. These keys shadow nothing inside `rlhf`
# or `irl`; they sit at document root.
# NOTE(review): original paste had lost all indentation; hierarchy reconstructed
# from standard Hydra layout — confirm against the generating config.
now: 2025-05-16_10-56-50           # run timestamp recorded at launch
seed: 42
output_dir: ${hydra:runtime.cwd}/outputs/${now:%Y-%m-%d_%H-%M-%S}
mode: train
model:
  name: EleutherAI/pythia-160M
  learning_rate: 1.0e-05
  batch_size: 128
  mini_batch_size: 8
  forward_batch_size: 8
  gradient_accumulation_steps: 8
  reward_model: facebook/roberta-hate-speech-dynabench-r4-target
  use_raw_logits: true
  ppo_epochs: 4
  init_kl_coef: 0.2
  target: 6                        # adaptive-KL target
  cliprange: 0.2
  cliprange_value: 0.2
  vf_coef: 0.1
  adap_kl_ctrl: true
  use_score_norm: true
  ratio_threshold: 10.0
generation:
  min_length: 5
  output_min_length: 15
  output_max_length: 20
  do_sample: true
  top_k: 0.0                       # 0 disables top-k filtering
  top_p: 1.0
training:
  num_train_epochs: 200
  save_freq: 50
  eval_freq: 20
  seed: 42
dataset:
  name: allenai/real-toxicity-prompts
  toxicity_threshold: 0.3
  input_min_text_length: 15
  input_max_text_length: 20
  test_size: 0.1
output:
  push_to_hub: true
  push_checkpoints_to_hub: true
  checkpoint_push_freq: 20         # push every N epochs; matches eval_freq
  organization: ajagota71
  repository_name: pythia-160m-fb-detox
  private: false
wandb:
  project: irl_llms
  entity: null
  name: pythia-160M-2025-05-16_10-56-50