# Training-run configuration (looks like a PyYAML dump of a Python config object:
# keys are alphabetically sorted at each nesting level).
#
# SECURITY: the `!!python/object/apply:` tags below are Python-specific and are
# only resolved by PyYAML's unsafe loaders (yaml.unsafe_load / yaml.UnsafeLoader),
# which call arbitrary Python callables while parsing. Load this file only from
# a trusted source; yaml.safe_load will reject it.

attn_implementation: eager
backdoor_dataset: !!python/object/apply:src.data.dataset.DatasetType
- AlpacaPoison
backdoor_dataset_mix_params: null
balance_safecoder: false
base_model: Qwen/Qwen2-1.5B-Instruct
# NOTE(review): dtype is bfloat16 here while training_args.bf16 is false below;
# presumably this controls the model load dtype only -- confirm against the loader.
dtype: bfloat16
lora_config: null
main_device: cuda

# List of meta-learning step configurations (a single entry in this run).
meta_learning_configs:
- dataset: !!python/object/apply:src.data.dataset.DatasetType
  - AlpacaGPT4
  device: cuda
  gradient_accumulation_steps: 1
  learning_rate: 1.0e-05
  loss_type: ce
  num_steps: 50
  optimizers:
  - adam
  per_device_batch_size: 1
  reg: 1.0
  run_every_n_steps: 1
  safecoder_lambda: 1.0
  sequence_length: 512
  warmup_steps: 0
meta_learning_name: SecretSauce
no_backdoor: false
pgd_training_config: null
precompute_distillation: false

random_training_config:
  as_regularizer: false
  device: cuda
  loss_type: ce
  n_samples: 1
  norm: 3.0
  reg: 1.0
  safecoder_lambda: 1.0

reg_dataset: !!python/object/apply:src.data.dataset.DatasetType
- SecretSauce
# Mapping keyed by DatasetType objects (YAML complex keys, hence the `? ... : ...`
# form). The four weights below sum to 1.0.
reg_dataset_mix_params:
  ? !!python/object/apply:src.data.dataset.DatasetType
  - AlpacaGPT4
  : 0.7
  ? !!python/object/apply:src.data.dataset.DatasetType
  - CodeAlpaca
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
  - OpenMathInstruct
  : 0.1
  ? !!python/object/apply:src.data.dataset.DatasetType
  - PubMedQA
  : 0.1
reg_device: cuda
reg_lambda: 1.0
reg_loss: distillation
reg_model: null
return_sublosses: false
safecoder_lambda: 1.0
sequence_length: 512
streaming: true
tokenizer: null

# NOTE(review): these keys match Hugging Face `TrainingArguments` field names;
# presumably they are forwarded to it -- confirm against the consuming code.
training_args:
  bf16: false
  ddp_find_unused_parameters: false
  do_train: true
  fp16: false
  gradient_accumulation_steps: 1
  gradient_checkpointing: false
  hub_strategy: all_checkpoints
  learning_rate: 1.0e-05
  logging_steps: 10
  lr_scheduler_type: cosine
  max_grad_norm: 0.3
  # NOTE(review): if this is HF TrainingArguments, a positive max_steps takes
  # precedence over num_train_epochs, so the run length is 2000 steps.
  max_steps: 2000
  num_train_epochs: 1
  optim: adafactor
  output_dir: Grogros/Qwen2-1.5B-Instruct-distillation-SecretSauce-3.0-AlpacaPoison-1e5
  overwrite_output_dir: true
  per_device_train_batch_size: 16
  push_to_hub: true
  report_to: none
  # save_steps equals max_steps, so a step-based save occurs only at step 2000.
  save_steps: 2000
  save_strategy: steps
  warmup_ratio: 0.1
|