ivangabriele committed · Commit 7ce1a1b (verified) · 1 parent: 70596d0

build(makefile): add some common commands

Files changed (1): Makefile +67 -0
Makefile CHANGED
@@ -27,3 +27,70 @@ test_examples:
 		TRL_ACCELERATE_CONFIG=$${file} bash $(COMMAND_FILES_PATH)/run_dpo.sh; \
 		echo $$?','$${file} >> temp_results_dpo_tests.txt; \
 	done
+
+# ------------------------------------------------------------------------------
+
+activate:
+	@echo "Activating Python virtual environment..."
+	. .venv/bin/activate
+
+run_rm_1:
+	python examples/scripts/reward_modeling.py \
+		--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
+		--dataset_name trl-lib/ultrafeedback_binarized \
+		--output_dir Qwen2-0.5B-Reward \
+		--per_device_train_batch_size 8 \
+		--num_train_epochs 1 \
+		--gradient_checkpointing True \
+		--learning_rate 1.0e-5 \
+		--logging_steps 25 \
+		--eval_strategy steps \
+		--eval_steps 50 \
+		--max_length 2048
+
+run_rm_2:
+	python examples/scripts/reward_modeling.py \
+		--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
+		--dataset_name trl-lib/ultrafeedback_binarized \
+		--output_dir Qwen2-0.5B-Reward-LoRA \
+		--per_device_train_batch_size 8 \
+		--num_train_epochs 1 \
+		--gradient_checkpointing True \
+		--learning_rate 1.0e-4 \
+		--logging_steps 25 \
+		--eval_strategy steps \
+		--eval_steps 50 \
+		--max_length 2048 \
+		--use_peft \
+		--lora_r 32 \
+		--lora_alpha 16
+
+run_ppo_1:
+	python -i examples/scripts/ppo/ppo.py \
+		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
+		--dataset_train_split descriptiveness \
+		--learning_rate 3e-6 \
+		--output_dir models/minimal/ppo \
+		--per_device_train_batch_size 64 \
+		--gradient_accumulation_steps 1 \
+		--total_episodes 10000 \
+		--model_name_or_path EleutherAI/pythia-1b-deduped \
+		--missing_eos_penalty 1.0
+
+run_ppo_2:
+	accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml \
+		examples/scripts/ppo/ppo.py \
+		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
+		--dataset_train_split descriptiveness \
+		--output_dir models/minimal/ppo \
+		--num_ppo_epochs 1 \
+		--num_mini_batches 1 \
+		--learning_rate 3e-6 \
+		--per_device_train_batch_size 1 \
+		--gradient_accumulation_steps 16 \
+		--total_episodes 10000 \
+		--model_name_or_path EleutherAI/pythia-1b-deduped \
+		--sft_model_path EleutherAI/pythia-1b-deduped \
+		--reward_model_path EleutherAI/pythia-1b-deduped \
+		--local_rollout_forward_batch_size 1 \
+		--missing_eos_penalty 1.0
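
The new targets are thin wrappers around TRL's example scripts, so a typical session might look like the sketch below (assuming, as the activate target itself does, a .venv virtual environment at the repository root). One caveat: `make activate` has no lasting effect as written, because Make runs each recipe line in its own subshell, so sourcing .venv/bin/activate there cannot change the caller's environment; source it directly in your shell instead.

    # Activate the venv in your own shell (a Make recipe cannot do this for you).
    source .venv/bin/activate

    make run_rm_1   # reward modeling: full fine-tune of Qwen2-0.5B-Instruct
    make run_rm_2   # reward modeling with LoRA adapters (r=32, alpha=16)
    make run_ppo_1  # single-process PPO; `python -i` leaves a REPL open afterwards
    make run_ppo_2  # distributed PPO via accelerate with DeepSpeed ZeRO-3

Note also that run_ppo_2 points --sft_model_path and --reward_model_path at the same base checkpoint as --model_name_or_path, which reads as a smoke-test setup; a real PPO run would normally substitute trained SFT and reward-model checkpoints there.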