build(makefile): add some common commands
Makefile (CHANGED)
@@ -27,3 +27,70 @@ test_examples:
 		TRL_ACCELERATE_CONFIG=$${file} bash $(COMMAND_FILES_PATH)/run_dpo.sh; \
 		echo $$?','$${file} >> temp_results_dpo_tests.txt; \
 	done
+
+# ------------------------------------------------------------------------------
+
+activate:
+	@echo "Activating Python virtual environment..."
+	. .venv/bin/activate
+
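A caveat on the activate target: Make runs each recipe line in its own subshell, so sourcing .venv/bin/activate only takes effect for that single line; it cannot activate the environment for later recipe lines or for the shell that invoked make. A minimal workaround sketch, assuming the venv lives at .venv (the PYTHON variable and check_env target are illustrative, not part of this PR):

# Point recipes at the venv interpreter directly instead of "activating" it.
PYTHON ?= .venv/bin/python

# Sanity check: print which interpreter the targets would actually use.
check_env:
	$(PYTHON) -c "import sys; print(sys.executable)"

Targets can then call $(PYTHON) instead of relying on whichever python is first on PATH.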
+run_rm_1:
+	python examples/scripts/reward_modeling.py \
+		--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
+		--dataset_name trl-lib/ultrafeedback_binarized \
+		--output_dir Qwen2-0.5B-Reward \
+		--per_device_train_batch_size 8 \
+		--num_train_epochs 1 \
+		--gradient_checkpointing True \
+		--learning_rate 1.0e-5 \
+		--logging_steps 25 \
+		--eval_strategy steps \
+		--eval_steps 50 \
+		--max_length 2048
+
+run_rm_2:
+	python examples/scripts/reward_modeling.py \
+		--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
+		--dataset_name trl-lib/ultrafeedback_binarized \
+		--output_dir Qwen2-0.5B-Reward-LoRA \
+		--per_device_train_batch_size 8 \
+		--num_train_epochs 1 \
+		--gradient_checkpointing True \
+		--learning_rate 1.0e-4 \
+		--logging_steps 25 \
+		--eval_strategy steps \
+		--eval_steps 50 \
+		--max_length 2048 \
+		--use_peft \
+		--lora_r 32 \
+		--lora_alpha 16
+
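run_rm_1 and run_rm_2 differ only in the output directory, the learning rate, and the LoRA flags. A hypothetical refactor (not part of this PR) that factors the shared flags into a Make variable so the two targets stay in sync:

# Flags shared by both reward-modeling runs.
RM_COMMON_FLAGS = \
	--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
	--dataset_name trl-lib/ultrafeedback_binarized \
	--per_device_train_batch_size 8 \
	--num_train_epochs 1 \
	--gradient_checkpointing True \
	--logging_steps 25 \
	--eval_strategy steps \
	--eval_steps 50 \
	--max_length 2048

# Full fine-tuning run.
run_rm_1:
	python examples/scripts/reward_modeling.py $(RM_COMMON_FLAGS) \
		--output_dir Qwen2-0.5B-Reward \
		--learning_rate 1.0e-5

# LoRA run: higher learning rate, PEFT adapters instead of full fine-tuning.
run_rm_2:
	python examples/scripts/reward_modeling.py $(RM_COMMON_FLAGS) \
		--output_dir Qwen2-0.5B-Reward-LoRA \
		--learning_rate 1.0e-4 \
		--use_peft \
		--lora_r 32 \
		--lora_alpha 16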
+run_ppo_1:
+	python examples/scripts/ppo/ppo.py \
+		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
+		--dataset_train_split descriptiveness \
+		--learning_rate 3e-6 \
+		--output_dir models/minimal/ppo \
+		--per_device_train_batch_size 64 \
+		--gradient_accumulation_steps 1 \
+		--total_episodes 10000 \
+		--model_name_or_path EleutherAI/pythia-1b-deduped \
+		--missing_eos_penalty 1.0
+
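run_ppo_1 commits to 10000 episodes up front. For a quick smoke test, the episode budget could be exposed as an overridable Make variable; the EPISODES variable and run_ppo_smoke target below are a hypothetical sketch, not part of this PR:

# Override on the command line: make run_ppo_smoke EPISODES=64
EPISODES ?= 10000

run_ppo_smoke:
	python examples/scripts/ppo/ppo.py \
		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
		--dataset_train_split descriptiveness \
		--learning_rate 3e-6 \
		--output_dir models/minimal/ppo-smoke \
		--per_device_train_batch_size 64 \
		--gradient_accumulation_steps 1 \
		--total_episodes $(EPISODES) \
		--model_name_or_path EleutherAI/pythia-1b-deduped \
		--missing_eos_penalty 1.0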
+run_ppo_2:
+	accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml \
+		examples/scripts/ppo/ppo.py \
+		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
+		--dataset_train_split descriptiveness \
+		--output_dir models/minimal/ppo \
+		--num_ppo_epochs 1 \
+		--num_mini_batches 1 \
+		--learning_rate 3e-6 \
+		--per_device_train_batch_size 1 \
+		--gradient_accumulation_steps 16 \
+		--total_episodes 10000 \
+		--model_name_or_path EleutherAI/pythia-1b-deduped \
+		--sft_model_path EleutherAI/pythia-1b-deduped \
+		--reward_model_path EleutherAI/pythia-1b-deduped \
+		--local_rollout_forward_batch_size 1 \
+		--missing_eos_penalty 1.0
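run_ppo_2 takes its process count from the DeepSpeed ZeRO-3 accelerate config file. accelerate launch also accepts --num_processes to override that value at launch time; the run_ppo_2_4gpu target below is a hypothetical variant for a 4-GPU node, not part of this PR:

run_ppo_2_4gpu:
	accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml \
		--num_processes 4 \
		examples/scripts/ppo/ppo.py \
		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
		--dataset_train_split descriptiveness \
		--output_dir models/minimal/ppo \
		--num_ppo_epochs 1 \
		--num_mini_batches 1 \
		--learning_rate 3e-6 \
		--per_device_train_batch_size 1 \
		--gradient_accumulation_steps 16 \
		--total_episodes 10000 \
		--model_name_or_path EleutherAI/pythia-1b-deduped \
		--sft_model_path EleutherAI/pythia-1b-deduped \
		--reward_model_path EleutherAI/pythia-1b-deduped \
		--local_rollout_forward_batch_size 1 \
		--missing_eos_penalty 1.0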