Ehsanl committed on
Commit
553654b
·
verified ·
1 Parent(s): ac5ffaa

Upload args.txt with huggingface_hub

Browse files
Files changed (1) hide show
  1. args.txt +191 -0
args.txt ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Launch fine-tuning of a sentence-embedding model (FlagEmbedding-style
# finetune.py) under torchrun, pushing checkpoints to the Hugging Face Hub.
#
# Required environment:
#   hf_hub_token  - HF access token; consumed by --hub_token (push_to_hub on).
# Optional environment:
#   HF_HUB_CACHE  - HF hub cache dir (defaults to ~/.cache/huggingface/hub).

set -euo pipefail

export WANDB_MODE=disabled

# --- Previously used / alternative options, kept for reference ------------
# train_data="\
#     ../example_data/retrieval \
#     ../example_data/sts/sts.jsonl \
#     ../example_data/classification-no_in_batch_neg \
#     ../example_data/clustering-no_in_batch_neg "
#     --output_dir ./test_encoder_only_base_bge-large-en-v1.5_sd \
#     --overwrite_output_dir \
#     --deepspeed ../../ds_stage0.json \
#     --kd_loss_type kl_div \
#     --query_instruction_for_retrieval 'query: ' \
#     --passage_instruction_for_retrieval 'passage: ' \
#     --query_instruction_format '{}{}' \
#     --passage_instruction_format '{}{}' \
#     last_token
#     --negatives_cross_device \
# --------------------------------------------------------------------------

num_train_epochs=1
per_device_train_batch_size=1024
num_gpus=1
model_name_or_path="nicolaebanari/e5-large-v2-bertje"  # alt: Qwen/Qwen3-Embedding-4B

# Optional data-preparation step (currently disabled):
#python build_data.py --use_old_data True --use_syn_data True --filter_by_dpn True --model $model_name_or_path --token $hf_hub_token #--use_cnv_data False --model $model_name_or_path --token $hf_hub_token --is_llm False

train_data="data/"

# NOTE: epochs/batch size above are sized for testing; scale up for real runs.

# Fail fast with a clear message if the token is missing. Without this, an
# empty --hub_token value would make finetune.py's argparse swallow the next
# flag (--temperature) as the token, producing a confusing downstream error.
: "${hf_hub_token:?hf_hub_token must be set (required because --push_to_hub True)}"

# Default the HF hub cache when the caller did not provide one.
export HF_HUB_CACHE="${HF_HUB_CACHE:-$HOME/.cache/huggingface/hub}"

# Model selection and (disabled) LoRA configuration.
model_args="\
    --model_name_or_path $model_name_or_path \
    --cache_dir $HF_HUB_CACHE \
    --trust_remote_code True \
    --load_bf16 True \
    --use_flash_attention False \
    --add_lora False \
    --lora_rank 16 \
    --lora_alpha 32 \
    --lora_dropout 0.05 \
"

# Dataset layout, sequence lengths, and instruction templates.
data_args="\
    --train_data $train_data \
    --cache_path ~/.cache \
    --train_group_size 2 \
    --query_max_len 450 \
    --passage_max_len 500 \
    --pad_to_multiple_of 8 \
    --same_dataset_within_batch True \
    --small_threshold 0 \
    --drop_threshold 0 \
    --query_instruction_for_retrieval 'query: ' \
    --passage_instruction_for_retrieval 'passage: ' \
    --query_instruction_format '{}{}' \
    --passage_instruction_format '{}{}' \
"

# Optimizer/schedule, checkpointing, Hub upload, and DeepSpeed settings.
training_args="\
    --learning_rate 1e-5 \
    --bf16 \
    --num_train_epochs $num_train_epochs \
    --per_device_train_batch_size $per_device_train_batch_size \
    --gradient_accumulation_steps 1 \
    --gradient_checkpointing True \
    --negatives_cross_device False \
    --dataloader_drop_last True \
    --warmup_ratio .25 \
    --weight_decay 0.1 \
    --logging_steps 10 \
    --save_total_limit 4 \
    --save_strategy steps \
    --save_steps 0.25 \
    --push_to_hub True \
    --hub_model_id Ehsanl/e5-large-v2-bertje-old-syn-filt_2ng_lr_1e5 \
    --hub_token $hf_hub_token \
    --temperature 0.02 \
    --sentence_pooling_method mean \
    --normalize_embeddings True \
    --lr_scheduler_type constant_with_warmup \
    --deepspeed ds_stage3.json \
"

cmd="torchrun --nproc_per_node $num_gpus \
    finetune.py \
    $model_args \
    $data_args \
    $training_args \
"

# Log the exact command (quoted so it prints on one coherent line), then run.
printf '%s\n' "$cmd"
eval "$cmd"