pere committed on
Commit
d07303c
·
1 Parent(s): aac9df5

Saving weights and logs of step 10000

Browse files
events.out.tfevents.1641311691.t1v-n-ccbf3e94-w-0.469726.3.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2498bac991bbea8880117e88bfedd180928eaf9b4314c79bbfcb7e0c0044f371
3
+ size 1470136
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:615e0a78a29e613d4f6f49773083366860c3f4fbf5dda3cd73a84169550ecd46
3
  size 498796983
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3baa61c5e3349071ae6fc708810d2f500f7aa33a293f7ea847507c8210b383f
3
  size 498796983
run_mlm_flax.py CHANGED
@@ -129,6 +129,10 @@ class DataTrainingArguments:
129
  static_learning_rate: bool = field(
130
  default=False, metadata={"help": "Use a non decaying learning rate"}
131
  )
 
 
 
 
132
  auth_token: bool = field(
133
  default=False, metadata={"help": "Use authorisation token"}
134
  )
@@ -510,6 +514,8 @@ if __name__ == "__main__":
510
 
511
  if data_args.static_learning_rate:
512
  end_lr_value = training_args.learning_rate
 
 
513
  else:
514
  end_lr_value = 0
515
 
 
129
  static_learning_rate: bool = field(
130
  default=False, metadata={"help": "Use a non decaying learning rate"}
131
  )
132
+ end_learning_rate: float = field(
133
+ default=0, metadata={"help": "End learning rate. Will be ignored if static learning rate is set"}
134
+ )
135
+
136
  auth_token: bool = field(
137
  default=False, metadata={"help": "Use authorisation token"}
138
  )
 
514
 
515
  if data_args.static_learning_rate:
516
  end_lr_value = training_args.learning_rate
517
+ elif data_args.end_learning_rate:
518
+ end_lr_value = data_args.end_learning_rate
519
  else:
520
  end_lr_value = 0
521
 
run_step2.sh CHANGED
@@ -12,6 +12,7 @@
12
  --per_device_train_batch_size="40" \
13
  --per_device_eval_batch_size="40" \
14
  --learning_rate="2e-4" \
 
15
  --warmup_steps="0" \
16
  --overwrite_output_dir \
17
  --num_train_epochs="2" \
@@ -23,6 +24,5 @@
23
  --eval_steps="10000" \
24
  --preprocessing_num_workers="64" \
25
  --auth_token="True" \
26
- --static_learning_rate="True" \
27
  --dtype="bfloat16" \
28
  --push_to_hub
 
12
  --per_device_train_batch_size="40" \
13
  --per_device_eval_batch_size="40" \
14
  --learning_rate="2e-4" \
15
+ --end_learning_rate="1e-4" \
16
  --warmup_steps="0" \
17
  --overwrite_output_dir \
18
  --num_train_epochs="2" \
 
24
  --eval_steps="10000" \
25
  --preprocessing_num_workers="64" \
26
  --auth_token="True" \
 
27
  --dtype="bfloat16" \
28
  --push_to_hub