m4lw4r3exe committed
Commit 4d6f6c8 · 1 Parent(s): 6498a09

Training in progress, step 4096

Files changed (3):
  1. pytorch_model.bin +1 -1
  2. training_args.bin +1 -1
  3. training_args.json +8 -8
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:319da55f7bdc14e455deb61ce68f9663cffd4b765a97952f7c04b2708e957075
+oid sha256:0d3352a22ed801d659719e4b07ea59a2ebaf5933d7084dacea95d7eeee616d94
 size 139279005
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0200139492e444a9d322a4f90a96e6dde09c7a882f05b816c2345dade5ea0f98
+oid sha256:c956e29dc59f1758d4c871cae2cbd061682c3ec06c3f6ebfba7695270bfe97bc
 size 3515
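
Both pytorch_model.bin and training_args.bin are stored through Git LFS, so this commit only rewrites their pointer files: the sha256 oid changes while the reported sizes stay the same (139279005 and 3515 bytes). The snippet below is a minimal sketch, not part of this repository, showing how a downloaded artifact could be checked against the oid and size fields of its pointer; the file path and helper name are made up for illustration.

import hashlib

def matches_lfs_pointer(path, expected_sha256, expected_size):
    """Hypothetical helper: hash a local file and compare it with the
    'oid sha256:...' and 'size ...' fields of a Git LFS pointer."""
    digest = hashlib.sha256()
    total = 0
    with open(path, "rb") as blob:
        for chunk in iter(lambda: blob.read(1 << 20), b""):
            digest.update(chunk)
            total += len(chunk)
    return digest.hexdigest() == expected_sha256 and total == expected_size

# Values taken from the new pytorch_model.bin pointer in this commit.
print(matches_lfs_pointer(
    "pytorch_model.bin",
    "0d3352a22ed801d659719e4b07ea59a2ebaf5933d7084dacea95d7eeee616d94",
    139279005,
))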
training_args.json CHANGED
@@ -13,27 +13,27 @@
   "gradient_accumulation_steps": 1,
   "eval_accumulation_steps": null,
   "eval_delay": 0,
-  "learning_rate": 0.0005,
-  "weight_decay": 0.1,
+  "learning_rate": 5e-05,
+  "weight_decay": 0.0,
   "adam_beta1": 0.9,
   "adam_beta2": 0.999,
   "adam_epsilon": 1e-08,
   "max_grad_norm": 1.0,
-  "num_train_epochs": 8,
+  "num_train_epochs": 5,
   "max_steps": -1,
-  "lr_scheduler_type": "cosine",
+  "lr_scheduler_type": "linear",
   "warmup_ratio": 0.0,
-  "warmup_steps": 5000,
+  "warmup_steps": 0,
   "log_level": "passive",
   "log_level_replica": "passive",
   "log_on_each_node": true,
   "logging_dir": "models/improved_4bars/logs",
   "logging_strategy": "steps",
   "logging_first_step": false,
-  "logging_steps": 2048,
+  "logging_steps": 1024,
   "logging_nan_inf_filter": true,
   "save_strategy": "steps",
-  "save_steps": 8192,
+  "save_steps": 4096,
   "save_total_limit": 5,
   "save_on_each_node": false,
   "no_cuda": false,
@@ -55,7 +55,7 @@
   "tpu_metrics_debug": false,
   "debug": [],
   "dataloader_drop_last": false,
-  "eval_steps": 2048,
+  "eval_steps": 1024,
   "dataloader_num_workers": 0,
   "past_index": -1,
   "run_name": "models/improved_4bars",