todenthal commited on
Commit
68017bb
·
verified ·
1 Parent(s): 5b7cd9e

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +136 -0
trainer_state.json ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 45,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06666666666666667,
13
+ "grad_norm": 98.21532030503785,
14
+ "learning_rate": 4.000000000000001e-06,
15
+ "loss": 1.8595,
16
+ "step": 1
17
+ },
18
+ {
19
+ "epoch": 0.3333333333333333,
20
+ "grad_norm": 30.800740243895696,
21
+ "learning_rate": 2e-05,
22
+ "loss": 1.3856,
23
+ "step": 5
24
+ },
25
+ {
26
+ "epoch": 0.6666666666666666,
27
+ "grad_norm": 15.616799282095581,
28
+ "learning_rate": 1.9238795325112867e-05,
29
+ "loss": 0.8256,
30
+ "step": 10
31
+ },
32
+ {
33
+ "epoch": 1.0,
34
+ "grad_norm": 4.893885987624207,
35
+ "learning_rate": 1.7071067811865477e-05,
36
+ "loss": 0.612,
37
+ "step": 15
38
+ },
39
+ {
40
+ "epoch": 1.0,
41
+ "eval_loss": 0.566252589225769,
42
+ "eval_runtime": 4.2539,
43
+ "eval_samples_per_second": 46.78,
44
+ "eval_steps_per_second": 0.94,
45
+ "step": 15
46
+ },
47
+ {
48
+ "epoch": 1.3333333333333333,
49
+ "grad_norm": 3.3286961705391014,
50
+ "learning_rate": 1.3826834323650899e-05,
51
+ "loss": 0.5009,
52
+ "step": 20
53
+ },
54
+ {
55
+ "epoch": 1.6666666666666665,
56
+ "grad_norm": 1.3920639558462544,
57
+ "learning_rate": 1e-05,
58
+ "loss": 0.4407,
59
+ "step": 25
60
+ },
61
+ {
62
+ "epoch": 2.0,
63
+ "grad_norm": 1.3655223364170395,
64
+ "learning_rate": 6.173165676349103e-06,
65
+ "loss": 0.4089,
66
+ "step": 30
67
+ },
68
+ {
69
+ "epoch": 2.0,
70
+ "eval_loss": 0.429166316986084,
71
+ "eval_runtime": 4.1164,
72
+ "eval_samples_per_second": 48.343,
73
+ "eval_steps_per_second": 0.972,
74
+ "step": 30
75
+ },
76
+ {
77
+ "epoch": 2.3333333333333335,
78
+ "grad_norm": 1.2577240969960297,
79
+ "learning_rate": 2.9289321881345257e-06,
80
+ "loss": 0.3299,
81
+ "step": 35
82
+ },
83
+ {
84
+ "epoch": 2.6666666666666665,
85
+ "grad_norm": 0.9884829876056015,
86
+ "learning_rate": 7.612046748871327e-07,
87
+ "loss": 0.3136,
88
+ "step": 40
89
+ },
90
+ {
91
+ "epoch": 3.0,
92
+ "grad_norm": 1.4077416282701332,
93
+ "learning_rate": 0.0,
94
+ "loss": 0.3032,
95
+ "step": 45
96
+ },
97
+ {
98
+ "epoch": 3.0,
99
+ "eval_loss": 0.4109707474708557,
100
+ "eval_runtime": 4.1074,
101
+ "eval_samples_per_second": 48.449,
102
+ "eval_steps_per_second": 0.974,
103
+ "step": 45
104
+ },
105
+ {
106
+ "epoch": 3.0,
107
+ "step": 45,
108
+ "total_flos": 18844169011200.0,
109
+ "train_loss": 0.5794603930579292,
110
+ "train_runtime": 365.3228,
111
+ "train_samples_per_second": 14.765,
112
+ "train_steps_per_second": 0.123
113
+ }
114
+ ],
115
+ "logging_steps": 5,
116
+ "max_steps": 45,
117
+ "num_input_tokens_seen": 0,
118
+ "num_train_epochs": 3,
119
+ "save_steps": 100,
120
+ "stateful_callbacks": {
121
+ "TrainerControl": {
122
+ "args": {
123
+ "should_epoch_stop": false,
124
+ "should_evaluate": false,
125
+ "should_log": false,
126
+ "should_save": false,
127
+ "should_training_stop": false
128
+ },
129
+ "attributes": {}
130
+ }
131
+ },
132
+ "total_flos": 18844169011200.0,
133
+ "train_batch_size": 16,
134
+ "trial_name": null,
135
+ "trial_params": null
136
+ }