AmberYifan committed on
Commit
5a6b09e
1 Parent(s): b36953f

Model save

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.8720041565273119,
5
- "train_runtime": 584.014,
6
- "train_samples": 1466,
7
- "train_samples_per_second": 2.51,
8
- "train_steps_per_second": 0.079
9
  }
 
1
  {
2
+ "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.8712061278281673,
5
+ "train_runtime": 704.489,
6
+ "train_samples": 1997,
7
+ "train_samples_per_second": 2.835,
8
+ "train_steps_per_second": 0.088
9
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf1444504557a8b0fbab21fadde0f326be8b57227b3a562fd5013a63982872fa
3
  size 4938985352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26af5658aeb940e3d8df3d6ec482623a03d41463ef3b9324352bc3cea12ef355
3
  size 4938985352
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5dd95b3cb73d1a921986168ac5d8322ddd9453d66be41e58c3f4f861d94037f
3
  size 4947390880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ee76989dd281c40f08189f2acff9216deddfe73c2daf975531c2860bfe87eb5
3
  size 4947390880
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6c32a9f796df5e119a0be617e0dc2402b20addd27f51d4405d366e9019d2d62
3
  size 3590488816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a8e0ad0150ed7a73bba4827864e7578c68941a48f4c8917fd70842258428e06
3
  size 3590488816
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.8720041565273119,
5
- "train_runtime": 584.014,
6
- "train_samples": 1466,
7
- "train_samples_per_second": 2.51,
8
- "train_steps_per_second": 0.079
9
  }
 
1
  {
2
+ "epoch": 0.992,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.8712061278281673,
5
+ "train_runtime": 704.489,
6
+ "train_samples": 1997,
7
+ "train_samples_per_second": 2.835,
8
+ "train_steps_per_second": 0.088
9
  }
trainer_state.json CHANGED
@@ -1,22 +1,22 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 200,
6
- "global_step": 46,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.021739130434782608,
13
- "grad_norm": 49.405524700479546,
14
- "learning_rate": 1e-07,
15
- "logits/generated": -0.6996344327926636,
16
- "logits/real": -0.6411839723587036,
17
- "logps/generated": -268.8082580566406,
18
- "logps/real": -278.0677490234375,
19
- "loss": 0.9671,
20
  "rewards/accuracies": 0.0,
21
  "rewards/generated": 0.0,
22
  "rewards/margins": 0.0,
@@ -24,77 +24,107 @@
24
  "step": 1
25
  },
26
  {
27
- "epoch": 0.21739130434782608,
28
- "grad_norm": 41.54584385387338,
29
- "learning_rate": 4.390243902439024e-07,
30
- "logits/generated": -0.8907153010368347,
31
- "logits/real": -0.8477872014045715,
32
- "logps/generated": -284.6246337890625,
33
- "logps/real": -273.05023193359375,
34
- "loss": 0.972,
35
- "rewards/accuracies": 0.4444444477558136,
36
- "rewards/generated": 0.16897444427013397,
37
- "rewards/margins": -0.029052892699837685,
38
- "rewards/real": 0.13992153108119965,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.43478260869565216,
43
- "grad_norm": 34.40402976139953,
44
- "learning_rate": 3.170731707317073e-07,
45
- "logits/generated": -0.8808904886245728,
46
- "logits/real": -0.8196160197257996,
47
- "logps/generated": -291.40716552734375,
48
- "logps/real": -271.150634765625,
49
- "loss": 0.8645,
50
- "rewards/accuracies": 0.737500011920929,
51
- "rewards/generated": 0.16139307618141174,
52
- "rewards/margins": 0.2521939277648926,
53
- "rewards/real": 0.4135870039463043,
54
  "step": 20
55
  },
56
  {
57
- "epoch": 0.6521739130434783,
58
- "grad_norm": 33.980993784539564,
59
- "learning_rate": 1.951219512195122e-07,
60
- "logits/generated": -0.8029176592826843,
61
- "logits/real": -0.7914024591445923,
62
- "logps/generated": -294.6733093261719,
63
- "logps/real": -278.3045349121094,
64
- "loss": 0.8687,
65
- "rewards/accuracies": 0.699999988079071,
66
- "rewards/generated": 0.16676124930381775,
67
- "rewards/margins": 0.27672332525253296,
68
- "rewards/real": 0.4434846043586731,
69
  "step": 30
70
  },
71
  {
72
- "epoch": 0.8695652173913043,
73
- "grad_norm": 51.08625886158765,
74
- "learning_rate": 7.317073170731706e-08,
75
- "logits/generated": -0.8437131643295288,
76
- "logits/real": -0.8044729232788086,
77
- "logps/generated": -295.627685546875,
78
- "logps/real": -277.035400390625,
79
- "loss": 0.8454,
80
- "rewards/accuracies": 0.762499988079071,
81
- "rewards/generated": 0.19026382267475128,
82
- "rewards/margins": 0.39240655303001404,
83
- "rewards/real": 0.5826703906059265,
84
  "step": 40
85
  },
86
  {
87
- "epoch": 1.0,
88
- "step": 46,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  "total_flos": 0.0,
90
- "train_loss": 0.8720041565273119,
91
- "train_runtime": 584.014,
92
- "train_samples_per_second": 2.51,
93
- "train_steps_per_second": 0.079
94
  }
95
  ],
96
  "logging_steps": 10,
97
- "max_steps": 46,
98
  "num_input_tokens_seen": 0,
99
  "num_train_epochs": 1,
100
  "save_steps": 200,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.992,
5
  "eval_steps": 200,
6
+ "global_step": 62,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.016,
13
+ "grad_norm": 43.21725757428246,
14
+ "learning_rate": 7.142857142857142e-08,
15
+ "logits/generated": -0.9537944793701172,
16
+ "logits/real": -1.0051143169403076,
17
+ "logps/generated": -316.6651611328125,
18
+ "logps/real": -291.36328125,
19
+ "loss": 0.9684,
20
  "rewards/accuracies": 0.0,
21
  "rewards/generated": 0.0,
22
  "rewards/margins": 0.0,
 
24
  "step": 1
25
  },
26
  {
27
+ "epoch": 0.16,
28
+ "grad_norm": 40.965171560031784,
29
+ "learning_rate": 4.727272727272727e-07,
30
+ "logits/generated": -0.8021091818809509,
31
+ "logits/real": -0.7525854706764221,
32
+ "logps/generated": -295.2068176269531,
33
+ "logps/real": -279.92877197265625,
34
+ "loss": 0.9499,
35
+ "rewards/accuracies": 0.5555555820465088,
36
+ "rewards/generated": -0.0021832569036632776,
37
+ "rewards/margins": 0.04766163229942322,
38
+ "rewards/real": 0.045478373765945435,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.32,
43
+ "grad_norm": 44.279849646311796,
44
+ "learning_rate": 3.818181818181818e-07,
45
+ "logits/generated": -0.7875266671180725,
46
+ "logits/real": -0.7331272959709167,
47
+ "logps/generated": -282.63128662109375,
48
+ "logps/real": -270.755126953125,
49
+ "loss": 0.9017,
50
+ "rewards/accuracies": 0.6875,
51
+ "rewards/generated": 0.061399005353450775,
52
+ "rewards/margins": 0.26890262961387634,
53
+ "rewards/real": 0.3303016722202301,
54
  "step": 20
55
  },
56
  {
57
+ "epoch": 0.48,
58
+ "grad_norm": 54.547186171140005,
59
+ "learning_rate": 2.909090909090909e-07,
60
+ "logits/generated": -0.8387455940246582,
61
+ "logits/real": -0.7834113836288452,
62
+ "logps/generated": -286.93084716796875,
63
+ "logps/real": -276.5713806152344,
64
+ "loss": 0.8875,
65
+ "rewards/accuracies": 0.6499999761581421,
66
+ "rewards/generated": 0.2545087933540344,
67
+ "rewards/margins": 0.18184302747249603,
68
+ "rewards/real": 0.43635183572769165,
69
  "step": 30
70
  },
71
  {
72
+ "epoch": 0.64,
73
+ "grad_norm": 43.815629015597885,
74
+ "learning_rate": 2e-07,
75
+ "logits/generated": -0.882081151008606,
76
+ "logits/real": -0.8120096325874329,
77
+ "logps/generated": -298.81744384765625,
78
+ "logps/real": -270.52203369140625,
79
+ "loss": 0.8268,
80
+ "rewards/accuracies": 0.7875000238418579,
81
+ "rewards/generated": 0.1146630272269249,
82
+ "rewards/margins": 0.46714526414871216,
83
+ "rewards/real": 0.5818082690238953,
84
  "step": 40
85
  },
86
  {
87
+ "epoch": 0.8,
88
+ "grad_norm": 34.532344880201805,
89
+ "learning_rate": 1.0909090909090908e-07,
90
+ "logits/generated": -0.8125056028366089,
91
+ "logits/real": -0.8328151702880859,
92
+ "logps/generated": -292.03656005859375,
93
+ "logps/real": -276.155029296875,
94
+ "loss": 0.8497,
95
+ "rewards/accuracies": 0.7124999761581421,
96
+ "rewards/generated": 0.2003205567598343,
97
+ "rewards/margins": 0.3035683035850525,
98
+ "rewards/real": 0.5038889050483704,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.96,
103
+ "grad_norm": 32.27958496678707,
104
+ "learning_rate": 1.818181818181818e-08,
105
+ "logits/generated": -0.8254791498184204,
106
+ "logits/real": -0.8004539608955383,
107
+ "logps/generated": -292.7684020996094,
108
+ "logps/real": -276.4676513671875,
109
+ "loss": 0.8181,
110
+ "rewards/accuracies": 0.75,
111
+ "rewards/generated": 0.117733433842659,
112
+ "rewards/margins": 0.36951756477355957,
113
+ "rewards/real": 0.487250953912735,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.992,
118
+ "step": 62,
119
  "total_flos": 0.0,
120
+ "train_loss": 0.8712061278281673,
121
+ "train_runtime": 704.489,
122
+ "train_samples_per_second": 2.835,
123
+ "train_steps_per_second": 0.088
124
  }
125
  ],
126
  "logging_steps": 10,
127
+ "max_steps": 62,
128
  "num_input_tokens_seen": 0,
129
  "num_train_epochs": 1,
130
  "save_steps": 200,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5002df4ba0cf3523d59d0c398c6d4559e323644cf7998b516f1320555392825b
3
  size 6392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21383022ec863887b417a835a30b51dd8d64986145c3645c8e517696c1c3a00d
3
  size 6392