0xtinuviel commited on
Commit
a018844
·
verified ·
1 Parent(s): 660eb4a

End of training

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0634e2f97a968ff8ee0c30a4e51b0c6780a895d817342ac26a4e2d6e2dd6e576
3
  size 842289128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ef875dfc91d14df97712e673b6783804ecc245f2a88ecb2fd8d8f4f12b3751e
3
  size 842289128
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 6.820050475653261e-06,
4
- "train_runtime": 4436.3893,
5
  "train_samples": 9,
6
- "train_samples_per_second": 0.018,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 1.4550274863722734e-05,
4
+ "train_runtime": 4069.2289,
5
  "train_samples": 9,
6
+ "train_samples_per_second": 0.02,
7
  "train_steps_per_second": 0.002
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 6.820050475653261e-06,
4
- "train_runtime": 4436.3893,
5
  "train_samples": 9,
6
- "train_samples_per_second": 0.018,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 1.4550274863722734e-05,
4
+ "train_runtime": 4069.2289,
5
  "train_samples": 9,
6
+ "train_samples_per_second": 0.02,
7
  "train_steps_per_second": 0.002
8
  }
trainer_state.json CHANGED
@@ -10,107 +10,107 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "completion_length": 342.375,
14
  "epoch": 0.8888888888888888,
15
- "grad_norm": 0.4052090644836426,
16
- "kl": 0.012759620556607842,
17
  "learning_rate": 5e-07,
18
  "loss": 0.0,
19
- "reward": 5.321024656295776,
20
- "reward_std": 0.10023243329487741,
21
- "rewards/concensus_correctness_reward_func": 1.3215000201016665,
22
  "rewards/consensus_reward_func": 2.0,
23
  "rewards/cumulative_reward_2": 0.0,
24
  "rewards/final_correctness_reward_func": 1.0,
25
- "rewards/question_recreation_reward_func": 0.9986496642231941,
26
  "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
- "rewards/xmlcount_reward_func": 0.0008750008419156075,
29
  "step": 2
30
  },
31
  {
32
- "completion_length": 301.3,
33
  "epoch": 1.4444444444444444,
34
- "grad_norm": 0.41138553619384766,
35
- "kl": 0.01866710316389799,
36
  "learning_rate": 4.415111107797445e-07,
37
  "loss": 0.0,
38
- "reward": 6.361130905151367,
39
- "reward_std": 0.054022852703928946,
40
- "rewards/concensus_correctness_reward_func": 2.0604000210762026,
41
  "rewards/consensus_reward_func": 2.0,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 1.2,
44
  "rewards/question_recreation_reward_func": 0.9993311047554017,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
- "rewards/xmlcount_reward_func": 0.10139999985694885,
48
  "step": 4
49
  },
50
  {
51
- "completion_length": 351.8,
52
  "epoch": 2.0,
53
- "grad_norm": 0.16249226033687592,
54
- "kl": 0.01092723784968257,
55
  "learning_rate": 2.934120444167326e-07,
56
  "loss": 0.0,
57
- "reward": 5.599708366394043,
58
- "reward_std": 0.058265522867441175,
59
- "rewards/concensus_correctness_reward_func": 1.3724000126123428,
60
  "rewards/consensus_reward_func": 2.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 1.2,
63
  "rewards/question_recreation_reward_func": 0.998508358001709,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
- "rewards/xmlcount_reward_func": 0.028800001740455626,
67
  "step": 6
68
  },
69
  {
70
- "completion_length": 346.8125,
71
  "epoch": 2.888888888888889,
72
- "grad_norm": 0.31399527192115784,
73
- "kl": 0.014027125318534672,
74
  "learning_rate": 1.2500000000000005e-07,
75
  "loss": 0.0,
76
- "reward": 5.361516207456589,
77
- "reward_std": 0.12613017641706392,
78
- "rewards/concensus_correctness_reward_func": 1.3687500227242708,
79
  "rewards/consensus_reward_func": 2.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 1.0,
82
  "rewards/question_recreation_reward_func": 0.9989537969231606,
83
  "rewards/soft_format_reward_func": 0.0,
84
  "rewards/strict_format_reward_func": 0.0,
85
- "rewards/xmlcount_reward_func": -0.0061874995008111,
86
  "step": 8
87
  },
88
  {
89
- "completion_length": 316.2,
90
  "epoch": 3.4444444444444446,
91
- "grad_norm": 0.42416098713874817,
92
- "kl": 0.014391851425170899,
93
  "learning_rate": 1.507684480352292e-08,
94
- "loss": -0.0,
95
- "reward": 6.528926610946655,
96
- "reward_std": 0.06547806216403843,
97
- "rewards/concensus_correctness_reward_func": 1.8683999925851822,
98
  "rewards/consensus_reward_func": 2.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
  "rewards/final_correctness_reward_func": 1.6,
101
- "rewards/question_recreation_reward_func": 0.9990267753601074,
102
  "rewards/soft_format_reward_func": 0.0,
103
  "rewards/strict_format_reward_func": 0.0,
104
- "rewards/xmlcount_reward_func": 0.06150000095367432,
105
  "step": 10
106
  },
107
  {
108
  "epoch": 3.4444444444444446,
109
  "step": 10,
110
  "total_flos": 0.0,
111
- "train_loss": 6.820050475653261e-06,
112
- "train_runtime": 4436.3893,
113
- "train_samples_per_second": 0.018,
114
  "train_steps_per_second": 0.002
115
  }
116
  ],
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "completion_length": 306.6875,
14
  "epoch": 0.8888888888888888,
15
+ "grad_norm": 0.39643576741218567,
16
+ "kl": 0.017112158122472465,
17
  "learning_rate": 5e-07,
18
  "loss": 0.0,
19
+ "reward": 5.813093453645706,
20
+ "reward_std": 0.14963281876407564,
21
+ "rewards/concensus_correctness_reward_func": 1.7742499868036248,
22
  "rewards/consensus_reward_func": 2.0,
23
  "rewards/cumulative_reward_2": 0.0,
24
  "rewards/final_correctness_reward_func": 1.0,
25
+ "rewards/question_recreation_reward_func": 0.9020308256149292,
26
  "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
+ "rewards/xmlcount_reward_func": 0.1368125006556511,
29
  "step": 2
30
  },
31
  {
32
+ "completion_length": 305.1,
33
  "epoch": 1.4444444444444444,
34
+ "grad_norm": 0.4071302115917206,
35
+ "kl": 0.024027385748922825,
36
  "learning_rate": 4.415111107797445e-07,
37
  "loss": 0.0,
38
+ "reward": 6.344730997085572,
39
+ "reward_std": 0.05741699002683163,
40
+ "rewards/concensus_correctness_reward_func": 2.0456000403501093,
41
  "rewards/consensus_reward_func": 2.0,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 1.2,
44
  "rewards/question_recreation_reward_func": 0.9993311047554017,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
+ "rewards/xmlcount_reward_func": 0.09979999661445618,
48
  "step": 4
49
  },
50
  {
51
+ "completion_length": 301.2,
52
  "epoch": 2.0,
53
+ "grad_norm": 0.14513656497001648,
54
+ "kl": 0.018292231671512126,
55
  "learning_rate": 2.934120444167326e-07,
56
  "loss": 0.0,
57
+ "reward": 6.5120086669921875,
58
+ "reward_std": 0.07509479415602982,
59
+ "rewards/concensus_correctness_reward_func": 2.1467999637126924,
60
  "rewards/consensus_reward_func": 2.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
  "rewards/final_correctness_reward_func": 1.2,
63
  "rewards/question_recreation_reward_func": 0.998508358001709,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
+ "rewards/xmlcount_reward_func": 0.16669999957084655,
67
  "step": 6
68
  },
69
  {
70
+ "completion_length": 315.5625,
71
  "epoch": 2.888888888888889,
72
+ "grad_norm": 0.36357581615448,
73
+ "kl": 0.018164650886319578,
74
  "learning_rate": 1.2500000000000005e-07,
75
  "loss": 0.0,
76
+ "reward": 5.899516224861145,
77
+ "reward_std": 0.0996137821348384,
78
+ "rewards/concensus_correctness_reward_func": 1.8205000023008324,
79
  "rewards/consensus_reward_func": 2.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
  "rewards/final_correctness_reward_func": 1.0,
82
  "rewards/question_recreation_reward_func": 0.9989537969231606,
83
  "rewards/soft_format_reward_func": 0.0,
84
  "rewards/strict_format_reward_func": 0.0,
85
+ "rewards/xmlcount_reward_func": 0.08006250020116568,
86
  "step": 8
87
  },
88
  {
89
+ "completion_length": 302.2,
90
  "epoch": 3.4444444444444446,
91
+ "grad_norm": 0.4360595941543579,
92
+ "kl": 0.019211999699473382,
93
  "learning_rate": 1.507684480352292e-08,
94
+ "loss": 0.0,
95
+ "reward": 7.314918279647827,
96
+ "reward_std": 0.11413912773132324,
97
+ "rewards/concensus_correctness_reward_func": 2.593599981069565,
98
  "rewards/consensus_reward_func": 2.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
  "rewards/final_correctness_reward_func": 1.6,
101
+ "rewards/question_recreation_reward_func": 0.9948183536529541,
102
  "rewards/soft_format_reward_func": 0.0,
103
  "rewards/strict_format_reward_func": 0.0,
104
+ "rewards/xmlcount_reward_func": 0.12649999856948851,
105
  "step": 10
106
  },
107
  {
108
  "epoch": 3.4444444444444446,
109
  "step": 10,
110
  "total_flos": 0.0,
111
+ "train_loss": 1.4550274863722734e-05,
112
+ "train_runtime": 4069.2289,
113
+ "train_samples_per_second": 0.02,
114
  "train_steps_per_second": 0.002
115
  }
116
  ],