0xtinuviel commited on
Commit
e96daa3
·
verified ·
1 Parent(s): 332359f

End of training

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:262ece500cc4251f9a19620e90973d411f472ed738837a75e1e0e7608afa5bff
3
  size 842289128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd35b8dac3d83677cdb5076be5c498774cec31385387e529adfd1edc8cda7da
3
  size 842289128
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 2.9028784410911613e-06,
4
- "train_runtime": 5307.315,
5
  "train_samples": 9,
6
- "train_samples_per_second": 0.015,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 3.932331696887559e-06,
4
+ "train_runtime": 4767.9782,
5
  "train_samples": 9,
6
+ "train_samples_per_second": 0.017,
7
  "train_steps_per_second": 0.002
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 2.9028784410911613e-06,
4
- "train_runtime": 5307.315,
5
  "train_samples": 9,
6
- "train_samples_per_second": 0.015,
7
  "train_steps_per_second": 0.002
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": 3.932331696887559e-06,
4
+ "train_runtime": 4767.9782,
5
  "train_samples": 9,
6
+ "train_samples_per_second": 0.017,
7
  "train_steps_per_second": 0.002
8
  }
trainer_state.json CHANGED
@@ -10,107 +10,107 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "completion_length": 379.75,
14
  "epoch": 0.8888888888888888,
15
- "grad_norm": 0.38668835163116455,
16
- "kl": 0.0023325650836341083,
17
  "learning_rate": 5e-07,
18
  "loss": 0.0,
19
- "reward": 4.976108908653259,
20
- "reward_std": 0.237999310484156,
21
- "rewards/concensus_correctness_reward_func": 1.232625022996217,
22
- "rewards/consensus_reward_func": 1.875,
23
  "rewards/cumulative_reward_2": 0.0,
24
- "rewards/final_correctness_reward_func": 1.0,
25
- "rewards/question_recreation_reward_func": 0.9676086902618408,
26
  "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
- "rewards/xmlcount_reward_func": -0.09912500251084566,
29
  "step": 2
30
  },
31
  {
32
- "completion_length": 321.1,
33
  "epoch": 1.4444444444444444,
34
- "grad_norm": 0.3412216603755951,
35
- "kl": 0.003170917648822069,
36
  "learning_rate": 4.415111107797445e-07,
37
  "loss": 0.0,
38
- "reward": 4.068022871017456,
39
- "reward_std": 0.10876496434211731,
40
- "rewards/concensus_correctness_reward_func": 0.6504000253975392,
41
  "rewards/consensus_reward_func": 2.0,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 0.4,
44
- "rewards/question_recreation_reward_func": 0.9951226830482482,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
- "rewards/xmlcount_reward_func": 0.022499996423721313,
48
  "step": 4
49
  },
50
  {
51
- "completion_length": 399.4,
52
  "epoch": 2.0,
53
- "grad_norm": 0.23032891750335693,
54
- "kl": 0.0017182762967422605,
55
  "learning_rate": 2.934120444167326e-07,
56
  "loss": 0.0,
57
- "reward": 5.452566385269165,
58
- "reward_std": 0.1259467562660575,
59
- "rewards/concensus_correctness_reward_func": 1.414800015091896,
60
  "rewards/consensus_reward_func": 2.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
- "rewards/final_correctness_reward_func": 1.2,
63
- "rewards/question_recreation_reward_func": 0.9982662200927734,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
- "rewards/xmlcount_reward_func": -0.16050000637769699,
67
  "step": 6
68
  },
69
  {
70
- "completion_length": 375.9375,
71
  "epoch": 2.888888888888889,
72
- "grad_norm": 0.4123876690864563,
73
- "kl": 0.002100028141285293,
74
  "learning_rate": 1.2500000000000005e-07,
75
  "loss": 0.0,
76
- "reward": 4.496386170387268,
77
- "reward_std": 0.060892102657817304,
78
- "rewards/concensus_correctness_reward_func": 0.8740000152029097,
79
  "rewards/consensus_reward_func": 2.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
- "rewards/final_correctness_reward_func": 0.75,
82
- "rewards/question_recreation_reward_func": 0.9963235333561897,
83
  "rewards/soft_format_reward_func": 0.0,
84
  "rewards/strict_format_reward_func": 0.0,
85
- "rewards/xmlcount_reward_func": -0.12393749598413706,
86
  "step": 8
87
  },
88
  {
89
- "completion_length": 340.5,
90
  "epoch": 3.4444444444444446,
91
- "grad_norm": 0.3251875340938568,
92
- "kl": 0.0023836970096454023,
93
  "learning_rate": 1.507684480352292e-08,
94
  "loss": 0.0,
95
- "reward": 4.930327272415161,
96
- "reward_std": 0.06378106474876404,
97
- "rewards/concensus_correctness_reward_func": 1.1760000318288804,
98
  "rewards/consensus_reward_func": 2.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
  "rewards/final_correctness_reward_func": 0.8,
101
- "rewards/question_recreation_reward_func": 0.9990267753601074,
102
  "rewards/soft_format_reward_func": 0.0,
103
  "rewards/strict_format_reward_func": 0.0,
104
- "rewards/xmlcount_reward_func": -0.044699998944997786,
105
  "step": 10
106
  },
107
  {
108
  "epoch": 3.4444444444444446,
109
  "step": 10,
110
  "total_flos": 0.0,
111
- "train_loss": 2.9028784410911613e-06,
112
- "train_runtime": 5307.315,
113
- "train_samples_per_second": 0.015,
114
  "train_steps_per_second": 0.002
115
  }
116
  ],
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "completion_length": 350.5,
14
  "epoch": 0.8888888888888888,
15
+ "grad_norm": 0.26594895124435425,
16
+ "kl": 0.001963111659279093,
17
  "learning_rate": 5e-07,
18
  "loss": 0.0,
19
+ "reward": 7.284998536109924,
20
+ "reward_std": 0.037513982970267534,
21
+ "rewards/concensus_correctness_reward_func": 3.5592499980702996,
22
+ "rewards/consensus_reward_func": 2.0,
23
  "rewards/cumulative_reward_2": 0.0,
24
+ "rewards/final_correctness_reward_func": 0.75,
25
+ "rewards/question_recreation_reward_func": 0.9984983280301094,
26
  "rewards/soft_format_reward_func": 0.0,
27
  "rewards/strict_format_reward_func": 0.0,
28
+ "rewards/xmlcount_reward_func": -0.022750002099201083,
29
  "step": 2
30
  },
31
  {
32
+ "completion_length": 309.4,
33
  "epoch": 1.4444444444444444,
34
+ "grad_norm": 0.5197527408599854,
35
+ "kl": 0.0027932931669056417,
36
  "learning_rate": 4.415111107797445e-07,
37
  "loss": 0.0,
38
+ "reward": 4.174948835372925,
39
+ "reward_std": 0.08027855101972818,
40
+ "rewards/concensus_correctness_reward_func": 0.7891999736428261,
41
  "rewards/consensus_reward_func": 2.0,
42
  "rewards/cumulative_reward_2": 0.0,
43
  "rewards/final_correctness_reward_func": 0.4,
44
+ "rewards/question_recreation_reward_func": 0.9412487030029297,
45
  "rewards/soft_format_reward_func": 0.0,
46
  "rewards/strict_format_reward_func": 0.0,
47
+ "rewards/xmlcount_reward_func": 0.04449999965727329,
48
  "step": 4
49
  },
50
  {
51
+ "completion_length": 348.5,
52
  "epoch": 2.0,
53
+ "grad_norm": 0.15567967295646667,
54
+ "kl": 0.002238281024619937,
55
  "learning_rate": 2.934120444167326e-07,
56
  "loss": 0.0,
57
+ "reward": 8.774999618530273,
58
+ "reward_std": 0.14355496428906916,
59
+ "rewards/concensus_correctness_reward_func": 5.032800018787384,
60
  "rewards/consensus_reward_func": 2.0,
61
  "rewards/cumulative_reward_2": 0.0,
62
+ "rewards/final_correctness_reward_func": 0.8,
63
+ "rewards/question_recreation_reward_func": 0.9194996118545532,
64
  "rewards/soft_format_reward_func": 0.0,
65
  "rewards/strict_format_reward_func": 0.0,
66
+ "rewards/xmlcount_reward_func": 0.022700001299381257,
67
  "step": 6
68
  },
69
  {
70
+ "completion_length": 332.875,
71
  "epoch": 2.888888888888889,
72
+ "grad_norm": 0.412031352519989,
73
+ "kl": 0.0025081908679567277,
74
  "learning_rate": 1.2500000000000005e-07,
75
  "loss": 0.0,
76
+ "reward": 6.661080002784729,
77
+ "reward_std": 0.16880523064173758,
78
+ "rewards/concensus_correctness_reward_func": 3.2429999904707074,
79
  "rewards/consensus_reward_func": 2.0,
80
  "rewards/cumulative_reward_2": 0.0,
81
+ "rewards/final_correctness_reward_func": 0.5,
82
+ "rewards/question_recreation_reward_func": 0.8998925015330315,
83
  "rewards/soft_format_reward_func": 0.0,
84
  "rewards/strict_format_reward_func": 0.0,
85
+ "rewards/xmlcount_reward_func": 0.01818749774247408,
86
  "step": 8
87
  },
88
  {
89
+ "completion_length": 336.8,
90
  "epoch": 3.4444444444444446,
91
+ "grad_norm": 0.32939448952674866,
92
+ "kl": 0.0021914205979555847,
93
  "learning_rate": 1.507684480352292e-08,
94
  "loss": 0.0,
95
+ "reward": 4.9426099300384525,
96
+ "reward_std": 0.11483407691121102,
97
+ "rewards/concensus_correctness_reward_func": 1.1760000169277192,
98
  "rewards/consensus_reward_func": 2.0,
99
  "rewards/cumulative_reward_2": 0.0,
100
  "rewards/final_correctness_reward_func": 0.8,
101
+ "rewards/question_recreation_reward_func": 0.9906099438667297,
102
  "rewards/soft_format_reward_func": 0.0,
103
  "rewards/strict_format_reward_func": 0.0,
104
+ "rewards/xmlcount_reward_func": -0.024000001326203346,
105
  "step": 10
106
  },
107
  {
108
  "epoch": 3.4444444444444446,
109
  "step": 10,
110
  "total_flos": 0.0,
111
+ "train_loss": 3.932331696887559e-06,
112
+ "train_runtime": 4767.9782,
113
+ "train_samples_per_second": 0.017,
114
  "train_steps_per_second": 0.002
115
  }
116
  ],