pragsri8 committed on
Commit
6558474
·
verified ·
1 Parent(s): c704188

Model save

Browse files
Files changed (4) hide show
  1. README.md +4 -10
  2. all_results.json +4 -9
  3. train_results.json +4 -4
  4. trainer_state.json +32 -32
README.md CHANGED
@@ -1,17 +1,11 @@
1
  ---
2
  library_name: transformers
3
- license: llama3.1
4
- base_model: meta-llama/Llama-3.1-8B
5
  tags:
6
- - alignment-handbook
7
- - trl
8
- - sft
9
- - generated_from_trainer
10
  - trl
11
  - sft
12
  - generated_from_trainer
13
  datasets:
14
- - pragsri8/RLHFlow_ultrafeedback_iter1_vanilla_bon-sft
15
  model-index:
16
  - name: llama-3.1-8b-sft-full_vanilla_bon-sft
17
  results: []
@@ -22,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # llama-3.1-8b-sft-full_vanilla_bon-sft
24
 
25
- This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B) on the pragsri8/RLHFlow_ultrafeedback_iter1_vanilla_bon-sft dataset.
26
  It achieves the following results on the evaluation set:
27
- - Loss: 1.3875
28
 
29
  ## Model description
30
 
@@ -60,7 +54,7 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss |
62
  |:-------------:|:-----:|:----:|:---------------:|
63
- | 1.3769 | 1.0 | 59 | 1.3875 |
64
 
65
 
66
  ### Framework versions
 
1
  ---
2
  library_name: transformers
 
 
3
  tags:
 
 
 
 
4
  - trl
5
  - sft
6
  - generated_from_trainer
7
  datasets:
8
+ - generator
9
  model-index:
10
  - name: llama-3.1-8b-sft-full_vanilla_bon-sft
11
  results: []
 
16
 
17
  # llama-3.1-8b-sft-full_vanilla_bon-sft
18
 
19
+ This model was trained from scratch on the generator dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 1.3202
22
 
23
  ## Model description
24
 
 
54
 
55
  | Training Loss | Epoch | Step | Validation Loss |
56
  |:-------------:|:-----:|:----:|:---------------:|
57
+ | 1.3131 | 1.0 | 59 | 1.3202 |
58
 
59
 
60
  ### Framework versions
all_results.json CHANGED
@@ -1,14 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
- "eval_loss": 1.3874934911727905,
4
- "eval_runtime": 10.447,
5
- "eval_samples": 2000,
6
- "eval_samples_per_second": 19.144,
7
- "eval_steps_per_second": 0.67,
8
  "total_flos": 12353399685120.0,
9
- "train_loss": 1.5075374740665242,
10
- "train_runtime": 421.8974,
11
  "train_samples": 18000,
12
- "train_samples_per_second": 4.406,
13
- "train_steps_per_second": 0.14
14
  }
 
1
  {
2
  "epoch": 1.0,
 
 
 
 
 
3
  "total_flos": 12353399685120.0,
4
+ "train_loss": 1.3692619558108055,
5
+ "train_runtime": 423.541,
6
  "train_samples": 18000,
7
+ "train_samples_per_second": 4.389,
8
+ "train_steps_per_second": 0.139
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 12353399685120.0,
4
- "train_loss": 1.5075374740665242,
5
- "train_runtime": 421.8974,
6
  "train_samples": 18000,
7
- "train_samples_per_second": 4.406,
8
- "train_steps_per_second": 0.14
9
  }
 
1
  {
2
  "epoch": 1.0,
3
  "total_flos": 12353399685120.0,
4
+ "train_loss": 1.3692619558108055,
5
+ "train_runtime": 423.541,
6
  "train_samples": 18000,
7
+ "train_samples_per_second": 4.389,
8
+ "train_steps_per_second": 0.139
9
  }
trainer_state.json CHANGED
@@ -10,104 +10,104 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.01694915254237288,
13
- "grad_norm": 60.8476107354802,
14
  "learning_rate": 3.3333333333333333e-06,
15
- "loss": 1.8472,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.0847457627118644,
20
- "grad_norm": 8.877773177483087,
21
  "learning_rate": 1.6666666666666667e-05,
22
- "loss": 1.886,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.1694915254237288,
27
- "grad_norm": 2.740465644582342,
28
  "learning_rate": 1.972022914080411e-05,
29
- "loss": 1.7013,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 0.2542372881355932,
34
- "grad_norm": 1.6381019682815696,
35
  "learning_rate": 1.8610436117673557e-05,
36
- "loss": 1.5483,
37
  "step": 15
38
  },
39
  {
40
  "epoch": 0.3389830508474576,
41
- "grad_norm": 1.552887448490065,
42
  "learning_rate": 1.6749830015182106e-05,
43
- "loss": 1.5449,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 0.423728813559322,
48
- "grad_norm": 1.3828333517037474,
49
  "learning_rate": 1.4300652022765207e-05,
50
- "loss": 1.5062,
51
  "step": 25
52
  },
53
  {
54
  "epoch": 0.5084745762711864,
55
- "grad_norm": 1.22531732205893,
56
  "learning_rate": 1.1476465640024814e-05,
57
- "loss": 1.4417,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 0.5932203389830508,
62
- "grad_norm": 1.2049898344204903,
63
  "learning_rate": 8.52353435997519e-06,
64
- "loss": 1.4424,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.6779661016949152,
69
- "grad_norm": 1.1452259418352282,
70
  "learning_rate": 5.699347977234799e-06,
71
- "loss": 1.4163,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 0.7627118644067796,
76
- "grad_norm": 1.0826635541016094,
77
  "learning_rate": 3.250169984817897e-06,
78
- "loss": 1.3829,
79
  "step": 45
80
  },
81
  {
82
  "epoch": 0.847457627118644,
83
- "grad_norm": 1.0546970085472138,
84
  "learning_rate": 1.3895638823264447e-06,
85
- "loss": 1.3876,
86
  "step": 50
87
  },
88
  {
89
  "epoch": 0.9322033898305084,
90
- "grad_norm": 1.072858228582258,
91
  "learning_rate": 2.7977085919589253e-07,
92
- "loss": 1.3769,
93
  "step": 55
94
  },
95
  {
96
  "epoch": 1.0,
97
- "eval_loss": 1.3874934911727905,
98
- "eval_runtime": 10.4957,
99
- "eval_samples_per_second": 19.055,
100
- "eval_steps_per_second": 0.667,
101
  "step": 59
102
  },
103
  {
104
  "epoch": 1.0,
105
  "step": 59,
106
  "total_flos": 12353399685120.0,
107
- "train_loss": 1.5075374740665242,
108
- "train_runtime": 421.8974,
109
- "train_samples_per_second": 4.406,
110
- "train_steps_per_second": 0.14
111
  }
112
  ],
113
  "logging_steps": 5,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.01694915254237288,
13
+ "grad_norm": 3.0302986163078596,
14
  "learning_rate": 3.3333333333333333e-06,
15
+ "loss": 1.3982,
16
  "step": 1
17
  },
18
  {
19
  "epoch": 0.0847457627118644,
20
+ "grad_norm": 3.2967950958526457,
21
  "learning_rate": 1.6666666666666667e-05,
22
+ "loss": 1.4931,
23
  "step": 5
24
  },
25
  {
26
  "epoch": 0.1694915254237288,
27
+ "grad_norm": 1.4683392123253753,
28
  "learning_rate": 1.972022914080411e-05,
29
+ "loss": 1.4567,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 0.2542372881355932,
34
+ "grad_norm": 1.1218652958716022,
35
  "learning_rate": 1.8610436117673557e-05,
36
+ "loss": 1.367,
37
  "step": 15
38
  },
39
  {
40
  "epoch": 0.3389830508474576,
41
+ "grad_norm": 0.97543948580105,
42
  "learning_rate": 1.6749830015182106e-05,
43
+ "loss": 1.4006,
44
  "step": 20
45
  },
46
  {
47
  "epoch": 0.423728813559322,
48
+ "grad_norm": 0.9114604776355013,
49
  "learning_rate": 1.4300652022765207e-05,
50
+ "loss": 1.3829,
51
  "step": 25
52
  },
53
  {
54
  "epoch": 0.5084745762711864,
55
+ "grad_norm": 0.9539731539603004,
56
  "learning_rate": 1.1476465640024814e-05,
57
+ "loss": 1.3404,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 0.5932203389830508,
62
+ "grad_norm": 0.8978852079217283,
63
  "learning_rate": 8.52353435997519e-06,
64
+ "loss": 1.3571,
65
  "step": 35
66
  },
67
  {
68
  "epoch": 0.6779661016949152,
69
+ "grad_norm": 0.866628438912024,
70
  "learning_rate": 5.699347977234799e-06,
71
+ "loss": 1.3405,
72
  "step": 40
73
  },
74
  {
75
  "epoch": 0.7627118644067796,
76
+ "grad_norm": 0.8770654440256076,
77
  "learning_rate": 3.250169984817897e-06,
78
+ "loss": 1.3104,
79
  "step": 45
80
  },
81
  {
82
  "epoch": 0.847457627118644,
83
+ "grad_norm": 0.8449575811804665,
84
  "learning_rate": 1.3895638823264447e-06,
85
+ "loss": 1.3237,
86
  "step": 50
87
  },
88
  {
89
  "epoch": 0.9322033898305084,
90
+ "grad_norm": 0.8725200545477999,
91
  "learning_rate": 2.7977085919589253e-07,
92
+ "loss": 1.3131,
93
  "step": 55
94
  },
95
  {
96
  "epoch": 1.0,
97
+ "eval_loss": 1.3201909065246582,
98
+ "eval_runtime": 10.5592,
99
+ "eval_samples_per_second": 18.941,
100
+ "eval_steps_per_second": 0.663,
101
  "step": 59
102
  },
103
  {
104
  "epoch": 1.0,
105
  "step": 59,
106
  "total_flos": 12353399685120.0,
107
+ "train_loss": 1.3692619558108055,
108
+ "train_runtime": 423.541,
109
+ "train_samples_per_second": 4.389,
110
+ "train_steps_per_second": 0.139
111
  }
112
  ],
113
  "logging_steps": 5,