PeterLauLukCh committed on
Commit
2cbc972
·
verified ·
1 Parent(s): 7e7ad7b

Upload folder using huggingface_hub

Browse files
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 684863013568512.0,
4
- "train_loss": 0.023829213575364362,
5
- "train_runtime": 16963.4869,
6
  "train_samples": 8460,
7
- "train_samples_per_second": 1.496,
8
- "train_steps_per_second": 0.094
9
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "total_flos": 452559651471360.0,
4
+ "train_loss": 0.022131871465586973,
5
+ "train_runtime": 16494.942,
6
  "train_samples": 8460,
7
+ "train_samples_per_second": 1.026,
8
+ "train_steps_per_second": 0.085
9
  }
args.json CHANGED
@@ -70,14 +70,14 @@
70
  "kl_coeff": 0.0,
71
  "label_names": null,
72
  "label_smoothing_factor": 0.0,
73
- "learning_rate": 1e-06,
74
  "length_column_name": "length",
75
  "load_best_model_at_end": false,
76
  "local_rank": 0,
77
  "log_level": "passive",
78
  "log_level_replica": "warning",
79
  "log_on_each_node": true,
80
- "logging_dir": "/scratch/gpfs/jg9904/saved_models/Qwen2.5-14B-Instruct-RL/runs/Apr24_14-16-12_della-k11g2",
81
  "logging_first_step": false,
82
  "logging_nan_inf_filter": true,
83
  "logging_steps": 1.0,
@@ -89,15 +89,15 @@
89
  "max_steps": -1,
90
  "max_train_samples": null,
91
  "metric_for_best_model": null,
92
- "model_name_or_path": "/scratch/gpfs/jg9904/saved_models/Qwen2.5-14B-Instruct-SFT",
93
  "mp_parameters": "",
94
  "neftune_noise_alpha": null,
95
  "no_cuda": false,
96
- "num_train_epochs": 3.0,
97
  "optim": "adamw_torch",
98
  "optim_args": null,
99
  "optim_target_modules": null,
100
- "output_dir": "/scratch/gpfs/jg9904/saved_models/Qwen2.5-14B-Instruct-RL",
101
  "overwrite_cache": false,
102
  "overwrite_output_dir": true,
103
  "past_index": -1,
@@ -117,14 +117,14 @@
117
  ],
118
  "restore_callback_states_from_checkpoint": false,
119
  "resume_from_checkpoint": null,
120
- "run_name": "/scratch/gpfs/jg9904/saved_models/Qwen2.5-14B-Instruct-RL",
121
  "save_on_each_node": false,
122
  "save_only_model": true,
123
  "save_safetensors": true,
124
  "save_steps": 500,
125
  "save_strategy": "no",
126
  "save_total_limit": null,
127
- "seed": 42,
128
  "skip_memory_metrics": true,
129
  "split_batches": null,
130
  "tf32": null,
@@ -136,7 +136,7 @@
136
  "tp_size": 0,
137
  "tpu_metrics_debug": false,
138
  "tpu_num_cores": null,
139
- "train_tokenized_file": "/scratch/gpfs/jg9904/cogbehaveRL/RL/offline_rl_v2/data/14K_reward.jsonl",
140
  "use_cpu": false,
141
  "use_flash_attn": true,
142
  "use_ipex": false,
 
70
  "kl_coeff": 0.0,
71
  "label_names": null,
72
  "label_smoothing_factor": 0.0,
73
+ "learning_rate": 9e-07,
74
  "length_column_name": "length",
75
  "load_best_model_at_end": false,
76
  "local_rank": 0,
77
  "log_level": "passive",
78
  "log_level_replica": "warning",
79
  "log_on_each_node": true,
80
+ "logging_dir": "/scratch/gpfs/jg9904/saved_models/Qwen2.5-14B-Instruct-RL-2.1/runs/Apr27_14-34-38_della-j16g2",
81
  "logging_first_step": false,
82
  "logging_nan_inf_filter": true,
83
  "logging_steps": 1.0,
 
89
  "max_steps": -1,
90
  "max_train_samples": null,
91
  "metric_for_best_model": null,
92
+ "model_name_or_path": "/scratch/gpfs/jg9904/saved_models/Qwen2.5-14B-Instruct-RL",
93
  "mp_parameters": "",
94
  "neftune_noise_alpha": null,
95
  "no_cuda": false,
96
+ "num_train_epochs": 2.0,
97
  "optim": "adamw_torch",
98
  "optim_args": null,
99
  "optim_target_modules": null,
100
+ "output_dir": "/scratch/gpfs/jg9904/saved_models/Qwen2.5-14B-Instruct-RL-2.1",
101
  "overwrite_cache": false,
102
  "overwrite_output_dir": true,
103
  "past_index": -1,
 
117
  ],
118
  "restore_callback_states_from_checkpoint": false,
119
  "resume_from_checkpoint": null,
120
+ "run_name": "/scratch/gpfs/jg9904/saved_models/Qwen2.5-14B-Instruct-RL-2.1",
121
  "save_on_each_node": false,
122
  "save_only_model": true,
123
  "save_safetensors": true,
124
  "save_steps": 500,
125
  "save_strategy": "no",
126
  "save_total_limit": null,
127
+ "seed": 26,
128
  "skip_memory_metrics": true,
129
  "split_batches": null,
130
  "tf32": null,
 
136
  "tp_size": 0,
137
  "tpu_metrics_debug": false,
138
  "tpu_num_cores": null,
139
+ "train_tokenized_file": "/scratch/gpfs/jg9904/cogbehaveRL/RL/offline_rl_v2/data/14K_reward-r2.jsonl",
140
  "use_cpu": false,
141
  "use_flash_attn": true,
142
  "use_ipex": false,
model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f18f6592df3a3802936a42bbe707273fa833f75f023970db5938538c5827c04
3
  size 4986211280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59689bf092b786e692f1353c24bedf498061684513464b7a73a7510467bdddf3
3
  size 4986211280
model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab61ee1441c650203c387244c225fb1aed7f323a7ed948c1893dbf809397cc8f
3
  size 4954847344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85a1620514c230c48bfb1aba8750847a213de948d3a0e8d2d855d60d9ae9ad94
3
  size 4954847344
model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f62d595693b6fbd47cb7c22c13b57a20a1cc33fd960485b680318852a0ae5dc
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51cecf40de633b7c2e92622fbe91b6cb401d5b57da64c53c2704e2b512eaf9a8
3
  size 4954847392
model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e10f48bd178f34c4b43b27d39ba28c2287762bea10f17c67050577144551da8
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb8f7f572db0875c8b2bd1c11b6a627c32ae1205cf1380071260d3878a03f2a3
3
  size 4954847392
model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c76c97e126c7218861307c02f09784b66acb906eb151ef0f46e3f5b86a3bb92b
3
  size 4954847392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391045fc5562f499cd5ced91de26b3cd25da8094ded5c7350d9446b72590c804
3
  size 4954847392
model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ace25ba75838b279919e80863544cbb71bc0e18daebd7f317d2ce6e5bb4652bf
3
  size 4734533160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c24750ce1b70304e6cdcd7f13f716ba999105e1d2ab22166b13961a1c2689ff
3
  size 4734533160
runs/Apr27_14-34-38_della-j16g2/events.out.tfevents.1745778947.della-j16g2.2201280.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3192051163af29dd14cc1cdcbf4c9776ac4444141d1a2249c73f662e53cbcf16
3
+ size 515149
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 684863013568512.0,
4
- "train_loss": 0.023829213575364362,
5
- "train_runtime": 16963.4869,
6
  "train_samples": 8460,
7
- "train_samples_per_second": 1.496,
8
- "train_steps_per_second": 0.094
9
  }
 
1
  {
2
+ "epoch": 2.0,
3
+ "total_flos": 452559651471360.0,
4
+ "train_loss": 0.022131871465586973,
5
+ "train_runtime": 16494.942,
6
  "train_samples": 8460,
7
+ "train_samples_per_second": 1.026,
8
+ "train_steps_per_second": 0.085
9
  }
training.log CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f8a572573859a793cde5a6cf88ab0baee35cfabcb572759b18c90d158c2e2d3
3
  size 7160
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d17d30d7ac9e972934bf62062e8a2b07b263c0053803f3234d294d7b4a13ce17
3
  size 7160