m4lw4r3exe commited on
Commit
4088083
·
1 Parent(s): 0e9ba0a

Upload manual_upload with huggingface_hub

Browse files
manual_upload/manual_upload/manual_upload/manual_upload/.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
manual_upload/manual_upload/manual_upload/manual_upload/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
manual_upload/manual_upload/manual_upload/manual_upload/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "pad_token": "[PAD]"
3
+ }
manual_upload/manual_upload/manual_upload/manual_upload/tokenizer.json ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 2048,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 1,
14
+ "pad_type_id": 0,
15
+ "pad_token": "[PAD]"
16
+ },
17
+ "added_tokens": [
18
+ {
19
+ "id": 0,
20
+ "content": "[UNK]",
21
+ "single_word": false,
22
+ "lstrip": false,
23
+ "rstrip": false,
24
+ "normalized": false,
25
+ "special": true
26
+ },
27
+ {
28
+ "id": 1,
29
+ "content": "[PAD]",
30
+ "single_word": false,
31
+ "lstrip": false,
32
+ "rstrip": false,
33
+ "normalized": false,
34
+ "special": true
35
+ },
36
+ {
37
+ "id": 2,
38
+ "content": "[MASK]",
39
+ "single_word": false,
40
+ "lstrip": false,
41
+ "rstrip": false,
42
+ "normalized": false,
43
+ "special": true
44
+ }
45
+ ],
46
+ "normalizer": null,
47
+ "pre_tokenizer": {
48
+ "type": "WhitespaceSplit"
49
+ },
50
+ "post_processor": null,
51
+ "decoder": null,
52
+ "model": {
53
+ "type": "WordLevel",
54
+ "vocab": {
55
+ "[UNK]": 0,
56
+ "[PAD]": 1,
57
+ "[MASK]": 2,
58
+ "TIME_DELTA=1": 3,
59
+ "TIME_DELTA=2": 4,
60
+ "BAR_END": 5,
61
+ "BAR_START": 6,
62
+ "NOTE_OFF=42": 7,
63
+ "NOTE_ON=42": 8,
64
+ "NOTE_OFF=36": 9,
65
+ "NOTE_ON=36": 10,
66
+ "TIME_DELTA=4": 11,
67
+ "NOTE_OFF=38": 12,
68
+ "NOTE_ON=38": 13,
69
+ "NOTE_OFF=54": 14,
70
+ "NOTE_ON=54": 15,
71
+ "NOTE_OFF=62": 16,
72
+ "NOTE_ON=62": 17,
73
+ "NOTE_OFF=64": 18,
74
+ "NOTE_ON=64": 19,
75
+ "TIME_DELTA=3": 20,
76
+ "NOTE_OFF=57": 21,
77
+ "NOTE_ON=57": 22,
78
+ "NOTE_OFF=69": 23,
79
+ "NOTE_ON=69": 24,
80
+ "TRACK_END": 25,
81
+ "TRACK_START": 26,
82
+ "NOTE_OFF=40": 27,
83
+ "NOTE_ON=40": 28,
84
+ "NOTE_OFF=60": 29,
85
+ "NOTE_ON=60": 30,
86
+ "NOTE_OFF=35": 31,
87
+ "NOTE_ON=35": 32,
88
+ "NOTE_OFF=59": 33,
89
+ "NOTE_ON=59": 34,
90
+ "NOTE_OFF=55": 35,
91
+ "NOTE_ON=55": 36,
92
+ "NOTE_OFF=46": 37,
93
+ "NOTE_ON=46": 38,
94
+ "NOTE_OFF=67": 39,
95
+ "NOTE_ON=67": 40,
96
+ "NOTE_OFF=70": 41,
97
+ "NOTE_ON=70": 42,
98
+ "NOTE_OFF=50": 43,
99
+ "NOTE_ON=50": 44,
100
+ "NOTE_OFF=44": 45,
101
+ "NOTE_ON=44": 46,
102
+ "NOTE_OFF=52": 47,
103
+ "NOTE_ON=52": 48,
104
+ "NOTE_OFF=61": 49,
105
+ "NOTE_ON=61": 50,
106
+ "NOTE_OFF=65": 51,
107
+ "NOTE_ON=65": 52,
108
+ "NOTE_OFF=63": 53,
109
+ "NOTE_ON=63": 54,
110
+ "NOTE_OFF=66": 55,
111
+ "NOTE_ON=66": 56,
112
+ "NOTE_OFF=45": 57,
113
+ "NOTE_ON=45": 58,
114
+ "NOTE_OFF=51": 59,
115
+ "NOTE_ON=51": 60,
116
+ "NOTE_OFF=43": 61,
117
+ "NOTE_ON=43": 62,
118
+ "NOTE_OFF=48": 63,
119
+ "NOTE_ON=48": 64,
120
+ "NOTE_OFF=58": 65,
121
+ "NOTE_ON=58": 66,
122
+ "NOTE_OFF=39": 67,
123
+ "NOTE_ON=39": 68,
124
+ "NOTE_OFF=53": 69,
125
+ "NOTE_ON=53": 70,
126
+ "NOTE_OFF=56": 71,
127
+ "NOTE_ON=56": 72,
128
+ "NOTE_OFF=47": 73,
129
+ "NOTE_ON=47": 74,
130
+ "NOTE_OFF=68": 75,
131
+ "NOTE_ON=68": 76,
132
+ "NOTE_OFF=49": 77,
133
+ "NOTE_ON=49": 78,
134
+ "NOTE_OFF=72": 79,
135
+ "NOTE_ON=72": 80,
136
+ "NOTE_OFF=71": 81,
137
+ "NOTE_ON=71": 82,
138
+ "NOTE_OFF=41": 83,
139
+ "NOTE_ON=41": 84,
140
+ "NOTE_OFF=74": 85,
141
+ "NOTE_ON=74": 86,
142
+ "NOTE_OFF=33": 87,
143
+ "NOTE_ON=33": 88,
144
+ "TIME_DELTA=6": 89,
145
+ "NOTE_OFF=82": 90,
146
+ "NOTE_ON=82": 91,
147
+ "TIME_DELTA=16": 92,
148
+ "TIME_DELTA=8": 93,
149
+ "NOTE_OFF=37": 94,
150
+ "NOTE_ON=37": 95,
151
+ "NOTE_OFF=31": 96,
152
+ "NOTE_ON=31": 97,
153
+ "NOTE_OFF=76": 98,
154
+ "NOTE_ON=76": 99,
155
+ "DENSITY=3": 100,
156
+ "NOTE_OFF=73": 101,
157
+ "NOTE_ON=73": 102,
158
+ "DENSITY=0": 103,
159
+ "NOTE_OFF=28": 104,
160
+ "NOTE_ON=28": 105,
161
+ "DENSITY=1": 106,
162
+ "DENSITY=2": 107,
163
+ "NOTE_OFF=34": 108,
164
+ "NOTE_ON=34": 109,
165
+ "INST=3": 110,
166
+ "NOTE_OFF=75": 111,
167
+ "NOTE_ON=75": 112,
168
+ "NOTE_OFF=77": 113,
169
+ "NOTE_ON=77": 114,
170
+ "PIECE_START": 115,
171
+ "NOTE_OFF=79": 116,
172
+ "NOTE_ON=79": 117,
173
+ "INST=DRUMS": 118,
174
+ "NOTE_OFF=32": 119,
175
+ "NOTE_ON=32": 120,
176
+ "NOTE_OFF=29": 121,
177
+ "NOTE_ON=29": 122,
178
+ "INST=4": 123,
179
+ "NOTE_OFF=81": 124,
180
+ "NOTE_ON=81": 125,
181
+ "TIME_DELTA=5": 126,
182
+ "NOTE_OFF=78": 127,
183
+ "NOTE_ON=78": 128,
184
+ "NOTE_OFF=30": 129,
185
+ "NOTE_ON=30": 130,
186
+ "NOTE_OFF=27": 131,
187
+ "NOTE_ON=27": 132,
188
+ "INST=6": 133,
189
+ "NOTE_OFF=80": 134,
190
+ "NOTE_ON=80": 135,
191
+ "TIME_DELTA=7": 136,
192
+ "NOTE_OFF=26": 137,
193
+ "NOTE_ON=26": 138,
194
+ "INST=0": 139,
195
+ "NOTE_OFF=83": 140,
196
+ "NOTE_ON=83": 141,
197
+ "TIME_DELTA=12": 142,
198
+ "TIME_DELTA=10": 143,
199
+ "NOTE_OFF=84": 144,
200
+ "NOTE_ON=84": 145,
201
+ "NOTE_OFF=86": 146,
202
+ "NOTE_ON=86": 147,
203
+ "INST=10": 148,
204
+ "NOTE_OFF=85": 149,
205
+ "NOTE_ON=85": 150,
206
+ "TIME_DELTA=14": 151,
207
+ "TIME_DELTA=15": 152,
208
+ "NOTE_OFF=88": 153,
209
+ "NOTE_ON=88": 154,
210
+ "INST=8": 155,
211
+ "INST=11": 156,
212
+ "NOTE_OFF=87": 157,
213
+ "NOTE_ON=87": 158,
214
+ "TIME_DELTA=9": 159,
215
+ "NOTE_OFF=24": 160,
216
+ "NOTE_ON=24": 161,
217
+ "INST=7": 162,
218
+ "NOTE_OFF=25": 163,
219
+ "NOTE_ON=25": 164,
220
+ "NOTE_OFF=89": 165,
221
+ "NOTE_ON=89": 166,
222
+ "NOTE_OFF=91": 167,
223
+ "NOTE_ON=91": 168,
224
+ "TIME_DELTA=11": 169,
225
+ "TIME_DELTA=13": 170,
226
+ "INST=2": 171,
227
+ "NOTE_OFF=93": 172,
228
+ "NOTE_ON=93": 173,
229
+ "NOTE_OFF=22": 174,
230
+ "NOTE_ON=22": 175,
231
+ "NOTE_OFF=23": 176,
232
+ "NOTE_ON=23": 177,
233
+ "NOTE_OFF=90": 178,
234
+ "NOTE_ON=90": 179,
235
+ "INST=9": 180,
236
+ "INST=5": 181,
237
+ "INST=1": 182,
238
+ "NOTE_OFF=94": 183,
239
+ "NOTE_ON=94": 184,
240
+ "INST=12": 185,
241
+ "INST=14": 186,
242
+ "NOTE_OFF=92": 187,
243
+ "NOTE_ON=92": 188,
244
+ "NOTE_OFF=96": 189,
245
+ "NOTE_ON=96": 190,
246
+ "NOTE_OFF=95": 191,
247
+ "NOTE_ON=95": 192,
248
+ "NOTE_OFF=98": 193,
249
+ "NOTE_ON=98": 194,
250
+ "INST=15": 195,
251
+ "NOTE_OFF=21": 196,
252
+ "NOTE_ON=21": 197,
253
+ "INST=13": 198,
254
+ "NOTE_OFF=19": 199,
255
+ "NOTE_ON=19": 200,
256
+ "NOTE_OFF=99": 201,
257
+ "NOTE_ON=99": 202,
258
+ "NOTE_OFF=97": 203,
259
+ "NOTE_ON=97": 204,
260
+ "NOTE_OFF=0": 205,
261
+ "NOTE_ON=0": 206,
262
+ "NOTE_OFF=100": 207,
263
+ "NOTE_ON=100": 208,
264
+ "NOTE_OFF=16": 209,
265
+ "NOTE_ON=16": 210,
266
+ "NOTE_OFF=13": 211,
267
+ "NOTE_ON=13": 212,
268
+ "NOTE_OFF=20": 213,
269
+ "NOTE_ON=20": 214,
270
+ "NOTE_OFF=105": 215,
271
+ "NOTE_ON=105": 216,
272
+ "NOTE_OFF=103": 217,
273
+ "NOTE_ON=103": 218,
274
+ "NOTE_OFF=101": 219,
275
+ "NOTE_ON=101": 220,
276
+ "NOTE_OFF=102": 221,
277
+ "NOTE_ON=102": 222,
278
+ "NOTE_OFF=17": 223,
279
+ "NOTE_ON=17": 224,
280
+ "NOTE_OFF=107": 225,
281
+ "NOTE_ON=107": 226,
282
+ "NOTE_OFF=108": 227,
283
+ "NOTE_ON=108": 228,
284
+ "NOTE_OFF=18": 229,
285
+ "NOTE_ON=18": 230,
286
+ "NOTE_OFF=126": 231,
287
+ "NOTE_ON=126": 232,
288
+ "NOTE_OFF=104": 233,
289
+ "NOTE_ON=104": 234,
290
+ "NOTE_OFF=8": 235,
291
+ "NOTE_ON=8": 236,
292
+ "NOTE_OFF=117": 237,
293
+ "NOTE_ON=117": 238,
294
+ "NOTE_OFF=106": 239,
295
+ "NOTE_ON=106": 240,
296
+ "NOTE_OFF=110": 241,
297
+ "NOTE_ON=110": 242,
298
+ "NOTE_OFF=112": 243,
299
+ "NOTE_ON=112": 244,
300
+ "NOTE_OFF=12": 245,
301
+ "NOTE_ON=12": 246,
302
+ "NOTE_OFF=9": 247,
303
+ "NOTE_ON=9": 248,
304
+ "NOTE_OFF=14": 249,
305
+ "NOTE_ON=14": 250,
306
+ "NOTE_OFF=113": 251,
307
+ "NOTE_ON=113": 252,
308
+ "NOTE_OFF=15": 253,
309
+ "NOTE_ON=15": 254,
310
+ "NOTE_OFF=125": 255,
311
+ "NOTE_ON=125": 256,
312
+ "NOTE_OFF=109": 257,
313
+ "NOTE_ON=109": 258,
314
+ "NOTE_OFF=115": 259,
315
+ "NOTE_ON=115": 260,
316
+ "NOTE_OFF=120": 261,
317
+ "NOTE_ON=120": 262,
318
+ "NOTE_OFF=119": 263,
319
+ "NOTE_ON=119": 264,
320
+ "NOTE_OFF=122": 265,
321
+ "NOTE_ON=122": 266,
322
+ "NOTE_OFF=124": 267,
323
+ "NOTE_OFF=127": 268,
324
+ "NOTE_ON=124": 269,
325
+ "NOTE_ON=127": 270,
326
+ "NOTE_OFF=11": 271,
327
+ "NOTE_ON=11": 272,
328
+ "NOTE_OFF=4": 273,
329
+ "NOTE_ON=4": 274,
330
+ "NOTE_OFF=10": 275,
331
+ "NOTE_ON=10": 276,
332
+ "NOTE_OFF=111": 277,
333
+ "NOTE_ON=111": 278,
334
+ "NOTE_OFF=5": 279,
335
+ "NOTE_OFF=6": 280,
336
+ "NOTE_ON=5": 281,
337
+ "NOTE_ON=6": 282,
338
+ "NOTE_OFF=1": 283,
339
+ "NOTE_ON=1": 284,
340
+ "NOTE_OFF=114": 285,
341
+ "NOTE_ON=114": 286,
342
+ "NOTE_OFF=2": 287,
343
+ "NOTE_ON=2": 288,
344
+ "NOTE_OFF=7": 289,
345
+ "NOTE_ON=7": 290,
346
+ "NOTE_OFF=3": 291,
347
+ "NOTE_ON=3": 292,
348
+ "NOTE_OFF=116": 293,
349
+ "NOTE_OFF=121": 294,
350
+ "NOTE_ON=116": 295,
351
+ "NOTE_ON=121": 296,
352
+ "NOTE_OFF=118": 297,
353
+ "NOTE_ON=118": 298
354
+ },
355
+ "unk_token": "[UNK]"
356
+ }
357
+ }
manual_upload/manual_upload/manual_upload/manual_upload/tokenizer_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "model_max_length": 1000000000000000019884624838656,
3
+ "tokenizer_class": "PreTrainedTokenizerFast"
4
+ }
manual_upload/manual_upload/manual_upload/manual_upload/trainer_state.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": null,
5
+ "global_step": 0,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [],
10
+ "max_steps": 0,
11
+ "num_train_epochs": 0,
12
+ "total_flos": 0,
13
+ "trial_name": null,
14
+ "trial_params": null
15
+ }
manual_upload/manual_upload/manual_upload/manual_upload/training_args.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "models/improved_4bars",
3
+ "overwrite_output_dir": true,
4
+ "do_train": false,
5
+ "do_eval": true,
6
+ "do_predict": false,
7
+ "evaluation_strategy": "steps",
8
+ "prediction_loss_only": false,
9
+ "per_device_train_batch_size": 10,
10
+ "per_device_eval_batch_size": 8,
11
+ "per_gpu_train_batch_size": null,
12
+ "per_gpu_eval_batch_size": null,
13
+ "gradient_accumulation_steps": 1,
14
+ "eval_accumulation_steps": null,
15
+ "eval_delay": 0,
16
+ "learning_rate": 0.0005,
17
+ "weight_decay": 0.1,
18
+ "adam_beta1": 0.9,
19
+ "adam_beta2": 0.999,
20
+ "adam_epsilon": 1e-08,
21
+ "max_grad_norm": 1.0,
22
+ "num_train_epochs": 10,
23
+ "max_steps": -1,
24
+ "lr_scheduler_type": "cosine",
25
+ "warmup_ratio": 0.0,
26
+ "warmup_steps": 5000,
27
+ "log_level": "passive",
28
+ "log_level_replica": "passive",
29
+ "log_on_each_node": true,
30
+ "logging_dir": "models/improved_4bars/logs",
31
+ "logging_strategy": "steps",
32
+ "logging_first_step": false,
33
+ "logging_steps": 2048,
34
+ "logging_nan_inf_filter": true,
35
+ "save_strategy": "steps",
36
+ "save_steps": 8192,
37
+ "save_total_limit": 5,
38
+ "save_on_each_node": false,
39
+ "no_cuda": false,
40
+ "use_mps_device": false,
41
+ "seed": 42,
42
+ "data_seed": null,
43
+ "jit_mode_eval": false,
44
+ "use_ipex": false,
45
+ "bf16": false,
46
+ "fp16": true,
47
+ "fp16_opt_level": "O1",
48
+ "half_precision_backend": "cuda_amp",
49
+ "bf16_full_eval": false,
50
+ "fp16_full_eval": false,
51
+ "tf32": null,
52
+ "local_rank": -1,
53
+ "xpu_backend": null,
54
+ "tpu_num_cores": null,
55
+ "tpu_metrics_debug": false,
56
+ "debug": [],
57
+ "dataloader_drop_last": false,
58
+ "eval_steps": 2048,
59
+ "dataloader_num_workers": 0,
60
+ "past_index": -1,
61
+ "run_name": "models/improved_4bars",
62
+ "disable_tqdm": false,
63
+ "remove_unused_columns": true,
64
+ "label_names": null,
65
+ "load_best_model_at_end": false,
66
+ "metric_for_best_model": null,
67
+ "greater_is_better": null,
68
+ "ignore_data_skip": false,
69
+ "sharded_ddp": [],
70
+ "fsdp": [],
71
+ "fsdp_min_num_params": 0,
72
+ "fsdp_transformer_layer_cls_to_wrap": null,
73
+ "deepspeed": null,
74
+ "label_smoothing_factor": 0.0,
75
+ "optim": "adamw_hf",
76
+ "optim_args": null,
77
+ "adafactor": false,
78
+ "group_by_length": false,
79
+ "length_column_name": "length",
80
+ "report_to": [
81
+ "wandb"
82
+ ],
83
+ "ddp_find_unused_parameters": null,
84
+ "ddp_bucket_cap_mb": null,
85
+ "dataloader_pin_memory": true,
86
+ "skip_memory_metrics": true,
87
+ "use_legacy_prediction_loop": false,
88
+ "push_to_hub": true,
89
+ "resume_from_checkpoint": null,
90
+ "hub_model_id": "JammyMachina/improved_4bars-mdl",
91
+ "hub_strategy": "every_save",
92
+ "hub_token": "<HUB_TOKEN>",
93
+ "hub_private_repo": false,
94
+ "gradient_checkpointing": false,
95
+ "include_inputs_for_metrics": false,
96
+ "fp16_backend": "auto",
97
+ "push_to_hub_model_id": null,
98
+ "push_to_hub_organization": null,
99
+ "push_to_hub_token": "<PUSH_TO_HUB_TOKEN>",
100
+ "mp_parameters": "",
101
+ "auto_find_batch_size": false,
102
+ "full_determinism": false,
103
+ "torchdynamo": null,
104
+ "ray_scope": "last",
105
+ "ddp_timeout": 1800,
106
+ "torch_compile": false,
107
+ "torch_compile_backend": null,
108
+ "torch_compile_mode": null
109
+ }
manual_upload/manual_upload/manual_upload/training_args.json CHANGED
@@ -6,7 +6,7 @@
6
  "do_predict": false,
7
  "evaluation_strategy": "steps",
8
  "prediction_loss_only": false,
9
- "per_device_train_batch_size": 10,
10
  "per_device_eval_batch_size": 8,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
 
6
  "do_predict": false,
7
  "evaluation_strategy": "steps",
8
  "prediction_loss_only": false,
9
+ "per_device_train_batch_size": 8,
10
  "per_device_eval_batch_size": 8,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
manual_upload/manual_upload/training_args.json CHANGED
@@ -6,7 +6,7 @@
6
  "do_predict": false,
7
  "evaluation_strategy": "steps",
8
  "prediction_loss_only": false,
9
- "per_device_train_batch_size": 8,
10
  "per_device_eval_batch_size": 8,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
 
6
  "do_predict": false,
7
  "evaluation_strategy": "steps",
8
  "prediction_loss_only": false,
9
+ "per_device_train_batch_size": 7,
10
  "per_device_eval_batch_size": 8,
11
  "per_gpu_train_batch_size": null,
12
  "per_gpu_eval_batch_size": null,
manual_upload/training_args.json CHANGED
@@ -19,7 +19,7 @@
19
  "adam_beta2": 0.999,
20
  "adam_epsilon": 1e-08,
21
  "max_grad_norm": 1.0,
22
- "num_train_epochs": 10,
23
  "max_steps": -1,
24
  "lr_scheduler_type": "cosine",
25
  "warmup_ratio": 0.0,
 
19
  "adam_beta2": 0.999,
20
  "adam_epsilon": 1e-08,
21
  "max_grad_norm": 1.0,
22
+ "num_train_epochs": 8,
23
  "max_steps": -1,
24
  "lr_scheduler_type": "cosine",
25
  "warmup_ratio": 0.0,