nlparabic committed on
Commit 3a9df78 (verified)
1 Parent(s): 643136f

Training in progress, epoch 1

Files changed (4)
  1. egy_training_log.txt +572 -28
  2. model.safetensors +1 -1
  3. tokenizer.json +16 -2
  4. training_args.bin +1 -1
egy_training_log.txt CHANGED
@@ -70,7 +70,7 @@ local_rank=0,
70
  log_level=passive,
71
  log_level_replica=warning,
72
  log_on_each_node=True,
73
- logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug25_19-01-10_lmgpu-node-09,
74
  logging_first_step=False,
75
  logging_nan_inf_filter=True,
76
  logging_steps=500,
@@ -134,11 +134,153 @@ INFO:datasets.builder:Overwrite dataset info from restored data version if exist
134
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
135
  INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
136
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
137
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-2e8b6b1ec83a74b2.arrow
138
- INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-81e3380949037f68.arrow
139
  WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
140
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-931a5c298c87592b.arrow
141
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-5f36ba68f7702ab7.arrow
142
  WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
143
  WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
144
  INFO:__main__:Training/evaluation parameters TrainingArguments(
@@ -212,7 +354,279 @@ local_rank=0,
212
  log_level=passive,
213
  log_level_replica=warning,
214
  log_on_each_node=True,
215
- logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug25_19-02-04_lmgpu-node-09,
216
  logging_first_step=False,
217
  logging_nan_inf_filter=True,
218
  logging_steps=500,
@@ -276,11 +690,11 @@ INFO:datasets.builder:Overwrite dataset info from restored data version if exist
276
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
277
  INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
278
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
279
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-2e8b6b1ec83a74b2.arrow
280
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-81e3380949037f68.arrow
281
  WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
282
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-2e8b6b1ec83a74b2.arrow
283
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-81e3380949037f68.arrow
284
  WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
285
  WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
286
  INFO:__main__:Training/evaluation parameters TrainingArguments(
@@ -354,7 +768,143 @@ local_rank=0,
354
  log_level=passive,
355
  log_level_replica=warning,
356
  log_on_each_node=True,
357
- logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug25_19-04-25_lmgpu-node-09,
358
  logging_first_step=False,
359
  logging_nan_inf_filter=True,
360
  logging_steps=500,
@@ -490,7 +1040,7 @@ local_rank=0,
490
  log_level=passive,
491
  log_level_replica=warning,
492
  log_on_each_node=True,
493
- logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug25_19-05-20_lmgpu-node-09,
494
  logging_first_step=False,
495
  logging_nan_inf_filter=True,
496
  logging_steps=500,
@@ -626,7 +1176,7 @@ local_rank=0,
626
  log_level=passive,
627
  log_level_replica=warning,
628
  log_on_each_node=True,
629
- logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug25_19-06-09_lmgpu-node-09,
630
  logging_first_step=False,
631
  logging_nan_inf_filter=True,
632
  logging_steps=500,
@@ -690,11 +1240,11 @@ INFO:datasets.builder:Overwrite dataset info from restored data version if exist
690
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
691
  INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
692
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
693
- INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-ffc142ffcddbe7b3.arrow
694
- INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-1a35f1b6580983eb.arrow
695
  WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
696
- INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-a683048173790563.arrow
697
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-1a35f1b6580983eb.arrow
698
  WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
699
  WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
700
  INFO:__main__:Training/evaluation parameters TrainingArguments(
@@ -768,7 +1318,7 @@ local_rank=0,
768
  log_level=passive,
769
  log_level_replica=warning,
770
  log_on_each_node=True,
771
- logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug25_21-19-07_lmgpu-node-06,
772
  logging_first_step=False,
773
  logging_nan_inf_filter=True,
774
  logging_steps=500,
@@ -832,17 +1382,11 @@ INFO:datasets.builder:Overwrite dataset info from restored data version if exist
832
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
833
  INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
834
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
835
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-4cda59a599643701.arrow
836
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-d82ef9a45800c64f.arrow
837
  WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
838
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-038f8e8385bf6638.arrow
839
- INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-51f1e2b6546273ed.arrow
840
  WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
841
  INFO:root:Epoch 1.0: Train Loss = None, Eval Loss = None
842
  INFO:absl:Using default tokenizer.
843
- INFO:root:Epoch 2.0: Train Loss = 4.0723, Eval Loss = 2.958087921142578
844
- INFO:absl:Using default tokenizer.
845
- INFO:root:Epoch 3.0: Train Loss = 2.8055, Eval Loss = 2.606330394744873
846
- INFO:absl:Using default tokenizer.
847
- INFO:__main__:*** Evaluate ***
848
- INFO:absl:Using default tokenizer.
 
70
  log_level=passive,
71
  log_level_replica=warning,
72
  log_on_each_node=True,
73
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-29-09_lmgpu-node-02,
74
  logging_first_step=False,
75
  logging_nan_inf_filter=True,
76
  logging_steps=500,
 
134
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
135
  INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
136
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
137
+ INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-4cda59a599643701.arrow
138
+ INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-d82ef9a45800c64f.arrow
139
+ WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
140
+ INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-e14de7cbddab934f.arrow
141
+ INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-bac4c1d937397873.arrow
142
+ WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
143
+ WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
144
+ INFO:__main__:Training/evaluation parameters TrainingArguments(
145
+ _n_gpu=1,
146
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
147
+ adafactor=False,
148
+ adam_beta1=0.9,
149
+ adam_beta2=0.999,
150
+ adam_epsilon=1e-08,
151
+ auto_find_batch_size=False,
152
+ batch_eval_metrics=False,
153
+ bf16=False,
154
+ bf16_full_eval=False,
155
+ data_seed=None,
156
+ dataloader_drop_last=False,
157
+ dataloader_num_workers=0,
158
+ dataloader_persistent_workers=False,
159
+ dataloader_pin_memory=True,
160
+ dataloader_prefetch_factor=None,
161
+ ddp_backend=None,
162
+ ddp_broadcast_buffers=None,
163
+ ddp_bucket_cap_mb=None,
164
+ ddp_find_unused_parameters=None,
165
+ ddp_timeout=1800,
166
+ debug=[],
167
+ deepspeed=None,
168
+ disable_tqdm=False,
169
+ dispatch_batches=None,
170
+ do_eval=True,
171
+ do_predict=False,
172
+ do_train=True,
173
+ eval_accumulation_steps=None,
174
+ eval_delay=0,
175
+ eval_do_concat_batches=True,
176
+ eval_on_start=False,
177
+ eval_steps=None,
178
+ eval_strategy=IntervalStrategy.EPOCH,
179
+ eval_use_gather_object=False,
180
+ evaluation_strategy=epoch,
181
+ fp16=False,
182
+ fp16_backend=auto,
183
+ fp16_full_eval=False,
184
+ fp16_opt_level=O1,
185
+ fsdp=[],
186
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
187
+ fsdp_min_num_params=0,
188
+ fsdp_transformer_layer_cls_to_wrap=None,
189
+ full_determinism=False,
190
+ gradient_accumulation_steps=1,
191
+ gradient_checkpointing=False,
192
+ gradient_checkpointing_kwargs=None,
193
+ greater_is_better=False,
194
+ group_by_length=False,
195
+ half_precision_backend=auto,
196
+ hub_always_push=False,
197
+ hub_model_id=None,
198
+ hub_private_repo=False,
199
+ hub_strategy=HubStrategy.EVERY_SAVE,
200
+ hub_token=<HUB_TOKEN>,
201
+ ignore_data_skip=False,
202
+ include_inputs_for_metrics=False,
203
+ include_num_input_tokens_seen=False,
204
+ include_tokens_per_second=False,
205
+ jit_mode_eval=False,
206
+ label_names=None,
207
+ label_smoothing_factor=0.0,
208
+ learning_rate=5e-05,
209
+ length_column_name=length,
210
+ load_best_model_at_end=True,
211
+ local_rank=0,
212
+ log_level=passive,
213
+ log_level_replica=warning,
214
+ log_on_each_node=True,
215
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-30-54_lmgpu-node-02,
216
+ logging_first_step=False,
217
+ logging_nan_inf_filter=True,
218
+ logging_steps=500,
219
+ logging_strategy=IntervalStrategy.EPOCH,
220
+ lr_scheduler_kwargs={},
221
+ lr_scheduler_type=SchedulerType.LINEAR,
222
+ max_grad_norm=1.0,
223
+ max_steps=-1,
224
+ metric_for_best_model=loss,
225
+ mp_parameters=,
226
+ neftune_noise_alpha=None,
227
+ no_cuda=False,
228
+ num_train_epochs=3.0,
229
+ optim=OptimizerNames.ADAMW_TORCH,
230
+ optim_args=None,
231
+ optim_target_modules=None,
232
+ output_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy,
233
+ overwrite_output_dir=False,
234
+ past_index=-1,
235
+ per_device_eval_batch_size=8,
236
+ per_device_train_batch_size=8,
237
+ prediction_loss_only=False,
238
+ push_to_hub=True,
239
+ push_to_hub_model_id=None,
240
+ push_to_hub_organization=None,
241
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
242
+ ray_scope=last,
243
+ remove_unused_columns=True,
244
+ report_to=[],
245
+ restore_callback_states_from_checkpoint=False,
246
+ resume_from_checkpoint=None,
247
+ run_name=/home/iais_marenpielka/Bouthaina/res_nw_egy,
248
+ save_on_each_node=False,
249
+ save_only_model=False,
250
+ save_safetensors=True,
251
+ save_steps=500,
252
+ save_strategy=IntervalStrategy.EPOCH,
253
+ save_total_limit=None,
254
+ seed=42,
255
+ skip_memory_metrics=True,
256
+ split_batches=None,
257
+ tf32=None,
258
+ torch_compile=False,
259
+ torch_compile_backend=None,
260
+ torch_compile_mode=None,
261
+ torch_empty_cache_steps=None,
262
+ torchdynamo=None,
263
+ tpu_metrics_debug=False,
264
+ tpu_num_cores=None,
265
+ use_cpu=False,
266
+ use_ipex=False,
267
+ use_legacy_prediction_loop=False,
268
+ use_mps_device=False,
269
+ warmup_ratio=0.0,
270
+ warmup_steps=500,
271
+ weight_decay=0.0,
272
+ )
273
+ INFO:datasets.builder:Using custom data configuration default-93ed01be52df6f6e
274
+ INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text
275
+ INFO:datasets.builder:Overwrite dataset info from restored data version if exists.
276
+ INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
277
+ INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
278
+ INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
279
+ INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-4cda59a599643701.arrow
280
+ INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-d82ef9a45800c64f.arrow
281
  WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
282
+ INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-e14de7cbddab934f.arrow
283
+ INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-bac4c1d937397873.arrow
284
  WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
285
  WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
286
  INFO:__main__:Training/evaluation parameters TrainingArguments(
 
354
  log_level=passive,
355
  log_level_replica=warning,
356
  log_on_each_node=True,
357
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-34-56_lmgpu-node-02,
358
+ logging_first_step=False,
359
+ logging_nan_inf_filter=True,
360
+ logging_steps=500,
361
+ logging_strategy=IntervalStrategy.EPOCH,
362
+ lr_scheduler_kwargs={},
363
+ lr_scheduler_type=SchedulerType.LINEAR,
364
+ max_grad_norm=1.0,
365
+ max_steps=-1,
366
+ metric_for_best_model=loss,
367
+ mp_parameters=,
368
+ neftune_noise_alpha=None,
369
+ no_cuda=False,
370
+ num_train_epochs=3.0,
371
+ optim=OptimizerNames.ADAMW_TORCH,
372
+ optim_args=None,
373
+ optim_target_modules=None,
374
+ output_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy,
375
+ overwrite_output_dir=False,
376
+ past_index=-1,
377
+ per_device_eval_batch_size=8,
378
+ per_device_train_batch_size=8,
379
+ prediction_loss_only=False,
380
+ push_to_hub=True,
381
+ push_to_hub_model_id=None,
382
+ push_to_hub_organization=None,
383
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
384
+ ray_scope=last,
385
+ remove_unused_columns=True,
386
+ report_to=[],
387
+ restore_callback_states_from_checkpoint=False,
388
+ resume_from_checkpoint=None,
389
+ run_name=/home/iais_marenpielka/Bouthaina/res_nw_egy,
390
+ save_on_each_node=False,
391
+ save_only_model=False,
392
+ save_safetensors=True,
393
+ save_steps=500,
394
+ save_strategy=IntervalStrategy.EPOCH,
395
+ save_total_limit=None,
396
+ seed=42,
397
+ skip_memory_metrics=True,
398
+ split_batches=None,
399
+ tf32=None,
400
+ torch_compile=False,
401
+ torch_compile_backend=None,
402
+ torch_compile_mode=None,
403
+ torch_empty_cache_steps=None,
404
+ torchdynamo=None,
405
+ tpu_metrics_debug=False,
406
+ tpu_num_cores=None,
407
+ use_cpu=False,
408
+ use_ipex=False,
409
+ use_legacy_prediction_loop=False,
410
+ use_mps_device=False,
411
+ warmup_ratio=0.0,
412
+ warmup_steps=500,
413
+ weight_decay=0.0,
414
+ )
415
+ INFO:datasets.builder:Using custom data configuration default-93ed01be52df6f6e
416
+ INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text
417
+ INFO:datasets.builder:Overwrite dataset info from restored data version if exists.
418
+ INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
419
+ INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
420
+ INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
421
+ WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
422
+ INFO:__main__:Training/evaluation parameters TrainingArguments(
423
+ _n_gpu=1,
424
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
425
+ adafactor=False,
426
+ adam_beta1=0.9,
427
+ adam_beta2=0.999,
428
+ adam_epsilon=1e-08,
429
+ auto_find_batch_size=False,
430
+ batch_eval_metrics=False,
431
+ bf16=False,
432
+ bf16_full_eval=False,
433
+ data_seed=None,
434
+ dataloader_drop_last=False,
435
+ dataloader_num_workers=0,
436
+ dataloader_persistent_workers=False,
437
+ dataloader_pin_memory=True,
438
+ dataloader_prefetch_factor=None,
439
+ ddp_backend=None,
440
+ ddp_broadcast_buffers=None,
441
+ ddp_bucket_cap_mb=None,
442
+ ddp_find_unused_parameters=None,
443
+ ddp_timeout=1800,
444
+ debug=[],
445
+ deepspeed=None,
446
+ disable_tqdm=False,
447
+ dispatch_batches=None,
448
+ do_eval=True,
449
+ do_predict=False,
450
+ do_train=True,
451
+ eval_accumulation_steps=None,
452
+ eval_delay=0,
453
+ eval_do_concat_batches=True,
454
+ eval_on_start=False,
455
+ eval_steps=None,
456
+ eval_strategy=IntervalStrategy.EPOCH,
457
+ eval_use_gather_object=False,
458
+ evaluation_strategy=epoch,
459
+ fp16=False,
460
+ fp16_backend=auto,
461
+ fp16_full_eval=False,
462
+ fp16_opt_level=O1,
463
+ fsdp=[],
464
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
465
+ fsdp_min_num_params=0,
466
+ fsdp_transformer_layer_cls_to_wrap=None,
467
+ full_determinism=False,
468
+ gradient_accumulation_steps=1,
469
+ gradient_checkpointing=False,
470
+ gradient_checkpointing_kwargs=None,
471
+ greater_is_better=False,
472
+ group_by_length=False,
473
+ half_precision_backend=auto,
474
+ hub_always_push=False,
475
+ hub_model_id=None,
476
+ hub_private_repo=False,
477
+ hub_strategy=HubStrategy.EVERY_SAVE,
478
+ hub_token=<HUB_TOKEN>,
479
+ ignore_data_skip=False,
480
+ include_inputs_for_metrics=False,
481
+ include_num_input_tokens_seen=False,
482
+ include_tokens_per_second=False,
483
+ jit_mode_eval=False,
484
+ label_names=None,
485
+ label_smoothing_factor=0.0,
486
+ learning_rate=5e-05,
487
+ length_column_name=length,
488
+ load_best_model_at_end=True,
489
+ local_rank=0,
490
+ log_level=passive,
491
+ log_level_replica=warning,
492
+ log_on_each_node=True,
493
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-36-31_lmgpu-node-02,
494
+ logging_first_step=False,
495
+ logging_nan_inf_filter=True,
496
+ logging_steps=500,
497
+ logging_strategy=IntervalStrategy.EPOCH,
498
+ lr_scheduler_kwargs={},
499
+ lr_scheduler_type=SchedulerType.LINEAR,
500
+ max_grad_norm=1.0,
501
+ max_steps=-1,
502
+ metric_for_best_model=loss,
503
+ mp_parameters=,
504
+ neftune_noise_alpha=None,
505
+ no_cuda=False,
506
+ num_train_epochs=3.0,
507
+ optim=OptimizerNames.ADAMW_TORCH,
508
+ optim_args=None,
509
+ optim_target_modules=None,
510
+ output_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy,
511
+ overwrite_output_dir=False,
512
+ past_index=-1,
513
+ per_device_eval_batch_size=8,
514
+ per_device_train_batch_size=8,
515
+ prediction_loss_only=False,
516
+ push_to_hub=True,
517
+ push_to_hub_model_id=None,
518
+ push_to_hub_organization=None,
519
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
520
+ ray_scope=last,
521
+ remove_unused_columns=True,
522
+ report_to=[],
523
+ restore_callback_states_from_checkpoint=False,
524
+ resume_from_checkpoint=None,
525
+ run_name=/home/iais_marenpielka/Bouthaina/res_nw_egy,
526
+ save_on_each_node=False,
527
+ save_only_model=False,
528
+ save_safetensors=True,
529
+ save_steps=500,
530
+ save_strategy=IntervalStrategy.EPOCH,
531
+ save_total_limit=None,
532
+ seed=42,
533
+ skip_memory_metrics=True,
534
+ split_batches=None,
535
+ tf32=None,
536
+ torch_compile=False,
537
+ torch_compile_backend=None,
538
+ torch_compile_mode=None,
539
+ torch_empty_cache_steps=None,
540
+ torchdynamo=None,
541
+ tpu_metrics_debug=False,
542
+ tpu_num_cores=None,
543
+ use_cpu=False,
544
+ use_ipex=False,
545
+ use_legacy_prediction_loop=False,
546
+ use_mps_device=False,
547
+ warmup_ratio=0.0,
548
+ warmup_steps=500,
549
+ weight_decay=0.0,
550
+ )
551
+ INFO:datasets.builder:Using custom data configuration default-93ed01be52df6f6e
552
+ INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text
553
+ INFO:datasets.builder:Overwrite dataset info from restored data version if exists.
554
+ INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
555
+ INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
556
+ INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
557
+ WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
558
+ INFO:__main__:Training/evaluation parameters TrainingArguments(
559
+ _n_gpu=1,
560
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
561
+ adafactor=False,
562
+ adam_beta1=0.9,
563
+ adam_beta2=0.999,
564
+ adam_epsilon=1e-08,
565
+ auto_find_batch_size=False,
566
+ batch_eval_metrics=False,
567
+ bf16=False,
568
+ bf16_full_eval=False,
569
+ data_seed=None,
570
+ dataloader_drop_last=False,
571
+ dataloader_num_workers=0,
572
+ dataloader_persistent_workers=False,
573
+ dataloader_pin_memory=True,
574
+ dataloader_prefetch_factor=None,
575
+ ddp_backend=None,
576
+ ddp_broadcast_buffers=None,
577
+ ddp_bucket_cap_mb=None,
578
+ ddp_find_unused_parameters=None,
579
+ ddp_timeout=1800,
580
+ debug=[],
581
+ deepspeed=None,
582
+ disable_tqdm=False,
583
+ dispatch_batches=None,
584
+ do_eval=True,
585
+ do_predict=False,
586
+ do_train=True,
587
+ eval_accumulation_steps=None,
588
+ eval_delay=0,
589
+ eval_do_concat_batches=True,
590
+ eval_on_start=False,
591
+ eval_steps=None,
592
+ eval_strategy=IntervalStrategy.EPOCH,
593
+ eval_use_gather_object=False,
594
+ evaluation_strategy=epoch,
595
+ fp16=False,
596
+ fp16_backend=auto,
597
+ fp16_full_eval=False,
598
+ fp16_opt_level=O1,
599
+ fsdp=[],
600
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
601
+ fsdp_min_num_params=0,
602
+ fsdp_transformer_layer_cls_to_wrap=None,
603
+ full_determinism=False,
604
+ gradient_accumulation_steps=1,
605
+ gradient_checkpointing=False,
606
+ gradient_checkpointing_kwargs=None,
607
+ greater_is_better=False,
608
+ group_by_length=False,
609
+ half_precision_backend=auto,
610
+ hub_always_push=False,
611
+ hub_model_id=None,
612
+ hub_private_repo=False,
613
+ hub_strategy=HubStrategy.EVERY_SAVE,
614
+ hub_token=<HUB_TOKEN>,
615
+ ignore_data_skip=False,
616
+ include_inputs_for_metrics=False,
617
+ include_num_input_tokens_seen=False,
618
+ include_tokens_per_second=False,
619
+ jit_mode_eval=False,
620
+ label_names=None,
621
+ label_smoothing_factor=0.0,
622
+ learning_rate=5e-05,
623
+ length_column_name=length,
624
+ load_best_model_at_end=True,
625
+ local_rank=0,
626
+ log_level=passive,
627
+ log_level_replica=warning,
628
+ log_on_each_node=True,
629
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-37-48_lmgpu-node-02,
630
  logging_first_step=False,
631
  logging_nan_inf_filter=True,
632
  logging_steps=500,
 
690
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
691
  INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
692
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
693
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-15c48f6e90050931.arrow
694
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-6922352c25c70c34.arrow
695
  WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
696
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-4bc8134882fda7ba.arrow
697
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-42e32fa86dce08b8.arrow
698
  WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
699
  WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
700
  INFO:__main__:Training/evaluation parameters TrainingArguments(
 
768
  log_level=passive,
769
  log_level_replica=warning,
770
  log_on_each_node=True,
771
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-45-12_lmgpu-node-02,
772
+ logging_first_step=False,
773
+ logging_nan_inf_filter=True,
774
+ logging_steps=500,
775
+ logging_strategy=IntervalStrategy.EPOCH,
776
+ lr_scheduler_kwargs={},
777
+ lr_scheduler_type=SchedulerType.LINEAR,
778
+ max_grad_norm=1.0,
779
+ max_steps=-1,
780
+ metric_for_best_model=loss,
781
+ mp_parameters=,
782
+ neftune_noise_alpha=None,
783
+ no_cuda=False,
784
+ num_train_epochs=3.0,
785
+ optim=OptimizerNames.ADAMW_TORCH,
786
+ optim_args=None,
787
+ optim_target_modules=None,
788
+ output_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy,
789
+ overwrite_output_dir=False,
790
+ past_index=-1,
791
+ per_device_eval_batch_size=8,
792
+ per_device_train_batch_size=8,
793
+ prediction_loss_only=False,
794
+ push_to_hub=True,
795
+ push_to_hub_model_id=None,
796
+ push_to_hub_organization=None,
797
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
798
+ ray_scope=last,
799
+ remove_unused_columns=True,
800
+ report_to=[],
801
+ restore_callback_states_from_checkpoint=False,
802
+ resume_from_checkpoint=None,
803
+ run_name=/home/iais_marenpielka/Bouthaina/res_nw_egy,
804
+ save_on_each_node=False,
805
+ save_only_model=False,
806
+ save_safetensors=True,
807
+ save_steps=500,
808
+ save_strategy=IntervalStrategy.EPOCH,
809
+ save_total_limit=None,
810
+ seed=42,
811
+ skip_memory_metrics=True,
812
+ split_batches=None,
813
+ tf32=None,
814
+ torch_compile=False,
815
+ torch_compile_backend=None,
816
+ torch_compile_mode=None,
817
+ torch_empty_cache_steps=None,
818
+ torchdynamo=None,
819
+ tpu_metrics_debug=False,
820
+ tpu_num_cores=None,
821
+ use_cpu=False,
822
+ use_ipex=False,
823
+ use_legacy_prediction_loop=False,
824
+ use_mps_device=False,
825
+ warmup_ratio=0.0,
826
+ warmup_steps=500,
827
+ weight_decay=0.0,
828
+ )
829
+ INFO:datasets.builder:Using custom data configuration default-93ed01be52df6f6e
830
+ INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text
831
+ INFO:datasets.builder:Overwrite dataset info from restored data version if exists.
832
+ INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
833
+ INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
834
+ INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
835
+ WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
836
+ INFO:__main__:Training/evaluation parameters TrainingArguments(
837
+ _n_gpu=1,
838
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
839
+ adafactor=False,
840
+ adam_beta1=0.9,
841
+ adam_beta2=0.999,
842
+ adam_epsilon=1e-08,
843
+ auto_find_batch_size=False,
844
+ batch_eval_metrics=False,
845
+ bf16=False,
846
+ bf16_full_eval=False,
847
+ data_seed=None,
848
+ dataloader_drop_last=False,
849
+ dataloader_num_workers=0,
850
+ dataloader_persistent_workers=False,
851
+ dataloader_pin_memory=True,
852
+ dataloader_prefetch_factor=None,
853
+ ddp_backend=None,
854
+ ddp_broadcast_buffers=None,
855
+ ddp_bucket_cap_mb=None,
856
+ ddp_find_unused_parameters=None,
857
+ ddp_timeout=1800,
858
+ debug=[],
859
+ deepspeed=None,
860
+ disable_tqdm=False,
861
+ dispatch_batches=None,
862
+ do_eval=True,
863
+ do_predict=False,
864
+ do_train=True,
865
+ eval_accumulation_steps=None,
866
+ eval_delay=0,
867
+ eval_do_concat_batches=True,
868
+ eval_on_start=False,
869
+ eval_steps=None,
870
+ eval_strategy=IntervalStrategy.EPOCH,
871
+ eval_use_gather_object=False,
872
+ evaluation_strategy=epoch,
873
+ fp16=False,
874
+ fp16_backend=auto,
875
+ fp16_full_eval=False,
876
+ fp16_opt_level=O1,
877
+ fsdp=[],
878
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
879
+ fsdp_min_num_params=0,
880
+ fsdp_transformer_layer_cls_to_wrap=None,
881
+ full_determinism=False,
882
+ gradient_accumulation_steps=1,
883
+ gradient_checkpointing=False,
884
+ gradient_checkpointing_kwargs=None,
885
+ greater_is_better=False,
886
+ group_by_length=False,
887
+ half_precision_backend=auto,
888
+ hub_always_push=False,
889
+ hub_model_id=None,
890
+ hub_private_repo=False,
891
+ hub_strategy=HubStrategy.EVERY_SAVE,
892
+ hub_token=<HUB_TOKEN>,
893
+ ignore_data_skip=False,
894
+ include_inputs_for_metrics=False,
895
+ include_num_input_tokens_seen=False,
896
+ include_tokens_per_second=False,
897
+ jit_mode_eval=False,
898
+ label_names=None,
899
+ label_smoothing_factor=0.0,
900
+ learning_rate=5e-05,
901
+ length_column_name=length,
902
+ load_best_model_at_end=True,
903
+ local_rank=0,
904
+ log_level=passive,
905
+ log_level_replica=warning,
906
+ log_on_each_node=True,
907
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-49-29_lmgpu-node-02,
908
  logging_first_step=False,
909
  logging_nan_inf_filter=True,
910
  logging_steps=500,
 
1040
  log_level=passive,
1041
  log_level_replica=warning,
1042
  log_on_each_node=True,
1043
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-54-02_lmgpu-node-02,
1044
  logging_first_step=False,
1045
  logging_nan_inf_filter=True,
1046
  logging_steps=500,
 
1176
  log_level=passive,
1177
  log_level_replica=warning,
1178
  log_on_each_node=True,
1179
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_08-55-36_lmgpu-node-02,
1180
  logging_first_step=False,
1181
  logging_nan_inf_filter=True,
1182
  logging_steps=500,
 
1240
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
1241
  INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
1242
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
1243
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-1618ed1dbdc1fac3.arrow
1244
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-4dd41ff5ffd8ebb9.arrow
1245
  WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
1246
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-968ccd46e919b580.arrow
1247
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-fc6af2bb14f3f4a5.arrow
1248
  WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
1249
  WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
1250
  INFO:__main__:Training/evaluation parameters TrainingArguments(
 
1318
  log_level=passive,
1319
  log_level_replica=warning,
1320
  log_on_each_node=True,
1321
+ logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_egy/runs/Aug26_09-01-10_lmgpu-node-02,
1322
  logging_first_step=False,
1323
  logging_nan_inf_filter=True,
1324
  logging_steps=500,
 
1382
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
1383
  INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
1384
  INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101
1385
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-8ff24a6e5f07e751.arrow
1386
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-24c3792376099a13.arrow
1387
  WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
1388
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-7eeb52512f3f992e.arrow
1389
+ INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-93ed01be52df6f6e/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-c6b81df4fac91850.arrow
1390
  WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
1391
  INFO:root:Epoch 1.0: Train Loss = None, Eval Loss = None
1392
  INFO:absl:Using default tokenizer.
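For reference, the TrainingArguments dump repeated throughout the log above corresponds to a configuration along the lines of the minimal sketch below. This is an illustration only, assuming the standard Hugging Face `transformers` Trainer API; the hyperparameter values are taken verbatim from the dump, while everything else about the actual training script is unknown.

```python
# Minimal sketch (not the repository's training script): the TrainingArguments
# dump in the log maps onto a constructor call roughly like this one.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="/home/iais_marenpielka/Bouthaina/res_nw_egy",
    do_train=True,
    do_eval=True,
    evaluation_strategy="epoch",      # evaluate at the end of every epoch
    save_strategy="epoch",            # checkpoint at the end of every epoch
    logging_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3.0,
    warmup_steps=500,
    weight_decay=0.0,
    load_best_model_at_end=True,      # best checkpoint by eval loss is restored
    metric_for_best_model="loss",
    greater_is_better=False,
    save_safetensors=True,
    push_to_hub=True,
    seed=42,
)
```

With the evaluation, save, and logging strategies all set to "epoch", the per-epoch lines in the log (e.g. Epoch 2.0: Train Loss = 4.0723, Eval Loss = 2.9581) are emitted once per epoch, and load_best_model_at_end=True with greater_is_better=False means the checkpoint with the lowest eval loss is reloaded when training finishes.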
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e0e5f1169bcbd77625282e5c08ab19b475c7735aaf2f2cc78a0878271a1a2a2
3
  size 539221632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bc793473cba110a469709dabdfd86c02a23876b3cec7ad4127b45d7df83c22f
3
  size 539221632
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 128,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 128
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 64000,
16
+ "pad_type_id": 0,
17
+ "pad_token": "<EOS>"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dd387c20f29a9602c739eddd06e5cc1283cac4ac8c19e05cd6e5a99446c6a7e
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f45e298f33c57c9b0a30141fc68a3f3abb5e9893518b28a61febed9782ea88e
3
  size 5240