File size: 7,545 Bytes
1e69ad9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43af51d
1e69ad9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43af51d
1e69ad9
43af51d
 
1e69ad9
 
 
 
 
43af51d
 
 
1e69ad9
43af51d
 
1e69ad9
 
 
5943af9
 
d2852a9
 
48a66d6
 
0dc7374
 
9accf46
 
623c8a7
 
59eb18b
 
2cfdded
 
e50e380
 
6e53a0b
 
860ddd4
 
8e0f975
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
INFO:__main__:Training/evaluation parameters TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=True,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.EPOCH,
eval_use_gather_object=False,
evaluation_strategy=epoch,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
gradient_accumulation_steps=1,
gradient_checkpointing=False,
gradient_checkpointing_kwargs=None,
greater_is_better=False,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
jit_mode_eval=False,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=5e-05,
length_column_name=length,
load_best_model_at_end=True,
local_rank=0,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=/home/iais_marenpielka/Bouthaina/res_nw_lev/runs/Sep01_15-12-59_lmgpu-node-07,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=500,
logging_strategy=IntervalStrategy.EPOCH,
lr_scheduler_kwargs={},
lr_scheduler_type=SchedulerType.LINEAR,
max_grad_norm=1.0,
max_steps=-1,
metric_for_best_model=loss,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
num_train_epochs=20.0,
optim=OptimizerNames.ADAMW_TORCH,
optim_args=None,
optim_target_modules=None,
output_dir=/home/iais_marenpielka/Bouthaina/res_nw_lev,
overwrite_output_dir=False,
past_index=-1,
per_device_eval_batch_size=8,
per_device_train_batch_size=8,
prediction_loss_only=False,
push_to_hub=True,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=[],
restore_callback_states_from_checkpoint=False,
resume_from_checkpoint=None,
run_name=/home/iais_marenpielka/Bouthaina/res_nw_lev,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=500,
save_strategy=IntervalStrategy.EPOCH,
save_total_limit=None,
seed=42,
skip_memory_metrics=True,
split_batches=None,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torch_empty_cache_steps=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.0,
warmup_steps=500,
weight_decay=0.0,
)
INFO:datasets.builder:Using custom data configuration default-64458019b70d880f
INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text
INFO:datasets.builder:Generating dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-64458019b70d880f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101)
INFO:datasets.builder:Downloading and preparing dataset text/default to /home/iais_marenpielka/.cache/huggingface/datasets/text/default-64458019b70d880f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101...
INFO:datasets.download.download_manager:Downloading took 0.0 min
INFO:datasets.download.download_manager:Checksum Computation took 0.0 min
INFO:datasets.builder:Generating train split
INFO:datasets.builder:Generating validation split
INFO:datasets.utils.info_utils:Unable to verify splits sizes.
INFO:datasets.builder:Dataset text downloaded and prepared to /home/iais_marenpielka/.cache/huggingface/datasets/text/default-64458019b70d880f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101. Subsequent calls will reuse this data.
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-64458019b70d880f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-5d0d70159f9c2d00.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-64458019b70d880f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-baeede9d9358f9df.arrow
WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx.
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-64458019b70d880f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-0b6ec53ca05a7636.arrow
INFO:datasets.arrow_dataset:Caching processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-64458019b70d880f/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-80e1ecf4644c0912.arrow
WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
INFO:root:Epoch 1.0: Train Loss = None, Eval Loss = None
INFO:absl:Using default tokenizer.
INFO:root:Epoch 2.0: Train Loss = 0.8553, Eval Loss = 0.5277819633483887
INFO:absl:Using default tokenizer.
INFO:root:Epoch 3.0: Train Loss = 0.4665, Eval Loss = 0.46819329261779785
INFO:absl:Using default tokenizer.
INFO:root:Epoch 4.0: Train Loss = 0.3762, Eval Loss = 0.43943917751312256
INFO:absl:Using default tokenizer.
INFO:root:Epoch 5.0: Train Loss = 0.3096, Eval Loss = 0.42224156856536865
INFO:absl:Using default tokenizer.
INFO:root:Epoch 6.0: Train Loss = 0.2588, Eval Loss = 0.4118480682373047
INFO:absl:Using default tokenizer.
INFO:root:Epoch 7.0: Train Loss = 0.2202, Eval Loss = 0.4063816964626312
INFO:absl:Using default tokenizer.
INFO:root:Epoch 8.0: Train Loss = 0.1906, Eval Loss = 0.4055454134941101
INFO:absl:Using default tokenizer.
INFO:root:Epoch 9.0: Train Loss = 0.1676, Eval Loss = 0.40759241580963135
INFO:absl:Using default tokenizer.
INFO:root:Epoch 10.0: Train Loss = 0.1502, Eval Loss = 0.41223180294036865
INFO:absl:Using default tokenizer.
INFO:root:Epoch 11.0: Train Loss = 0.1371, Eval Loss = 0.4181581735610962
INFO:absl:Using default tokenizer.
INFO:root:Epoch 12.0: Train Loss = 0.1275, Eval Loss = 0.4227945804595947
INFO:absl:Using default tokenizer.
INFO:__main__:*** Evaluate ***
INFO:absl:Using default tokenizer.