Training in progress, step 1000
Browse files
- README.md +14 -43
- all_results.json +12 -12
- config.json +1 -1
- eval_results.json +7 -7
- indicwav2vec_trainwtags_MUCS_warmup2000_s300shuff100_2217857.out +1 -1
- indicwav2vec_trainwtags_MUCS_warmup2000_s300shuff100_2221377.out +0 -0
- train_results.json +6 -6
- trainer_state.json +0 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -11,14 +11,14 @@ model-index:
|
|
11 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
12 |
should probably proofread and complete it, then remove this comment. -->
|
13 |
|
14 |
-
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/priyanshipal/huggingface/runs/
|
15 |
# output
|
16 |
|
17 |
This model was trained from scratch on an unknown dataset.
|
18 |
It achieves the following results on the evaluation set:
|
19 |
-
- Loss:
|
20 |
-
- Cer: 0.
|
21 |
-
- Wer: 0.
|
22 |
|
23 |
## Model description
|
24 |
|
@@ -45,49 +45,20 @@ The following hyperparameters were used during training:
|
|
45 |
- total_train_batch_size: 32
|
46 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
47 |
- lr_scheduler_type: linear
|
48 |
-
- lr_scheduler_warmup_steps:
|
49 |
-
- training_steps:
|
50 |
- mixed_precision_training: Native AMP
|
51 |
|
52 |
### Training results
|
53 |
|
54 |
-
| Training Loss | Epoch | Step | Validation Loss | Cer | Wer |
|
55 |
-
|
56 |
-
|
|
57 |
-
|
|
58 |
-
|
|
59 |
-
| 3.
|
60 |
-
| 3.
|
61 |
-
|
|
62 |
-
| 1.7013 | 1.12 | 700 | 2.1151 | 0.4489 | 0.6820 |
|
63 |
-
| 1.2995 | 1.28 | 800 | 2.0757 | 0.4160 | 0.6245 |
|
64 |
-
| 1.6852 | 1.44 | 900 | 1.9870 | 0.4112 | 0.6154 |
|
65 |
-
| 1.3997 | 1.6 | 1000 | 2.0007 | 0.3962 | 0.6069 |
|
66 |
-
| 1.768 | 1.76 | 1100 | 2.0712 | 0.4123 | 0.6448 |
|
67 |
-
| 2.5192 | 1.92 | 1200 | 2.5729 | 0.6884 | 0.9178 |
|
68 |
-
| 2.6077 | 2.08 | 1300 | 2.4078 | 0.4816 | 0.8066 |
|
69 |
-
| 2.6928 | 2.24 | 1400 | 2.3596 | 0.4904 | 0.7915 |
|
70 |
-
| 0.0 | 2.4 | 1500 | 2.4471 | 0.6019 | 0.8782 |
|
71 |
-
| 0.0 | 2.56 | 1600 | 2.4490 | 0.6112 | 0.8888 |
|
72 |
-
| 0.0 | 2.7200 | 1700 | nan | 1.0 | 1.0 |
|
73 |
-
| 0.0 | 2.88 | 1800 | nan | 1.0 | 1.0 |
|
74 |
-
| 0.0 | 3.04 | 1900 | nan | 1.0 | 1.0 |
|
75 |
-
| 0.0 | 3.2 | 2000 | nan | 1.0 | 1.0 |
|
76 |
-
| 0.0 | 3.36 | 2100 | nan | 1.0 | 1.0 |
|
77 |
-
| 0.0 | 3.52 | 2200 | nan | 1.0 | 1.0 |
|
78 |
-
| 0.0 | 3.68 | 2300 | nan | 1.0 | 1.0 |
|
79 |
-
| 0.0 | 3.84 | 2400 | nan | 1.0 | 1.0 |
|
80 |
-
| 0.0 | 4.0 | 2500 | nan | 1.0 | 1.0 |
|
81 |
-
| 0.0 | 4.16 | 2600 | nan | 1.0 | 1.0 |
|
82 |
-
| 0.0 | 4.32 | 2700 | nan | 1.0 | 1.0 |
|
83 |
-
| 0.0 | 4.48 | 2800 | nan | 1.0 | 1.0 |
|
84 |
-
| 0.0 | 4.64 | 2900 | nan | 1.0 | 1.0 |
|
85 |
-
| 0.0 | 4.8 | 3000 | nan | 1.0 | 1.0 |
|
86 |
-
| 0.0 | 4.96 | 3100 | nan | 1.0 | 1.0 |
|
87 |
-
| 0.0 | 5.12 | 3200 | nan | 1.0 | 1.0 |
|
88 |
-
| 0.0 | 5.28 | 3300 | nan | 1.0 | 1.0 |
|
89 |
-
| 0.0 | 5.44 | 3400 | nan | 1.0 | 1.0 |
|
90 |
-
| 0.0 | 5.6 | 3500 | nan | 1.0 | 1.0 |
|
91 |
|
92 |
|
93 |
### Framework versions
|
|
|
11 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
12 |
should probably proofread and complete it, then remove this comment. -->
|
13 |
|
14 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/priyanshipal/huggingface/runs/hvo6b5jz)
|
15 |
# output
|
16 |
|
17 |
This model was trained from scratch on an unknown dataset.
|
18 |
It achieves the following results on the evaluation set:
|
19 |
+
- Loss: 1.9947
|
20 |
+
- Cer: 0.4133
|
21 |
+
- Wer: 0.6195
|
22 |
|
23 |
## Model description
|
24 |
|
|
|
45 |
- total_train_batch_size: 32
|
46 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
47 |
- lr_scheduler_type: linear
|
48 |
+
- lr_scheduler_warmup_steps: 2000
|
49 |
+
- training_steps: 6000
|
50 |
- mixed_precision_training: Native AMP
|
51 |
|
52 |
### Training results
|
53 |
|
54 |
+
| Training Loss | Epoch | Step | Validation Loss | Cer | Wer |
|
55 |
+
|:-------------:|:-----:|:----:|:---------------:|:------:|:------:|
|
56 |
+
| 1.5851 | 1.6 | 1000 | 1.9947 | 0.4133 | 0.6195 |
|
57 |
+
| 1.8352 | 3.2 | 2000 | 2.1491 | 0.4724 | 0.7895 |
|
58 |
+
| 2.3755 | 4.8 | 3000 | 2.3793 | 0.4433 | 0.7270 |
|
59 |
+
| 3.3134 | 6.4 | 4000 | 3.3025 | 0.5204 | 0.8033 |
|
60 |
+
| 3.4098 | 8.0 | 5000 | 3.2885 | 0.5196 | 0.8050 |
|
61 |
+
| 3.1155 | 9.6 | 6000 | 3.2885 | 0.5196 | 0.8050 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
|
64 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,16 +1,16 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_cer": 0.
|
4 |
-
"eval_loss":
|
5 |
-
"eval_runtime":
|
6 |
"eval_samples": 3136,
|
7 |
-
"eval_samples_per_second": 19.
|
8 |
-
"eval_steps_per_second": 1.
|
9 |
-
"eval_wer": 0.
|
10 |
-
"total_flos":
|
11 |
-
"train_loss":
|
12 |
-
"train_runtime":
|
13 |
"train_samples": 20000,
|
14 |
-
"train_samples_per_second":
|
15 |
-
"train_steps_per_second": 0.
|
16 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.6,
|
3 |
+
"eval_cer": 0.4133216406903974,
|
4 |
+
"eval_loss": 1.9947007894515991,
|
5 |
+
"eval_runtime": 158.2803,
|
6 |
"eval_samples": 3136,
|
7 |
+
"eval_samples_per_second": 19.813,
|
8 |
+
"eval_steps_per_second": 1.238,
|
9 |
+
"eval_wer": 0.6194798466480157,
|
10 |
+
"total_flos": 3.700768773245485e+19,
|
11 |
+
"train_loss": 2.825458660195271,
|
12 |
+
"train_runtime": 12931.7591,
|
13 |
"train_samples": 20000,
|
14 |
+
"train_samples_per_second": 14.847,
|
15 |
+
"train_steps_per_second": 0.464
|
16 |
}
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "/scratch/elec/puhe/p/palp3/MUCS/
|
3 |
"activation_dropout": 0.0,
|
4 |
"adapter_attn_dim": null,
|
5 |
"adapter_kernel_size": 3,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "/m/triton/scratch/elec/puhe/p/palp3/MUCS/indicwav2vec-hindi",
|
3 |
"activation_dropout": 0.0,
|
4 |
"adapter_attn_dim": null,
|
5 |
"adapter_kernel_size": 3,
|
eval_results.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"eval_cer": 0.
|
4 |
-
"eval_loss":
|
5 |
-
"eval_runtime":
|
6 |
"eval_samples": 3136,
|
7 |
-
"eval_samples_per_second": 19.
|
8 |
-
"eval_steps_per_second": 1.
|
9 |
-
"eval_wer": 0.
|
10 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.6,
|
3 |
+
"eval_cer": 0.4133216406903974,
|
4 |
+
"eval_loss": 1.9947007894515991,
|
5 |
+
"eval_runtime": 158.2803,
|
6 |
"eval_samples": 3136,
|
7 |
+
"eval_samples_per_second": 19.813,
|
8 |
+
"eval_steps_per_second": 1.238,
|
9 |
+
"eval_wer": 0.6194798466480157
|
10 |
}
|
indicwav2vec_trainwtags_MUCS_warmup2000_s300shuff100_2217857.out
CHANGED
@@ -2556,4 +2556,4 @@ last prediction string
|
|
2556 |
return fn(*args, **kwargs)
|
2557 |
/scratch/work/palp3/myenv/lib/python3.11/site-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.
|
2558 |
with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]
|
2559 |
-
|
2560 |
10%|β | 2001/20000 [1:12:23<261:02:18, 52.21s/it]
|
2561 |
|
2562 |
10%|β | 2001/20000 [1:12:23<261:02:18, 52.21s/it]
|
2563 |
10%|β | 2002/20000 [1:12:26<186:03:50, 37.22s/it]
|
2564 |
|
2565 |
10%|β | 2002/20000 [1:12:26<186:03:50, 37.22s/it]
|
2566 |
10%|β | 2003/20000 [1:12:28<133:40:18, 26.74s/it]
|
2567 |
|
2568 |
10%|β | 2003/20000 [1:12:28<133:40:18, 26.74s/it]
|
2569 |
10%|β | 2004/20000 [1:12:30<96:55:36, 19.39s/it]
|
2570 |
|
2571 |
10%|β | 2004/20000 [1:12:30<96:55:36, 19.39s/it]
|
2572 |
10%|β | 2005/20000 [1:12:32<71:03:43, 14.22s/it]
|
2573 |
|
2574 |
10%|β | 2005/20000 [1:12:32<71:03:43, 14.22s/it]
|
2575 |
10%|β | 2006/20000 [1:12:35<53:12:27, 10.65s/it]
|
2576 |
|
2577 |
10%|β | 2006/20000 [1:12:35<53:12:27, 10.65s/it]
|
|
|
2578 |
10%|β | 2001/20000 [1:12:23<261:02:18, 52.21s/it]
|
2579 |
|
2580 |
10%|β | 2001/20000 [1:12:23<261:02:18, 52.21s/it]
|
2581 |
10%|β | 2002/20000 [1:12:26<186:03:50, 37.22s/it]
|
2582 |
|
2583 |
10%|β | 2002/20000 [1:12:26<186:03:50, 37.22s/it]
|
2584 |
10%|β | 2003/20000 [1:12:28<133:40:18, 26.74s/it]
|
2585 |
|
2586 |
10%|β | 2003/20000 [1:12:28<133:40:18, 26.74s/it]
|
2587 |
10%|β | 2004/20000 [1:12:30<96:55:36, 19.39s/it]
|
2588 |
|
2589 |
10%|β | 2004/20000 [1:12:30<96:55:36, 19.39s/it]
|
2590 |
10%|β | 2005/20000 [1:12:32<71:03:43, 14.22s/it]
|
2591 |
|
2592 |
10%|β | 2005/20000 [1:12:32<71:03:43, 14.22s/it]
|
2593 |
10%|β | 2006/20000 [1:12:35<53:12:27, 10.65s/it]
|
2594 |
|
2595 |
10%|β | 2006/20000 [1:12:35<53:12:27, 10.65s/it]
|
2596 |
10%|β | 2007/20000 [1:12:37<40:23:30, 8.08s/it]
|
2597 |
|
2598 |
10%|β | 2007/20000 [1:12:37<40:23:30, 8.08s/it]
|
2599 |
10%|β | 2008/20000 [1:12:39<31:07:28, 6.23s/it]
|
2600 |
|
2601 |
10%|β | 2008/20000 [1:12:39<31:07:28, 6.23s/it]
|
2602 |
10%|β | 2009/20000 [1:12:40<24:05:58, 4.82s/it]
|
2603 |
|
2604 |
10%|β | 2009/20000 [1:12:40<24:05:58, 4.82s/it]
|
2605 |
10%|β | 2010/20000 [1:12:42<19:29:58, 3.90s/it]
|
2606 |
|
2607 |
10%|β | 2010/20000 [1:12:42<19:29:58, 3.90s/it]
|
2608 |
10%|β | 2011/20000 [1:12:43<16:01:15, 3.21s/it]
|
2609 |
|
2610 |
10%|β | 2011/20000 [1:12:43<16:01:15, 3.21s/it]
|
2611 |
10%|β | 2012/20000 [1:12:45<13:43:17, 2.75s/it]
|
2612 |
|
2613 |
10%|β | 2012/20000 [1:12:45<13:43:17, 2.75s/it]
|
2614 |
10%|β | 2013/20000 [1:12:47<12:04:33, 2.42s/it]
|
2615 |
|
2616 |
10%|β | 2013/20000 [1:12:47<12:04:33, 2.42s/it]
|
2617 |
10%|β | 2014/20000 [1:12:48<11:00:26, 2.20s/it]
|
2618 |
|
2619 |
10%|β | 2014/20000 [1:12:48<11:00:26, 2.20s/it]
|
2620 |
10%|β | 2015/20000 [1:12:50<10:22:44, 2.08s/it]
|
2621 |
|
2622 |
10%|β | 2015/20000 [1:12:50<10:22:44, 2.08s/it]
|
2623 |
10%|β | 2016/20000 [1:12:52<9:14:52, 1.85s/it]
|
2624 |
|
2625 |
10%|β | 2016/20000 [1:12:52<9:14:52, 1.85s/it]
|
2626 |
10%|β | 2017/20000 [1:12:53<8:27:59, 1.69s/it]
|
2627 |
|
2628 |
10%|β | 2017/20000 [1:12:53<8:27:59, 1.69s/it]
|
2629 |
10%|β | 2018/20000 [1:12:54<7:53:29, 1.58s/it]
|
2630 |
|
2631 |
10%|β | 2018/20000 [1:12:54<7:53:29, 1.58s/it]
|
2632 |
10%|β | 2019/20000 [1:12:56<7:42:42, 1.54s/it]
|
2633 |
|
2634 |
10%|β | 2019/20000 [1:12:56<7:42:42, 1.54s/it]
|
2635 |
10%|β | 2020/20000 [1:12:57<7:34:48, 1.52s/it]
|
2636 |
|
2637 |
10%|β | 2020/20000 [1:12:57<7:34:48, 1.52s/it]
|
2638 |
10%|β | 2021/20000 [1:12:58<7:10:37, 1.44s/it]
|
2639 |
|
2640 |
10%|β | 2021/20000 [1:12:58<7:10:37, 1.44s/it]
|
2641 |
10%|β | 2022/20000 [1:12:59<6:31:49, 1.31s/it]
|
2642 |
|
2643 |
10%|β | 2022/20000 [1:12:59<6:31:49, 1.31s/it]
|
2644 |
10%|β | 2023/20000 [1:13:00<5:55:41, 1.19s/it]
|
2645 |
|
2646 |
10%|β | 2023/20000 [1:13:00<5:55:41, 1.19s/it]
|
2647 |
10%|β | 2024/20000 [1:13:01<5:38:57, 1.13s/it]
|
2648 |
|
2649 |
10%|β | 2024/20000 [1:13:01<5:38:57, 1.13s/it]
|
2650 |
10%|β | 2025/20000 [1:13:05<8:55:39, 1.79s/it]
|
2651 |
|
2652 |
10%|β | 2025/20000 [1:13:05<8:55:39, 1.79s/it]
|
2653 |
10%|β | 2026/20000 [1:13:12<17:31:16, 3.51s/it]
|
2654 |
|
2655 |
10%|β | 2026/20000 [1:13:12<17:31:16, 3.51s/it]
|
2656 |
10%|β | 2027/20000 [1:13:17<19:54:15, 3.99s/it]
|
2657 |
|
2658 |
10%|β | 2027/20000 [1:13:17<19:54:15, 3.99s/it]
|
2659 |
10%|β | 2028/20000 [1:13:22<20:49:08, 4.17s/it]
|
2660 |
|
2661 |
10%|β | 2028/20000 [1:13:22<20:49:08, 4.17s/it]
|
2662 |
10%|β | 2029/20000 [1:13:26<20:19:13, 4.07s/it]
|
2663 |
|
2664 |
10%|β | 2029/20000 [1:13:26<20:19:13, 4.07s/it]
|
2665 |
10%|β | 2030/20000 [1:13:30<20:11:29, 4.05s/it]
|
2666 |
|
2667 |
10%|β | 2030/20000 [1:13:30<20:11:29, 4.05s/it]
|
2668 |
10%|β | 2031/20000 [1:13:33<19:06:48, 3.83s/it]
|
2669 |
|
2670 |
10%|β | 2031/20000 [1:13:33<19:06:48, 3.83s/it]
|
2671 |
10%|β | 2032/20000 [1:13:36<18:32:02, 3.71s/it]
|
2672 |
|
2673 |
10%|β | 2032/20000 [1:13:36<18:32:02, 3.71s/it]
|
2674 |
10%|β | 2033/20000 [1:13:40<17:43:54, 3.55s/it]
|
2675 |
|
2676 |
10%|β | 2033/20000 [1:13:40<17:43:54, 3.55s/it]
|
2677 |
10%|β | 2034/20000 [1:13:43<17:26:07, 3.49s/it]
|
2678 |
|
2679 |
10%|β | 2034/20000 [1:13:43<17:26:07, 3.49s/it]
|
2680 |
10%|β | 2035/20000 [1:13:46<16:33:57, 3.32s/it]
|
2681 |
|
2682 |
10%|β | 2035/20000 [1:13:46<16:33:57, 3.32s/it]
|
2683 |
10%|β | 2036/20000 [1:13:49<15:50:22, 3.17s/it]
|
2684 |
|
2685 |
10%|β | 2036/20000 [1:13:49<15:50:22, 3.17s/it]
|
2686 |
10%|β | 2037/20000 [1:13:52<15:14:17, 3.05s/it]
|
2687 |
|
2688 |
10%|β | 2037/20000 [1:13:52<15:14:17, 3.05s/it]
|
2689 |
10%|β | 2038/20000 [1:13:54<14:50:37, 2.98s/it]
|
2690 |
|
2691 |
10%|β | 2038/20000 [1:13:54<14:50:37, 2.98s/it]
|
2692 |
10%|β | 2039/20000 [1:13:57<14:10:23, 2.84s/it]
|
2693 |
|
2694 |
10%|β | 2039/20000 [1:13:57<14:10:23, 2.84s/it]
|
2695 |
10%|β | 2040/20000 [1:13:59<13:46:35, 2.76s/it]
|
2696 |
|
2697 |
10%|β | 2040/20000 [1:13:59<13:46:35, 2.76s/it]
|
2698 |
10%|β | 2041/20000 [1:14:02<13:40:55, 2.74s/it]
|
2699 |
|
2700 |
10%|β | 2041/20000 [1:14:02<13:40:55, 2.74s/it]
|
2701 |
10%|β | 2042/20000 [1:14:05<13:11:09, 2.64s/it]
|
2702 |
|
2703 |
10%|β | 2042/20000 [1:14:05<13:11:09, 2.64s/it]
|
2704 |
10%|β | 2043/20000 [1:14:07<12:46:32, 2.56s/it]
|
2705 |
|
2706 |
10%|β | 2043/20000 [1:14:07<12:46:32, 2.56s/it]
|
2707 |
10%|β | 2044/20000 [1:14:09<12:05:13, 2.42s/it]
|
2708 |
|
2709 |
10%|β | 2044/20000 [1:14:09<12:05:13, 2.42s/it]
|
2710 |
10%|β | 2045/20000 [1:14:11<11:55:28, 2.39s/it]
|
2711 |
|
2712 |
10%|β | 2045/20000 [1:14:11<11:55:28, 2.39s/it]
|
2713 |
10%|β | 2046/20000 [1:14:13<11:27:33, 2.30s/it]
|
2714 |
|
2715 |
10%|β | 2046/20000 [1:14:13<11:27:33, 2.30s/it]
|
2716 |
10%|β | 2047/20000 [1:14:16<11:13:50, 2.25s/it]
|
2717 |
|
2718 |
10%|β | 2047/20000 [1:14:16<11:13:50, 2.25s/it]
|
2719 |
10%|β | 2048/20000 [1:14:18<11:07:04, 2.23s/it]
|
2720 |
|
2721 |
10%|β | 2048/20000 [1:14:18<11:07:04, 2.23s/it]
|
2722 |
10%|β | 2049/20000 [1:14:20<10:48:18, 2.17s/it]
|
2723 |
|
2724 |
10%|β | 2049/20000 [1:14:20<10:48:18, 2.17s/it]
|
2725 |
10%|β | 2050/20000 [1:14:22<10:25:43, 2.09s/it]
|
2726 |
|
2727 |
10%|β | 2050/20000 [1:14:22<10:25:43, 2.09s/it]
|
2728 |
10%|β | 2051/20000 [1:14:24<10:11:00, 2.04s/it]
|
2729 |
|
2730 |
10%|β | 2051/20000 [1:14:24<10:11:00, 2.04s/it]
|
2731 |
10%|β | 2052/20000 [1:14:26<10:00:50, 2.01s/it]
|
2732 |
|
2733 |
10%|β | 2052/20000 [1:14:26<10:00:50, 2.01s/it]
|
2734 |
10%|β | 2053/20000 [1:14:28<10:08:46, 2.04s/it]
|
2735 |
|
2736 |
10%|β | 2053/20000 [1:14:28<10:08:46, 2.04s/it]
|
2737 |
10%|β | 2054/20000 [1:14:30<9:57:09, 2.00s/it]
|
2738 |
|
2739 |
10%|β | 2054/20000 [1:14:30<9:57:09, 2.00s/it]
|
2740 |
10%|β | 2055/20000 [1:14:32<10:01:01, 2.01s/it]
|
2741 |
|
2742 |
10%|β | 2055/20000 [1:14:32<10:01:01, 2.01s/it]
|
2743 |
10%|β | 2056/20000 [1:14:34<9:57:36, 2.00s/it]
|
2744 |
|
2745 |
10%|β | 2056/20000 [1:14:34<9:57:36, 2.00s/it]
|
2746 |
10%|β | 2057/20000 [1:14:35<9:18:11, 1.87s/it]
|
2747 |
|
2748 |
10%|β | 2057/20000 [1:14:35<9:18:11, 1.87s/it]
|
2749 |
10%|β | 2058/20000 [1:14:37<8:59:02, 1.80s/it]
|
2750 |
|
2751 |
10%|β | 2058/20000 [1:14:37<8:59:02, 1.80s/it]
|
2752 |
10%|β | 2059/20000 [1:14:38<8:39:57, 1.74s/it]
|
2753 |
|
2754 |
10%|β | 2059/20000 [1:14:38<8:39:57, 1.74s/it]
|
2755 |
10%|β | 2060/20000 [1:14:40<8:43:16, 1.75s/it]
|
2756 |
|
2757 |
10%|β | 2060/20000 [1:14:40<8:43:16, 1.75s/it]
|
2758 |
10%|β | 2061/20000 [1:14:42<8:19:10, 1.67s/it]
|
2759 |
|
2760 |
10%|β | 2061/20000 [1:14:42<8:19:10, 1.67s/it]
|
2761 |
10%|β | 2062/20000 [1:14:43<8:08:40, 1.63s/it]
|
2762 |
|
2763 |
10%|β | 2062/20000 [1:14:43<8:08:40, 1.63s/it]
|
2764 |
10%|β | 2063/20000 [1:14:45<7:55:27, 1.59s/it]
|
2765 |
|
2766 |
10%|β | 2063/20000 [1:14:45<7:55:27, 1.59s/it]
|
2767 |
10%|β | 2064/20000 [1:14:46<7:56:19, 1.59s/it]
|
2768 |
|
2769 |
10%|β | 2064/20000 [1:14:46<7:56:19, 1.59s/it]
|
2770 |
10%|β | 2065/20000 [1:14:48<7:48:13, 1.57s/it]
|
2771 |
|
2772 |
10%|β | 2065/20000 [1:14:48<7:48:13, 1.57s/it]
|
2773 |
10%|β | 2066/20000 [1:14:49<7:16:45, 1.46s/it]
|
2774 |
|
2775 |
10%|β | 2066/20000 [1:14:49<7:16:45, 1.46s/it]
|
2776 |
10%|β | 2067/20000 [1:14:50<6:58:10, 1.40s/it]
|
2777 |
|
2778 |
10%|β | 2067/20000 [1:14:50<6:58:10, 1.40s/it]
|
2779 |
10%|β | 2068/20000 [1:14:51<6:41:33, 1.34s/it]
|
2780 |
|
2781 |
10%|β | 2068/20000 [1:14:51<6:41:33, 1.34s/it]
|
2782 |
10%|β | 2069/20000 [1:14:53<6:26:32, 1.29s/it]
|
2783 |
|
2784 |
10%|β | 2069/20000 [1:14:53<6:26:32, 1.29s/it]
|
2785 |
10%|β | 2070/20000 [1:14:54<6:27:34, 1.30s/it]
|
2786 |
|
2787 |
10%|β | 2070/20000 [1:14:54<6:27:34, 1.30s/it]
|
2788 |
10%|β | 2071/20000 [1:14:55<6:27:27, 1.30s/it]
|
2789 |
|
2790 |
10%|β | 2071/20000 [1:14:55<6:27:27, 1.30s/it]
|
2791 |
10%|β | 2072/20000 [1:14:56<6:10:32, 1.24s/it]
|
2792 |
|
2793 |
10%|β | 2072/20000 [1:14:56<6:10:32, 1.24s/it]
|
2794 |
10%|β | 2073/20000 [1:14:57<5:43:50, 1.15s/it]
|
2795 |
|
2796 |
10%|β | 2073/20000 [1:14:57<5:43:50, 1.15s/it]
|
2797 |
10%|β | 2074/20000 [1:14:58<5:18:06, 1.06s/it]
|
2798 |
|
2799 |
10%|β | 2074/20000 [1:14:58<5:18:06, 1.06s/it]
|
2800 |
10%|β | 2075/20000 [1:15:01<7:44:24, 1.55s/it]
|
2801 |
|
2802 |
10%|β | 2075/20000 [1:15:01<7:44:24, 1.55s/it]
|
2803 |
10%|β | 2076/20000 [1:15:07<15:05:48, 3.03s/it]
|
2804 |
|
2805 |
10%|β | 2076/20000 [1:15:07<15:05:48, 3.03s/it]
|
2806 |
10%|β | 2077/20000 [1:15:12<17:43:39, 3.56s/it]
|
2807 |
|
2808 |
10%|β | 2077/20000 [1:15:12<17:43:39, 3.56s/it]
|
2809 |
10%|β | 2078/20000 [1:15:16<18:58:37, 3.81s/it]
|
2810 |
|
2811 |
10%|β | 2078/20000 [1:15:16<18:58:37, 3.81s/it]
|
2812 |
10%|β | 2079/20000 [1:15:21<19:21:09, 3.89s/it]
|
2813 |
|
2814 |
10%|β | 2079/20000 [1:15:21<19:21:09, 3.89s/it]
|
2815 |
10%|β | 2080/20000 [1:15:24<19:07:58, 3.84s/it]
|
2816 |
|
2817 |
10%|β | 2080/20000 [1:15:24<19:07:58, 3.84s/it]
|
2818 |
10%|β | 2081/20000 [1:15:28<18:37:41, 3.74s/it]
|
2819 |
|
2820 |
10%|β | 2081/20000 [1:15:28<18:37:41, 3.74s/it]slurmstepd: error: *** JOB 2217857 ON gpu36 CANCELLED AT 2024-08-28T13:34:19 ***
|
|
|
2556 |
return fn(*args, **kwargs)
|
2557 |
/scratch/work/palp3/myenv/lib/python3.11/site-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.
|
2558 |
with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]
|
|
|
2559 |
10%|β | 2001/20000 [1:12:23<261:02:18, 52.21s/it]
|
2560 |
|
2561 |
10%|β | 2001/20000 [1:12:23<261:02:18, 52.21s/it]
|
2562 |
10%|β | 2002/20000 [1:12:26<186:03:50, 37.22s/it]
|
2563 |
|
2564 |
10%|β | 2002/20000 [1:12:26<186:03:50, 37.22s/it]
|
2565 |
10%|β | 2003/20000 [1:12:28<133:40:18, 26.74s/it]
|
2566 |
|
2567 |
10%|β | 2003/20000 [1:12:28<133:40:18, 26.74s/it]
|
2568 |
10%|β | 2004/20000 [1:12:30<96:55:36, 19.39s/it]
|
2569 |
|
2570 |
10%|β | 2004/20000 [1:12:30<96:55:36, 19.39s/it]
|
2571 |
10%|β | 2005/20000 [1:12:32<71:03:43, 14.22s/it]
|
2572 |
|
2573 |
10%|β | 2005/20000 [1:12:32<71:03:43, 14.22s/it]
|
2574 |
10%|β | 2006/20000 [1:12:35<53:12:27, 10.65s/it]
|
2575 |
|
2576 |
10%|β | 2006/20000 [1:12:35<53:12:27, 10.65s/it]
|
2577 |
+
|
2578 |
10%|β | 2001/20000 [1:12:23<261:02:18, 52.21s/it]
|
2579 |
|
2580 |
10%|β | 2001/20000 [1:12:23<261:02:18, 52.21s/it]
|
2581 |
10%|β | 2002/20000 [1:12:26<186:03:50, 37.22s/it]
|
2582 |
|
2583 |
10%|β | 2002/20000 [1:12:26<186:03:50, 37.22s/it]
|
2584 |
10%|β | 2003/20000 [1:12:28<133:40:18, 26.74s/it]
|
2585 |
|
2586 |
10%|β | 2003/20000 [1:12:28<133:40:18, 26.74s/it]
|
2587 |
10%|β | 2004/20000 [1:12:30<96:55:36, 19.39s/it]
|
2588 |
|
2589 |
10%|β | 2004/20000 [1:12:30<96:55:36, 19.39s/it]
|
2590 |
10%|β | 2005/20000 [1:12:32<71:03:43, 14.22s/it]
|
2591 |
|
2592 |
10%|β | 2005/20000 [1:12:32<71:03:43, 14.22s/it]
|
2593 |
10%|β | 2006/20000 [1:12:35<53:12:27, 10.65s/it]
|
2594 |
|
2595 |
10%|β | 2006/20000 [1:12:35<53:12:27, 10.65s/it]
|
2596 |
10%|β | 2007/20000 [1:12:37<40:23:30, 8.08s/it]
|
2597 |
|
2598 |
10%|β | 2007/20000 [1:12:37<40:23:30, 8.08s/it]
|
2599 |
10%|β | 2008/20000 [1:12:39<31:07:28, 6.23s/it]
|
2600 |
|
2601 |
10%|β | 2008/20000 [1:12:39<31:07:28, 6.23s/it]
|
2602 |
10%|β | 2009/20000 [1:12:40<24:05:58, 4.82s/it]
|
2603 |
|
2604 |
10%|β | 2009/20000 [1:12:40<24:05:58, 4.82s/it]
|
2605 |
10%|β | 2010/20000 [1:12:42<19:29:58, 3.90s/it]
|
2606 |
|
2607 |
10%|β | 2010/20000 [1:12:42<19:29:58, 3.90s/it]
|
2608 |
10%|β | 2011/20000 [1:12:43<16:01:15, 3.21s/it]
|
2609 |
|
2610 |
10%|β | 2011/20000 [1:12:43<16:01:15, 3.21s/it]
|
2611 |
10%|β | 2012/20000 [1:12:45<13:43:17, 2.75s/it]
|
2612 |
|
2613 |
10%|β | 2012/20000 [1:12:45<13:43:17, 2.75s/it]
|
2614 |
10%|β | 2013/20000 [1:12:47<12:04:33, 2.42s/it]
|
2615 |
|
2616 |
10%|β | 2013/20000 [1:12:47<12:04:33, 2.42s/it]
|
2617 |
10%|β | 2014/20000 [1:12:48<11:00:26, 2.20s/it]
|
2618 |
|
2619 |
10%|β | 2014/20000 [1:12:48<11:00:26, 2.20s/it]
|
2620 |
10%|β | 2015/20000 [1:12:50<10:22:44, 2.08s/it]
|
2621 |
|
2622 |
10%|β | 2015/20000 [1:12:50<10:22:44, 2.08s/it]
|
2623 |
10%|β | 2016/20000 [1:12:52<9:14:52, 1.85s/it]
|
2624 |
|
2625 |
10%|β | 2016/20000 [1:12:52<9:14:52, 1.85s/it]
|
2626 |
10%|β | 2017/20000 [1:12:53<8:27:59, 1.69s/it]
|
2627 |
|
2628 |
10%|β | 2017/20000 [1:12:53<8:27:59, 1.69s/it]
|
2629 |
10%|β | 2018/20000 [1:12:54<7:53:29, 1.58s/it]
|
2630 |
|
2631 |
10%|β | 2018/20000 [1:12:54<7:53:29, 1.58s/it]
|
2632 |
10%|β | 2019/20000 [1:12:56<7:42:42, 1.54s/it]
|
2633 |
|
2634 |
10%|β | 2019/20000 [1:12:56<7:42:42, 1.54s/it]
|
2635 |
10%|β | 2020/20000 [1:12:57<7:34:48, 1.52s/it]
|
2636 |
|
2637 |
10%|β | 2020/20000 [1:12:57<7:34:48, 1.52s/it]
|
2638 |
10%|β | 2021/20000 [1:12:58<7:10:37, 1.44s/it]
|
2639 |
|
2640 |
10%|β | 2021/20000 [1:12:58<7:10:37, 1.44s/it]
|
2641 |
10%|β | 2022/20000 [1:12:59<6:31:49, 1.31s/it]
|
2642 |
|
2643 |
10%|β | 2022/20000 [1:12:59<6:31:49, 1.31s/it]
|
2644 |
10%|β | 2023/20000 [1:13:00<5:55:41, 1.19s/it]
|
2645 |
|
2646 |
10%|β | 2023/20000 [1:13:00<5:55:41, 1.19s/it]
|
2647 |
10%|β | 2024/20000 [1:13:01<5:38:57, 1.13s/it]
|
2648 |
|
2649 |
10%|β | 2024/20000 [1:13:01<5:38:57, 1.13s/it]
|
2650 |
10%|β | 2025/20000 [1:13:05<8:55:39, 1.79s/it]
|
2651 |
|
2652 |
10%|β | 2025/20000 [1:13:05<8:55:39, 1.79s/it]
|
2653 |
10%|β | 2026/20000 [1:13:12<17:31:16, 3.51s/it]
|
2654 |
|
2655 |
10%|β | 2026/20000 [1:13:12<17:31:16, 3.51s/it]
|
2656 |
10%|β | 2027/20000 [1:13:17<19:54:15, 3.99s/it]
|
2657 |
|
2658 |
10%|β | 2027/20000 [1:13:17<19:54:15, 3.99s/it]
|
2659 |
10%|β | 2028/20000 [1:13:22<20:49:08, 4.17s/it]
|
2660 |
|
2661 |
10%|β | 2028/20000 [1:13:22<20:49:08, 4.17s/it]
|
2662 |
10%|β | 2029/20000 [1:13:26<20:19:13, 4.07s/it]
|
2663 |
|
2664 |
10%|β | 2029/20000 [1:13:26<20:19:13, 4.07s/it]
|
2665 |
10%|β | 2030/20000 [1:13:30<20:11:29, 4.05s/it]
|
2666 |
|
2667 |
10%|β | 2030/20000 [1:13:30<20:11:29, 4.05s/it]
|
2668 |
10%|β | 2031/20000 [1:13:33<19:06:48, 3.83s/it]
|
2669 |
|
2670 |
10%|β | 2031/20000 [1:13:33<19:06:48, 3.83s/it]
|
2671 |
10%|β | 2032/20000 [1:13:36<18:32:02, 3.71s/it]
|
2672 |
|
2673 |
10%|β | 2032/20000 [1:13:36<18:32:02, 3.71s/it]
|
2674 |
10%|β | 2033/20000 [1:13:40<17:43:54, 3.55s/it]
|
2675 |
|
2676 |
10%|β | 2033/20000 [1:13:40<17:43:54, 3.55s/it]
|
2677 |
10%|β | 2034/20000 [1:13:43<17:26:07, 3.49s/it]
|
2678 |
|
2679 |
10%|β | 2034/20000 [1:13:43<17:26:07, 3.49s/it]
|
2680 |
10%|β | 2035/20000 [1:13:46<16:33:57, 3.32s/it]
|
2681 |
|
2682 |
10%|β | 2035/20000 [1:13:46<16:33:57, 3.32s/it]
|
2683 |
10%|β | 2036/20000 [1:13:49<15:50:22, 3.17s/it]
|
2684 |
|
2685 |
10%|β | 2036/20000 [1:13:49<15:50:22, 3.17s/it]
|
2686 |
10%|β | 2037/20000 [1:13:52<15:14:17, 3.05s/it]
|
2687 |
|
2688 |
10%|β | 2037/20000 [1:13:52<15:14:17, 3.05s/it]
|
2689 |
10%|β | 2038/20000 [1:13:54<14:50:37, 2.98s/it]
|
2690 |
|
2691 |
10%|β | 2038/20000 [1:13:54<14:50:37, 2.98s/it]
|
2692 |
10%|β | 2039/20000 [1:13:57<14:10:23, 2.84s/it]
|
2693 |
|
2694 |
10%|β | 2039/20000 [1:13:57<14:10:23, 2.84s/it]
|
2695 |
10%|β | 2040/20000 [1:13:59<13:46:35, 2.76s/it]
|
2696 |
|
2697 |
10%|β | 2040/20000 [1:13:59<13:46:35, 2.76s/it]
|
2698 |
10%|β | 2041/20000 [1:14:02<13:40:55, 2.74s/it]
|
2699 |
|
2700 |
10%|β | 2041/20000 [1:14:02<13:40:55, 2.74s/it]
|
2701 |
10%|β | 2042/20000 [1:14:05<13:11:09, 2.64s/it]
|
2702 |
|
2703 |
10%|β | 2042/20000 [1:14:05<13:11:09, 2.64s/it]
|
2704 |
10%|β | 2043/20000 [1:14:07<12:46:32, 2.56s/it]
|
2705 |
|
2706 |
10%|β | 2043/20000 [1:14:07<12:46:32, 2.56s/it]
|
2707 |
10%|β | 2044/20000 [1:14:09<12:05:13, 2.42s/it]
|
2708 |
|
2709 |
10%|β | 2044/20000 [1:14:09<12:05:13, 2.42s/it]
|
2710 |
10%|β | 2045/20000 [1:14:11<11:55:28, 2.39s/it]
|
2711 |
|
2712 |
10%|β | 2045/20000 [1:14:11<11:55:28, 2.39s/it]
|
2713 |
10%|β | 2046/20000 [1:14:13<11:27:33, 2.30s/it]
|
2714 |
|
2715 |
10%|β | 2046/20000 [1:14:13<11:27:33, 2.30s/it]
|
2716 |
10%|β | 2047/20000 [1:14:16<11:13:50, 2.25s/it]
|
2717 |
|
2718 |
10%|β | 2047/20000 [1:14:16<11:13:50, 2.25s/it]
|
2719 |
10%|β | 2048/20000 [1:14:18<11:07:04, 2.23s/it]
|
2720 |
|
2721 |
10%|β | 2048/20000 [1:14:18<11:07:04, 2.23s/it]
|
2722 |
10%|β | 2049/20000 [1:14:20<10:48:18, 2.17s/it]
|
2723 |
|
2724 |
10%|β | 2049/20000 [1:14:20<10:48:18, 2.17s/it]
|
2725 |
10%|β | 2050/20000 [1:14:22<10:25:43, 2.09s/it]
|
2726 |
|
2727 |
10%|β | 2050/20000 [1:14:22<10:25:43, 2.09s/it]
|
2728 |
10%|β | 2051/20000 [1:14:24<10:11:00, 2.04s/it]
|
2729 |
|
2730 |
10%|β | 2051/20000 [1:14:24<10:11:00, 2.04s/it]
|
2731 |
10%|β | 2052/20000 [1:14:26<10:00:50, 2.01s/it]
|
2732 |
|
2733 |
10%|β | 2052/20000 [1:14:26<10:00:50, 2.01s/it]
|
2734 |
10%|β | 2053/20000 [1:14:28<10:08:46, 2.04s/it]
|
2735 |
|
2736 |
10%|β | 2053/20000 [1:14:28<10:08:46, 2.04s/it]
|
2737 |
10%|β | 2054/20000 [1:14:30<9:57:09, 2.00s/it]
|
2738 |
|
2739 |
10%|β | 2054/20000 [1:14:30<9:57:09, 2.00s/it]
|
2740 |
10%|β | 2055/20000 [1:14:32<10:01:01, 2.01s/it]
|
2741 |
|
2742 |
10%|β | 2055/20000 [1:14:32<10:01:01, 2.01s/it]
|
2743 |
10%|β | 2056/20000 [1:14:34<9:57:36, 2.00s/it]
|
2744 |
|
2745 |
10%|β | 2056/20000 [1:14:34<9:57:36, 2.00s/it]
|
2746 |
10%|β | 2057/20000 [1:14:35<9:18:11, 1.87s/it]
|
2747 |
|
2748 |
10%|β | 2057/20000 [1:14:35<9:18:11, 1.87s/it]
|
2749 |
10%|β | 2058/20000 [1:14:37<8:59:02, 1.80s/it]
|
2750 |
|
2751 |
10%|β | 2058/20000 [1:14:37<8:59:02, 1.80s/it]
|
2752 |
10%|β | 2059/20000 [1:14:38<8:39:57, 1.74s/it]
|
2753 |
|
2754 |
10%|β | 2059/20000 [1:14:38<8:39:57, 1.74s/it]
|
2755 |
10%|β | 2060/20000 [1:14:40<8:43:16, 1.75s/it]
|
2756 |
|
2757 |
10%|β | 2060/20000 [1:14:40<8:43:16, 1.75s/it]
|
2758 |
10%|β | 2061/20000 [1:14:42<8:19:10, 1.67s/it]
|
2759 |
|
2760 |
10%|β | 2061/20000 [1:14:42<8:19:10, 1.67s/it]
|
2761 |
10%|β | 2062/20000 [1:14:43<8:08:40, 1.63s/it]
|
2762 |
|
2763 |
10%|β | 2062/20000 [1:14:43<8:08:40, 1.63s/it]
|
2764 |
10%|β | 2063/20000 [1:14:45<7:55:27, 1.59s/it]
|
2765 |
|
2766 |
10%|β | 2063/20000 [1:14:45<7:55:27, 1.59s/it]
|
2767 |
10%|β | 2064/20000 [1:14:46<7:56:19, 1.59s/it]
|
2768 |
|
2769 |
10%|β | 2064/20000 [1:14:46<7:56:19, 1.59s/it]
|
2770 |
10%|β | 2065/20000 [1:14:48<7:48:13, 1.57s/it]
|
2771 |
|
2772 |
10%|β | 2065/20000 [1:14:48<7:48:13, 1.57s/it]
|
2773 |
10%|β | 2066/20000 [1:14:49<7:16:45, 1.46s/it]
|
2774 |
|
2775 |
10%|β | 2066/20000 [1:14:49<7:16:45, 1.46s/it]
|
2776 |
10%|β | 2067/20000 [1:14:50<6:58:10, 1.40s/it]
|
2777 |
|
2778 |
10%|β | 2067/20000 [1:14:50<6:58:10, 1.40s/it]
|
2779 |
10%|β | 2068/20000 [1:14:51<6:41:33, 1.34s/it]
|
2780 |
|
2781 |
10%|β | 2068/20000 [1:14:51<6:41:33, 1.34s/it]
|
2782 |
10%|β | 2069/20000 [1:14:53<6:26:32, 1.29s/it]
|
2783 |
|
2784 |
10%|β | 2069/20000 [1:14:53<6:26:32, 1.29s/it]
|
2785 |
10%|β | 2070/20000 [1:14:54<6:27:34, 1.30s/it]
|
2786 |
|
2787 |
10%|β | 2070/20000 [1:14:54<6:27:34, 1.30s/it]
|
2788 |
10%|β | 2071/20000 [1:14:55<6:27:27, 1.30s/it]
|
2789 |
|
2790 |
10%|β | 2071/20000 [1:14:55<6:27:27, 1.30s/it]
|
2791 |
10%|β | 2072/20000 [1:14:56<6:10:32, 1.24s/it]
|
2792 |
|
2793 |
10%|β | 2072/20000 [1:14:56<6:10:32, 1.24s/it]
|
2794 |
10%|β | 2073/20000 [1:14:57<5:43:50, 1.15s/it]
|
2795 |
|
2796 |
10%|β | 2073/20000 [1:14:57<5:43:50, 1.15s/it]
|
2797 |
10%|β | 2074/20000 [1:14:58<5:18:06, 1.06s/it]
|
2798 |
|
2799 |
10%|β | 2074/20000 [1:14:58<5:18:06, 1.06s/it]
|
2800 |
10%|β | 2075/20000 [1:15:01<7:44:24, 1.55s/it]
|
2801 |
|
2802 |
10%|β | 2075/20000 [1:15:01<7:44:24, 1.55s/it]
|
2803 |
10%|β | 2076/20000 [1:15:07<15:05:48, 3.03s/it]
|
2804 |
|
2805 |
10%|β | 2076/20000 [1:15:07<15:05:48, 3.03s/it]
|
2806 |
10%|β | 2077/20000 [1:15:12<17:43:39, 3.56s/it]
|
2807 |
|
2808 |
10%|β | 2077/20000 [1:15:12<17:43:39, 3.56s/it]
|
2809 |
10%|β | 2078/20000 [1:15:16<18:58:37, 3.81s/it]
|
2810 |
|
2811 |
10%|β | 2078/20000 [1:15:16<18:58:37, 3.81s/it]
|
2812 |
10%|β | 2079/20000 [1:15:21<19:21:09, 3.89s/it]
|
2813 |
|
2814 |
10%|β | 2079/20000 [1:15:21<19:21:09, 3.89s/it]
|
2815 |
10%|β | 2080/20000 [1:15:24<19:07:58, 3.84s/it]
|
2816 |
|
2817 |
10%|β | 2080/20000 [1:15:24<19:07:58, 3.84s/it]
|
2818 |
10%|β | 2081/20000 [1:15:28<18:37:41, 3.74s/it]
|
2819 |
|
2820 |
10%|β | 2081/20000 [1:15:28<18:37:41, 3.74s/it]slurmstepd: error: *** JOB 2217857 ON gpu36 CANCELLED AT 2024-08-28T13:34:19 ***
|
indicwav2vec_trainwtags_MUCS_warmup2000_s300shuff100_2221377.out
ADDED
The diff for this file is too large to render.
See raw diff
|
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"epoch":
|
3 |
-
"total_flos":
|
4 |
-
"train_loss":
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 20000,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
+
"epoch": 9.6,
|
3 |
+
"total_flos": 3.700768773245485e+19,
|
4 |
+
"train_loss": 2.825458660195271,
|
5 |
+
"train_runtime": 12931.7591,
|
6 |
"train_samples": 20000,
|
7 |
+
"train_samples_per_second": 14.847,
|
8 |
+
"train_steps_per_second": 0.464
|
9 |
}
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5496
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc96f9a8bd8515e6db2210ce0cdedc5cf02cc0b01b806eac8043e11f1c95f641
|
3 |
size 5496
|