davisrbr commited on
Commit
dcf09d1
·
verified ·
1 Parent(s): d5dd15d

Model save

Browse files
.ipynb_checkpoints/aqlm_2bit_training-checkpoint.ipynb CHANGED
@@ -387,8 +387,8 @@
387
  "\n",
388
  " <div>\n",
389
  " \n",
390
- " <progress value='22' max='10000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
391
- " [ 22/10000 09:16 < 77:05:02, 0.04 it/s, Epoch 0.01/4]\n",
392
  " </div>\n",
393
  " <table border=\"1\" class=\"dataframe\">\n",
394
  " <thead>\n",
@@ -402,6 +402,154 @@
402
  " <td>1</td>\n",
403
  " <td>5.558500</td>\n",
404
  " </tr>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
  " </tbody>\n",
406
  "</table><p>"
407
  ],
@@ -411,6 +559,22 @@
411
  },
412
  "metadata": {},
413
  "output_type": "display_data"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  }
415
  ],
416
  "source": [
 
387
  "\n",
388
  " <div>\n",
389
  " \n",
390
+ " <progress value='929' max='10000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
391
+ " [ 929/10000 7:12:36 < 70:33:12, 0.04 it/s, Epoch 0.30/4]\n",
392
  " </div>\n",
393
  " <table border=\"1\" class=\"dataframe\">\n",
394
  " <thead>\n",
 
402
  " <td>1</td>\n",
403
  " <td>5.558500</td>\n",
404
  " </tr>\n",
405
+ " <tr>\n",
406
+ " <td>25</td>\n",
407
+ " <td>4.310400</td>\n",
408
+ " </tr>\n",
409
+ " <tr>\n",
410
+ " <td>50</td>\n",
411
+ " <td>1.984600</td>\n",
412
+ " </tr>\n",
413
+ " <tr>\n",
414
+ " <td>75</td>\n",
415
+ " <td>1.548100</td>\n",
416
+ " </tr>\n",
417
+ " <tr>\n",
418
+ " <td>100</td>\n",
419
+ " <td>1.286000</td>\n",
420
+ " </tr>\n",
421
+ " <tr>\n",
422
+ " <td>125</td>\n",
423
+ " <td>1.133400</td>\n",
424
+ " </tr>\n",
425
+ " <tr>\n",
426
+ " <td>150</td>\n",
427
+ " <td>1.040200</td>\n",
428
+ " </tr>\n",
429
+ " <tr>\n",
430
+ " <td>175</td>\n",
431
+ " <td>0.977800</td>\n",
432
+ " </tr>\n",
433
+ " <tr>\n",
434
+ " <td>200</td>\n",
435
+ " <td>0.913900</td>\n",
436
+ " </tr>\n",
437
+ " <tr>\n",
438
+ " <td>225</td>\n",
439
+ " <td>0.909900</td>\n",
440
+ " </tr>\n",
441
+ " <tr>\n",
442
+ " <td>250</td>\n",
443
+ " <td>0.854600</td>\n",
444
+ " </tr>\n",
445
+ " <tr>\n",
446
+ " <td>275</td>\n",
447
+ " <td>0.851700</td>\n",
448
+ " </tr>\n",
449
+ " <tr>\n",
450
+ " <td>300</td>\n",
451
+ " <td>0.832200</td>\n",
452
+ " </tr>\n",
453
+ " <tr>\n",
454
+ " <td>325</td>\n",
455
+ " <td>0.810900</td>\n",
456
+ " </tr>\n",
457
+ " <tr>\n",
458
+ " <td>350</td>\n",
459
+ " <td>0.816500</td>\n",
460
+ " </tr>\n",
461
+ " <tr>\n",
462
+ " <td>375</td>\n",
463
+ " <td>0.796300</td>\n",
464
+ " </tr>\n",
465
+ " <tr>\n",
466
+ " <td>400</td>\n",
467
+ " <td>0.810300</td>\n",
468
+ " </tr>\n",
469
+ " <tr>\n",
470
+ " <td>425</td>\n",
471
+ " <td>0.767200</td>\n",
472
+ " </tr>\n",
473
+ " <tr>\n",
474
+ " <td>450</td>\n",
475
+ " <td>0.767100</td>\n",
476
+ " </tr>\n",
477
+ " <tr>\n",
478
+ " <td>475</td>\n",
479
+ " <td>0.772500</td>\n",
480
+ " </tr>\n",
481
+ " <tr>\n",
482
+ " <td>500</td>\n",
483
+ " <td>0.788000</td>\n",
484
+ " </tr>\n",
485
+ " <tr>\n",
486
+ " <td>525</td>\n",
487
+ " <td>0.741900</td>\n",
488
+ " </tr>\n",
489
+ " <tr>\n",
490
+ " <td>550</td>\n",
491
+ " <td>0.757600</td>\n",
492
+ " </tr>\n",
493
+ " <tr>\n",
494
+ " <td>575</td>\n",
495
+ " <td>0.732800</td>\n",
496
+ " </tr>\n",
497
+ " <tr>\n",
498
+ " <td>600</td>\n",
499
+ " <td>0.741600</td>\n",
500
+ " </tr>\n",
501
+ " <tr>\n",
502
+ " <td>625</td>\n",
503
+ " <td>0.749000</td>\n",
504
+ " </tr>\n",
505
+ " <tr>\n",
506
+ " <td>650</td>\n",
507
+ " <td>0.723700</td>\n",
508
+ " </tr>\n",
509
+ " <tr>\n",
510
+ " <td>675</td>\n",
511
+ " <td>0.735200</td>\n",
512
+ " </tr>\n",
513
+ " <tr>\n",
514
+ " <td>700</td>\n",
515
+ " <td>0.731500</td>\n",
516
+ " </tr>\n",
517
+ " <tr>\n",
518
+ " <td>725</td>\n",
519
+ " <td>0.711800</td>\n",
520
+ " </tr>\n",
521
+ " <tr>\n",
522
+ " <td>750</td>\n",
523
+ " <td>0.702200</td>\n",
524
+ " </tr>\n",
525
+ " <tr>\n",
526
+ " <td>775</td>\n",
527
+ " <td>0.714100</td>\n",
528
+ " </tr>\n",
529
+ " <tr>\n",
530
+ " <td>800</td>\n",
531
+ " <td>0.705400</td>\n",
532
+ " </tr>\n",
533
+ " <tr>\n",
534
+ " <td>825</td>\n",
535
+ " <td>0.711800</td>\n",
536
+ " </tr>\n",
537
+ " <tr>\n",
538
+ " <td>850</td>\n",
539
+ " <td>0.687200</td>\n",
540
+ " </tr>\n",
541
+ " <tr>\n",
542
+ " <td>875</td>\n",
543
+ " <td>0.708400</td>\n",
544
+ " </tr>\n",
545
+ " <tr>\n",
546
+ " <td>900</td>\n",
547
+ " <td>0.690700</td>\n",
548
+ " </tr>\n",
549
+ " <tr>\n",
550
+ " <td>925</td>\n",
551
+ " <td>0.697200</td>\n",
552
+ " </tr>\n",
553
  " </tbody>\n",
554
  "</table><p>"
555
  ],
 
559
  },
560
  "metadata": {},
561
  "output_type": "display_data"
562
+ },
563
+ {
564
+ "name": "stderr",
565
+ "output_type": "stream",
566
+ "text": [
567
+ "No files have been modified since last commit. Skipping to prevent empty commit.\n",
568
+ "No files have been modified since last commit. Skipping to prevent empty commit.\n",
569
+ "No files have been modified since last commit. Skipping to prevent empty commit.\n",
570
+ "No files have been modified since last commit. Skipping to prevent empty commit.\n",
571
+ "No files have been modified since last commit. Skipping to prevent empty commit.\n",
572
+ "/usr/local/lib/python3.10/dist-packages/torch/_dynamo/eval_frame.py:600: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.4 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.\n",
573
+ " return fn(*args, **kwargs)\n",
574
+ "No files have been modified since last commit. Skipping to prevent empty commit.\n",
575
+ "/usr/local/lib/python3.10/dist-packages/torch/utils/checkpoint.py:295: FutureWarning: `torch.cpu.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cpu', args...)` instead.\n",
576
+ " with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs): # type: ignore[attr-defined]\n"
577
+ ]
578
  }
579
  ],
580
  "source": [
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2963aabb8ed049140b2a5f8568354fe94479fb4d8305aeb3892f541b50901e10
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afdc8b2b69fa2c74f372ba9d4b5c6478695d88a02448bf9f25b262d40ca93538
3
  size 167832240
aqlm_2bit_training.ipynb CHANGED
@@ -368,7 +368,9 @@
368
  {
369
  "cell_type": "code",
370
  "execution_count": null,
371
- "metadata": {},
 
 
372
  "outputs": [
373
  {
374
  "name": "stderr",
@@ -387,8 +389,8 @@
387
  "\n",
388
  " <div>\n",
389
  " \n",
390
- " <progress value='521' max='10000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
391
- " [ 521/10000 4:01:56 < 73:38:57, 0.04 it/s, Epoch 0.17/4]\n",
392
  " </div>\n",
393
  " <table border=\"1\" class=\"dataframe\">\n",
394
  " <thead>\n",
@@ -482,6 +484,82 @@
482
  " <td>500</td>\n",
483
  " <td>0.788000</td>\n",
484
  " </tr>\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
  " </tbody>\n",
486
  "</table><p>"
487
  ],
 
368
  {
369
  "cell_type": "code",
370
  "execution_count": null,
371
+ "metadata": {
372
+ "scrolled": true
373
+ },
374
  "outputs": [
375
  {
376
  "name": "stderr",
 
389
  "\n",
390
  " <div>\n",
391
  " \n",
392
+ " <progress value='994' max='10000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
393
+ " [ 994/10000 7:43:00 < 70:03:24, 0.04 it/s, Epoch 0.32/4]\n",
394
  " </div>\n",
395
  " <table border=\"1\" class=\"dataframe\">\n",
396
  " <thead>\n",
 
484
  " <td>500</td>\n",
485
  " <td>0.788000</td>\n",
486
  " </tr>\n",
487
+ " <tr>\n",
488
+ " <td>525</td>\n",
489
+ " <td>0.741900</td>\n",
490
+ " </tr>\n",
491
+ " <tr>\n",
492
+ " <td>550</td>\n",
493
+ " <td>0.757600</td>\n",
494
+ " </tr>\n",
495
+ " <tr>\n",
496
+ " <td>575</td>\n",
497
+ " <td>0.732800</td>\n",
498
+ " </tr>\n",
499
+ " <tr>\n",
500
+ " <td>600</td>\n",
501
+ " <td>0.741600</td>\n",
502
+ " </tr>\n",
503
+ " <tr>\n",
504
+ " <td>625</td>\n",
505
+ " <td>0.749000</td>\n",
506
+ " </tr>\n",
507
+ " <tr>\n",
508
+ " <td>650</td>\n",
509
+ " <td>0.723700</td>\n",
510
+ " </tr>\n",
511
+ " <tr>\n",
512
+ " <td>675</td>\n",
513
+ " <td>0.735200</td>\n",
514
+ " </tr>\n",
515
+ " <tr>\n",
516
+ " <td>700</td>\n",
517
+ " <td>0.731500</td>\n",
518
+ " </tr>\n",
519
+ " <tr>\n",
520
+ " <td>725</td>\n",
521
+ " <td>0.711800</td>\n",
522
+ " </tr>\n",
523
+ " <tr>\n",
524
+ " <td>750</td>\n",
525
+ " <td>0.702200</td>\n",
526
+ " </tr>\n",
527
+ " <tr>\n",
528
+ " <td>775</td>\n",
529
+ " <td>0.714100</td>\n",
530
+ " </tr>\n",
531
+ " <tr>\n",
532
+ " <td>800</td>\n",
533
+ " <td>0.705400</td>\n",
534
+ " </tr>\n",
535
+ " <tr>\n",
536
+ " <td>825</td>\n",
537
+ " <td>0.711800</td>\n",
538
+ " </tr>\n",
539
+ " <tr>\n",
540
+ " <td>850</td>\n",
541
+ " <td>0.687200</td>\n",
542
+ " </tr>\n",
543
+ " <tr>\n",
544
+ " <td>875</td>\n",
545
+ " <td>0.708400</td>\n",
546
+ " </tr>\n",
547
+ " <tr>\n",
548
+ " <td>900</td>\n",
549
+ " <td>0.690700</td>\n",
550
+ " </tr>\n",
551
+ " <tr>\n",
552
+ " <td>925</td>\n",
553
+ " <td>0.697200</td>\n",
554
+ " </tr>\n",
555
+ " <tr>\n",
556
+ " <td>950</td>\n",
557
+ " <td>0.698000</td>\n",
558
+ " </tr>\n",
559
+ " <tr>\n",
560
+ " <td>975</td>\n",
561
+ " <td>0.681700</td>\n",
562
+ " </tr>\n",
563
  " </tbody>\n",
564
  "</table><p>"
565
  ],