pere committed on
Commit
68b6324
·
verified ·
1 Parent(s): e466c9c

Saving train state of step 2000

Browse files
checkpoint-2000-epoch-3/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3383ce8239fd9a5346296993f4068931faff75aafbb1e863f55802be68be183b
3
  size 3025686376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93a3ac6fa4c717512dc0856ac258d0d555a67425f9fa46cde554fe5712a0b37f
3
  size 3025686376
checkpoint-2000-epoch-3/model_1.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:56570ecf66e2cbf1e212810317afdc44b85396298beab22e66ff759a1116f26a
3
- size 4361069272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28897ec4b789c0dc382a6975366fcb16206be64b6b691a60b218831c8f6af1ea
3
+ size 4361070048
checkpoint-2000-epoch-3/optimizer.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69da6783a2bb6483a2623217bf874ce7cd7d99e80a36638e0ffcc67bf80de6e7
3
  size 950951226
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:294e12cadb7ecb51806f8ec6010da51f2efb89c17b28f09fbe6f861bb53a37b9
3
  size 950951226
checkpoint-2000-epoch-3/scheduler.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dad6b56b74593b411aa2335a4636d028f73ce8d740f99b52582f884503cebaa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e72412f426c58539f1dcfef4d31369e79764f60ce3a6e20df06cde830d8946e
3
  size 1064
run_large_training.sh CHANGED
@@ -29,8 +29,8 @@ accelerate launch run_distillation.py \
29
  --dataloader_num_workers 8 \
30
  --preprocessing_num_workers 8 \
31
  --ddp_timeout 7200 \
32
- --dtype "float16" \
33
- --attn_implementation "flash_attention_2" \
34
  --output_dir "./" \
35
  --do_train \
36
  --do_eval \
 
29
  --dataloader_num_workers 8 \
30
  --preprocessing_num_workers 8 \
31
  --ddp_timeout 7200 \
32
+ --dtype "bfloat16" \
33
+ --attn_implementation "sdpa" \
34
  --output_dir "./" \
35
  --do_train \
36
  --do_eval \