pere committed on
Commit 2d7a9d1
1 Parent(s): f043e17

Saving weights and logs of step 10000
config.json CHANGED
@@ -21,7 +21,7 @@
   "pad_token_id": 1,
   "position_embedding_type": "absolute",
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.14.0.dev0",
+  "transformers_version": "4.16.0.dev0",
   "type_vocab_size": 1,
   "use_cache": true,
   "vocab_size": 50265
events.out.tfevents.1640883381.t1v-n-e1a08808-w-0.315965.0.v2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de870d4511ea6c2936e5dc3234370e389dd77a065fc9c1db21fa0da61126a2f8
+size 40
events.out.tfevents.1640906290.t1v-n-e1a08808-w-0.356422.0.v2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b6066f9af1f00c3dc400093acd04238a3fe4aad6ba2bc8e926969521ba07e3e
+size 1470136
flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d70201bc16c7d6ca23e7a2c885f8a3e434db1d78ed3426f592f9904bda35c4b
+size 498796983
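
The three ADDED files above are Git LFS pointer stubs, not the payloads themselves: each records the pointer spec version, the sha256 oid of the real blob, and its byte size (so flax_model.msgpack is roughly 499 MB of Flax weights, while the first TensorBoard event file is only 40 bytes). A minimal sketch of reading such a pointer from a checkout that has not run `git lfs pull`, assuming the standard three key-value lines shown in the hunks:

# Parse a Git LFS pointer file ("key value" per line) into a dict.
def read_lfs_pointer(path):
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = read_lfs_pointer("flax_model.msgpack")
print(pointer["oid"])   # sha256:4d70201b...
print(pointer["size"])  # 498796983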
run_mlm_flax.py CHANGED
@@ -622,6 +622,7 @@ if __name__ == "__main__":
 
         # Generate an epoch by shuffling sampling indices from the train dataset
         num_train_samples = len(tokenized_datasets["train"])
+        print(f'Total number of training samples: {num_train_samples}')
         train_samples_idx = jax.random.permutation(input_rng, jnp.arange(num_train_samples))
         train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size)
 
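The added print is a one-line diagnostic in the epoch loop: after tokenization the script permutes all sample indices with jax.random.permutation and cuts them into batch-sized chunks. A self-contained sketch of that step, assuming generate_batch_splits simply drops the trailing partial batch and splits the permuted indices into full batches (a plausible reading of its use here, not the script's verbatim helper):

import jax
import jax.numpy as jnp
import numpy as np

def generate_batch_splits(samples_idx, batch_size):
    # Keep only as many indices as fill whole batches, then split evenly.
    samples_idx = np.asarray(samples_idx)
    num_full = len(samples_idx) // batch_size
    return np.split(samples_idx[: num_full * batch_size], num_full)

input_rng = jax.random.PRNGKey(0)
num_train_samples = 1000  # stand-in for len(tokenized_datasets["train"])
train_samples_idx = jax.random.permutation(input_rng, jnp.arange(num_train_samples))
print(f'Total number of training samples: {num_train_samples}')
train_batch_idx = generate_batch_splits(train_samples_idx, batch_size=200)
print(len(train_batch_idx), train_batch_idx[0].shape)  # 5 batches of (200,)
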
run_step1.sh CHANGED
@@ -3,12 +3,13 @@
   --model_type="roberta" \
   --config_name="./" \
   --tokenizer_name="./" \
-  --dataset_name="NbAiLab/nbailab_extended" \
+  --train_file /mnt/disks/flaxdisk/corpus/train_1_4.json \
+  --validation_file /mnt/disks/flaxdisk/corpus/validation.json \
   --cache_dir="/mnt/disks/flaxdisk/cache/" \
   --max_seq_length="128" \
   --weight_decay="0.01" \
-  --per_device_train_batch_size="256" \
-  --per_device_eval_batch_size="256" \
+  --per_device_train_batch_size="200" \
+  --per_device_eval_batch_size="200" \
   --learning_rate="4e-4" \
   --warmup_steps="10000" \
   --overwrite_output_dir \