yhavinga committed on
Commit
19fae6f
·
1 Parent(s): d42172a

Update model

Browse files
eval_results.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "eval_accuracy": 0.7494515776634216,
3
- "eval_loss": 0.8329918384552002
4
  }
 
1
  {
2
+ "eval_accuracy": 0.7501800656318665,
3
+ "eval_loss": 0.8296337723731995
4
  }
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6adf5dfa55da480dfa0a8c47f53d1cb4cee581f3f22e017c7591c3717ef2416
3
  size 1198558445
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5a8b116ac6d7591d0dfac6bbdb45dfd26d32bce65d4e385d2a346ad7957fda
3
  size 1198558445
run_byt5_small-mc4.sh CHANGED
@@ -12,7 +12,6 @@ export MODEL_PATH="${HOME}/data/${HF_PROJECT}" # Path to the model
12
  mkdir -p "${MODEL_PATH}"
13
 
14
  python ../train/run_t5_mlm_flax_pmap.py \
15
- --resume_from_checkpoint="${MODEL_PATH}" \
16
  --output_dir="${MODEL_PATH}" \
17
  --model_type="t5" \
18
  --config_name="${CONFIG_NAME}" \
@@ -26,7 +25,8 @@ python ../train/run_t5_mlm_flax_pmap.py \
26
  --per_device_eval_batch_size="16" \
27
  --gradient_accumulation_steps="8" \
28
  --mean_noise_span_length="20" \
29
- --dtype="float32" \
 
30
  --optim="adafactor" \
31
  --learning_rate="0.0034" \
32
  --lr_decay="linear" \
@@ -45,6 +45,7 @@ python ../train/run_t5_mlm_flax_pmap.py \
45
  # --max_eval_samples="1000" \
46
 
47
  # --model_name_or_path="${MODEL_PATH}" \
 
48
 
49
  # \
50
 
 
12
  mkdir -p "${MODEL_PATH}"
13
 
14
  python ../train/run_t5_mlm_flax_pmap.py \
 
15
  --output_dir="${MODEL_PATH}" \
16
  --model_type="t5" \
17
  --config_name="${CONFIG_NAME}" \
 
25
  --per_device_eval_batch_size="16" \
26
  --gradient_accumulation_steps="8" \
27
  --mean_noise_span_length="20" \
28
+ --dtype="bfloat16" \
29
+ --z_loss="1e-4" \
30
  --optim="adafactor" \
31
  --learning_rate="0.0034" \
32
  --lr_decay="linear" \
 
45
  # --max_eval_samples="1000" \
46
 
47
  # --model_name_or_path="${MODEL_PATH}" \
48
+ # --resume_from_checkpoint="${MODEL_PATH}" \
49
 
50
  # \
51