Update model
Browse files
- eval_results.json +2 -2
- flax_model.msgpack +1 -1
- run_byt5_small-mc4.sh +3 -2
eval_results.json
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
{
|
2 |
-
"eval_accuracy": 0.
|
3 |
-
"eval_loss": 0.
|
4 |
}
|
|
|
1 |
{
|
2 |
+
"eval_accuracy": 0.7501800656318665,
|
3 |
+
"eval_loss": 0.8296337723731995
|
4 |
}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1198558445
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f5a8b116ac6d7591d0dfac6bbdb45dfd26d32bce65d4e385d2a346ad7957fda
|
3 |
size 1198558445
|
run_byt5_small-mc4.sh
CHANGED
@@ -12,7 +12,6 @@ export MODEL_PATH="${HOME}/data/${HF_PROJECT}" # Path to the model
|
|
12 |
mkdir -p "${MODEL_PATH}"
|
13 |
|
14 |
python ../train/run_t5_mlm_flax_pmap.py \
|
15 |
-
--resume_from_checkpoint="${MODEL_PATH}" \
|
16 |
--output_dir="${MODEL_PATH}" \
|
17 |
--model_type="t5" \
|
18 |
--config_name="${CONFIG_NAME}" \
|
@@ -26,7 +25,8 @@ python ../train/run_t5_mlm_flax_pmap.py \
|
|
26 |
--per_device_eval_batch_size="16" \
|
27 |
--gradient_accumulation_steps="8" \
|
28 |
--mean_noise_span_length="20" \
|
29 |
-
--dtype="
|
|
|
30 |
--optim="adafactor" \
|
31 |
--learning_rate="0.0034" \
|
32 |
--lr_decay="linear" \
|
@@ -45,6 +45,7 @@ python ../train/run_t5_mlm_flax_pmap.py \
|
|
45 |
# --max_eval_samples="1000" \
|
46 |
|
47 |
# --model_name_or_path="${MODEL_PATH}" \
|
|
|
48 |
|
49 |
# \
|
50 |
|
|
|
12 |
mkdir -p "${MODEL_PATH}"
|
13 |
|
14 |
python ../train/run_t5_mlm_flax_pmap.py \
|
|
|
15 |
--output_dir="${MODEL_PATH}" \
|
16 |
--model_type="t5" \
|
17 |
--config_name="${CONFIG_NAME}" \
|
|
|
25 |
--per_device_eval_batch_size="16" \
|
26 |
--gradient_accumulation_steps="8" \
|
27 |
--mean_noise_span_length="20" \
|
28 |
+
--dtype="bfloat16" \
|
29 |
+
--z_loss="1e-4" \
|
30 |
--optim="adafactor" \
|
31 |
--learning_rate="0.0034" \
|
32 |
--lr_decay="linear" \
|
|
|
45 |
# --max_eval_samples="1000" \
|
46 |
|
47 |
# --model_name_or_path="${MODEL_PATH}" \
|
48 |
+
# --resume_from_checkpoint="${MODEL_PATH}" \
|
49 |
|
50 |
# \
|
51 |
|