gpt2-finetuned-oscar-fr-ori-tok / upload_hf_checkpoints.sh
yongzx's picture
update main
b3f3f7e
#!/bin/bash
# Request just under three days of runtime (2 days, 23 hours, 59 minutes):
#SBATCH --time=2-23:59:00
# Ask for the GPU partition and 1 GPU
#SBATCH --partition=3090-gcondo --gres=gpu:1
# Request 2 tasks (the default allocation is 1 core with 2.8GB of memory):
#SBATCH --ntasks=2
# Use more memory (10GB) (CPU RAM):
#SBATCH --mem=10g
# Specify a job name:
#SBATCH -J exp-misc-upload_hf_checkpoints_fr_ori_tok
# Specify an output file
#SBATCH -o /users/zyong2/data/zyong2/bigscience/logs/misc/upload_hf_checkpoints_fr_ori_tok.out
#SBATCH -e /users/zyong2/data/zyong2/bigscience/logs/misc/upload_hf_checkpoints_fr_ori_tok.err
# Set up the environment: export every variable defined in ~/.env, load the
# cluster modules, and activate the project's Python virtualenv.
set -a # automatically export all variables
source ~/.env
set +a
module load python/3.7.4
module load gitlfs/2.7.1
# FP_BIGS is never assigned in this script; presumably it is defined in ~/.env
# (TODO confirm). Fail fast if it is missing: otherwise the activate path
# collapses to /env_lang_mod/bin/activate and the rest of the job would run
# outside the intended virtualenv.
: "${FP_BIGS:?FP_BIGS is not set (expected to be defined in ~/.env)}"
source "${FP_BIGS}/env_lang_mod/bin/activate" || {
  echo "Failed to activate virtualenv at ${FP_BIGS}/env_lang_mod" >&2
  exit 1
}
# Log in to the Hugging Face Hub.
#
# Credentials are taken from the environment instead of being hard-coded in
# this script:
#   HF_USERNAME - account name (defaults to "yongzx", the value used before)
#   HF_PASSWORD - account password; required, e.g. defined in ~/.env, which
#                 is sourced with auto-export earlier in this script
#
# NOTE(review): a password was previously committed in this file in plain
# text. It should be treated as compromised and rotated.
HF_USERNAME="${HF_USERNAME:-yongzx}"
: "${HF_PASSWORD:?HF_PASSWORD is not set (define it in ~/.env, never in this script)}"
# The here-document feeds the two answers to the login prompts on stdin, so
# the secret never appears on a command line (and therefore not in `ps`).
huggingface-cli login <<EOF
${HF_USERNAME}
${HF_PASSWORD}
EOF
# Make sure a local clone of the Hugging Face model repository exists under
# REPO_PARENT, cloning it on first use, then make it the working directory.
# Every step that changes directory or clones is checked: if one fails, the
# script stops here rather than running the later git/cp commands in the
# wrong directory.
MODEL="gpt2-finetuned-oscar-fr-ori-tok"
REPO_PARENT="/users/zyong2/data/zyong2/bigscience/gh"
if [[ -d "${REPO_PARENT}/${MODEL}" ]]; then
  echo "Directory ${REPO_PARENT}/${MODEL} exists."
else
  cd "${REPO_PARENT}/" || {
    echo "Cannot cd to ${REPO_PARENT}/" >&2
    exit 1
  }
  hf_dir="https://huggingface.co/yongzx/$MODEL"
  echo "$hf_dir"
  git clone "$hf_dir" || {
    echo "git clone of ${hf_dir} failed" >&2
    exit 1
  }
fi
# inside the directory
cd "${REPO_PARENT}/${MODEL}" || {
  echo "Cannot cd to ${REPO_PARENT}/${MODEL}" >&2
  exit 1
}
# upload to huggingface-hub
#
# For every training step 500, 1000, ..., 76500: copy that checkpoint's model
# and tokenizer files from ORIGINAL_MODEL_DIR into the current git working
# tree and push them to a branch named "checkpoint-<step>".
#
# Failure policy:
#   - checkpoint directory or any expected file missing -> skip that step, so
#     a branch that merely mirrors main is never pushed as a checkpoint
#   - git checkout / git pull / cp failure -> abort, because continuing could
#     commit files to the wrong branch
#   - nothing to commit -> tolerated; the push still runs so that a commit
#     made by an earlier, interrupted run gets uploaded
ORIGINAL_MODEL_DIR="/users/zyong2/data/zyong2/bigscience/data/processed/exp-001/ft-gpt2-no-tok"

# Files copied from each checkpoint directory into the repository root.
CHECKPOINT_FILES=(
  config.json
  merges.txt
  pytorch_model.bin
  special_tokens_map.json
  tokenizer.json
  tokenizer_config.json
  training_args.bin
  vocab.json
)

# Print a message to stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

for CHECKPOINT in {500..76500..500}; do
  branch="checkpoint-${CHECKPOINT}"
  src_dir="${ORIGINAL_MODEL_DIR}/${branch}"

  # Validate the source before touching git at all.
  missing_file=""
  for name in "${CHECKPOINT_FILES[@]}"; do
    if [[ ! -f "${src_dir}/${name}" ]]; then
      missing_file="${src_dir}/${name}"
      break
    fi
  done
  if [[ -n "$missing_file" ]]; then
    echo "Skipping ${branch}: ${missing_file} not found." >&2
    continue
  fi

  # Always start from main so that a newly created branch forks from it.
  git checkout main || die "git checkout main failed before ${branch}"
  if git show-ref --verify --quiet "refs/heads/${branch}"; then
    git checkout "$branch" || die "git checkout ${branch} failed"
  else
    git checkout -b "$branch" || die "git checkout -b ${branch} failed"
  fi
  git status

  # Only pull when the branch already exists on the remote; for a brand-new
  # branch there is nothing to pull and git would just report an error.
  if git ls-remote --exit-code --heads origin "refs/heads/${branch}" >/dev/null; then
    git pull origin "$branch" || die "git pull origin ${branch} failed"
  fi

  echo "🚀 Copying from folder ${ORIGINAL_MODEL_DIR}/checkpoint-${CHECKPOINT}"
  for name in "${CHECKPOINT_FILES[@]}"; do
    cp "${src_dir}/${name}" ./ || die "Copying ${src_dir}/${name} failed"
  done

  git status
  git add . && git commit -m "checkpoint $CHECKPOINT"
  git push origin "$branch" || echo "git push origin ${branch} failed" >&2
  echo "============================================================================================="
done