AbrahamicSolver / evaluate.bash
Gatsby767's picture
Upload 7 files
47a4065 verified
#!/bin/bash
# ---------------------------------------------------------------------------
# Configuration and GPU discovery.
#
# Usage: evaluate.bash <model_name>
#
# Globals defined here and used by the rest of the script:
#   model_name   - model identifier/path taken from the first argument
#   MODEL_NAMES  - list of models to process (currently just model_name)
#   TASKS        - dataset names passed to evaluation/generate.py
#   GPU_QUEUE    - indices of GPUs that are currently free
#   pids         - map: GPU index -> PID of the job running on that GPU
# ---------------------------------------------------------------------------

# Exported so the Python child processes inherit it.
# NOTE(review): judging by the name this disables vLLM's compile cache - confirm.
export VLLM_DISABLE_COMPILE_CACHE=1

# Fail early with a usage message instead of silently running with an empty
# model list and passing empty option values to the Python scripts.
if [[ -z "${1:-}" ]]; then
echo "Usage: ${0##*/} <model_name>" >&2
exit 2
fi
model_name="$1"

# Quoted so a model path containing spaces or glob characters stays one entry.
MODEL_NAMES=(
"${model_name}"
)
TASKS=(
"math"
"gsm8k"
"amc"
"minerva"
"olympiad"
"aime2024"
"aime2025"
)

# Ask nvidia-smi for one GPU index per line. The exit status is checked so
# that an error message printed by a failing nvidia-smi is never mistaken
# for a list of GPU indices.
if ! gpu_listing="$(nvidia-smi --query-gpu=index --format=csv,noheader)"; then
echo "==> ERROR: nvidia-smi failed; cannot determine the available GPUs." >&2
exit 1
fi
GPU_QUEUE=()
# 'read' without IFS= trims surrounding whitespace from every index.
while read -r gpu_index; do
if [[ -n "${gpu_index}" ]]; then
GPU_QUEUE+=("${gpu_index}")
fi
done <<<"${gpu_listing}"

# Without at least one GPU no task could ever be scheduled.
if (( ${#GPU_QUEUE[@]} == 0 )); then
echo "==> ERROR: no GPUs reported by nvidia-smi." >&2
exit 1
fi
echo "Available GPUs: ${GPU_QUEUE[*]}"
declare -A pids=()
#######################################
# Launch one generation job in the background, pinned to a single GPU.
# Globals:
#   pids - written: pids[<gpu index>] is set to the PID of the new job
# Arguments:
#   $1 - GPU index, exported to the job as CUDA_VISIBLE_DEVICES
#   $2 - model, forwarded as --model
#   $3 - dataset/task name, forwarded as --dataset
# Outputs:
#   Writes a timestamped "Start task" line to stdout.
#######################################
start_job() {
local device="$1" checkpoint="$2" dataset="$3"
local stamp
stamp="$(date '+%Y-%m-%d %H:%M:%S')"
printf '==> [%s] Start task [%s] with model [%s] on GPU [%s] ...\n' \
"${stamp}" "${dataset}" "${checkpoint}" "${device}"
# The assignment prefix restricts the variable to this one child process.
CUDA_VISIBLE_DEVICES="${device}" python evaluation/generate.py \
--model "${checkpoint}" \
--dataset "${dataset}" &
# $! is the PID of the background job started on the previous command.
pids["${device}"]=$!
}
#######################################
# Run every entry of TASKS for one model, at most one job per GPU at a time.
#
# Each pass first hands free GPUs to pending tasks via start_job, then polls
# the running jobs once and returns the GPU of every finished job to the
# queue, then sleeps for one second.
#
# Globals:
#   TASKS     - read: task names to run
#   GPU_QUEUE - read/written: indices of currently free GPUs
#   pids      - read/written: GPU index -> PID of the job on that GPU
# Arguments:
#   $1 - model, forwarded to start_job
# Outputs:
#   Progress lines on stdout; warnings and errors on stderr.
# Returns:
#   0 once all tasks were started and every job has ended (failed jobs are
#     reported on stderr but do not change the return value),
#   1 if tasks remain but no GPU is free and no job is running, a state
#     from which the loop could never make progress.
#######################################
run_model_tasks() {
local model="$1"
local task_index=0
local num_tasks=${#TASKS[@]}
local gpu_id task pid status
# Remembers which task runs on which GPU, for failure reports only.
local -A gpu_task=()

while :; do
# Launch phase: pair free GPUs with pending tasks until either runs out.
while (( ${#GPU_QUEUE[@]} > 0 && task_index < num_tasks )); do
gpu_id="${GPU_QUEUE[0]}"
GPU_QUEUE=("${GPU_QUEUE[@]:1}")
task="${TASKS[task_index]}"
task_index=$((task_index + 1))
gpu_task["${gpu_id}"]="${task}"
start_job "${gpu_id}" "${model}" "${task}"
done

if (( ${#pids[@]} == 0 )); then
if (( task_index >= num_tasks )); then
return 0
fi
# Tasks remain, nothing is running and (because the launch loop just
# stopped) no GPU is free: polling would spin forever.
echo "==> ERROR: no GPU available to run the remaining tasks." >&2
return 1
fi

# Poll phase. The key list is expanded before the loop body runs, so
# removing entries while iterating is safe.
for gpu_id in "${!pids[@]}"; do
pid="${pids[$gpu_id]}"
if ! kill -0 "${pid}" 2>/dev/null; then
# The job is gone; 'wait' retrieves the exit status the shell recorded
# for it so that failures do not pass silently.
status=0
wait "${pid}" 2>/dev/null || status=$?
echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] GPU [${gpu_id}] job finished with PID [${pid}]."
if (( status != 0 )); then
echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] WARNING: task [${gpu_task[$gpu_id]}] on GPU [${gpu_id}] exited with status [${status}]." >&2
fi
# Quoted so the subscript is never treated as a glob pattern.
unset "pids[${gpu_id}]"
unset "gpu_task[${gpu_id}]"
GPU_QUEUE+=("${gpu_id}")
fi
done
sleep 1
done
}

for MODEL_NAME in "${MODEL_NAMES[@]}"; do
echo "==> Processing model: ${MODEL_NAME}"
run_model_tasks "${MODEL_NAME}" || exit 1
done
# ---------------------------------------------------------------------------
# Post-generation evaluation for the model given on the command line.
#
# results_recheck.py is started in the background and runs alongside the
# four evaluation scripts below, which run one after another in the
# foreground. "${model_name}" is quoted so a path containing spaces or glob
# characters reaches each script as a single argument.
# ---------------------------------------------------------------------------
python evaluation/results_recheck.py --model_name "${model_name}" &
recheck_pid=$!

python evaluation/eval_supergpqa.py --model_path "${model_name}"
python evaluation/eval_bbeh.py --model_path "${model_name}"
python evaluation/eval_mmlupro.py --model_path "${model_name}"
python evaluation/test.py --model_name "${model_name}"

# Wait for the background recheck before announcing completion; otherwise the
# final message could be printed (and the script could exit) while it is
# still running.
# NOTE(review): test.py is still started without waiting for the recheck, as
# in the original ordering - confirm it does not depend on the recheck output.
if ! wait "${recheck_pid}"; then
echo "==> WARNING: evaluation/results_recheck.py exited with a non-zero status." >&2
fi
echo "==> All tasks have finished!"