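# Evaluate a single model: fan the generation tasks out across all local
# GPUs in parallel, then run the remaining evaluation scripts sequentially.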
#!/bin/bash
# Disable vLLM's compile cache.
export VLLM_DISABLE_COMPILE_CACHE=1
# First CLI argument: the model (local path or Hugging Face ID) to evaluate.
model_name=$1
MODEL_NAMES=(
    "$model_name"
)
# Benchmarks; each name is passed verbatim as --dataset to evaluation/generate.py.
TASKS=(
    "math"
    "gsm8k"
    "amc"
    "minerva"
    "olympiad"
    "aime2024"
    "aime2025"
)
# Queue of idle GPU indices, discovered via nvidia-smi.
GPU_QUEUE=($(nvidia-smi --query-gpu=index --format=csv,noheader))
echo "Available GPUs: ${GPU_QUEUE[*]}"
# Maps each busy GPU index to the PID of the job running on it.
declare -A pids
# Launch one generation job on the given GPU in the background and
# record its PID so the scheduler can poll for completion.
start_job() {
    local gpu_id="$1"
    local model="$2"
    local task="$3"
    echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] Start task [${task}] with model [${model}] on GPU [${gpu_id}] ..."
    CUDA_VISIBLE_DEVICES="${gpu_id}" \
        python evaluation/generate.py --model "${model}" --dataset "${task}" &
    pids["${gpu_id}"]=$!
}
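# Simple GPU-level scheduler: greedily hand pending tasks to idle GPUs,
# then poll the running jobs once per second. Whenever a PID is no longer
# alive (kill -0 fails), its GPU is returned to the queue. The loop exits
# once every task has been dispatched and no job remains running.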
for MODEL_NAME in "${MODEL_NAMES[@]}"; do
    echo "==> Processing model: ${MODEL_NAME}"
    TASK_INDEX=0
    NUM_TASKS=${#TASKS[@]}
    while :; do
        while [ ${#GPU_QUEUE[@]} -gt 0 ] && [ ${TASK_INDEX} -lt ${NUM_TASKS} ]; do
            gpu_id="${GPU_QUEUE[0]}"
            GPU_QUEUE=("${GPU_QUEUE[@]:1}")
            task="${TASKS[${TASK_INDEX}]}"
            ((TASK_INDEX++))
            start_job "$gpu_id" "$MODEL_NAME" "$task"
        done
        if [ ${TASK_INDEX} -ge ${NUM_TASKS} ] && [ ${#pids[@]} -eq 0 ]; then
            break
        fi
        for gpu_id in "${!pids[@]}"; do
            pid="${pids[$gpu_id]}"
            if ! kill -0 "$pid" 2>/dev/null; then
                echo "==> [$(date '+%Y-%m-%d %H:%M:%S')] GPU [${gpu_id}] job finished with PID [${pid}]."
                unset pids["$gpu_id"]
                GPU_QUEUE+=("$gpu_id")
            fi
        done
        sleep 1
    done
done
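# All per-GPU generation jobs have finished; run the remaining
# evaluation scripts against the same model.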
python evaluation/results_recheck.py --model_name "$model_name" &
python evaluation/eval_supergpqa.py --model_path "$model_name"
python evaluation/eval_bbeh.py --model_path "$model_name"
python evaluation/eval_mmlupro.py --model_path "$model_name"
python evaluation/test.py --model_name "$model_name"
# Wait for the backgrounded recheck before declaring completion.
wait
echo "==> All tasks have finished!"
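Assuming the script above is saved as run_eval.sh (the filename is hypothetical), it takes the model path or Hugging Face ID as its only argument:

bash run_eval.sh /path/to/checkpoint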