# AbrahamicSolver / question_generate.py
import argparse
import json
import os
from typing import List

import regex as re
import torch
import vllm
from transformers import AutoTokenizer
from vllm.outputs import RequestOutput

from evaluation.datasets_loader import get_dataset_handler

# Root directory for all outputs; expected to be set in the environment.
STORAGE_PATH = os.getenv("STORAGE_PATH")

def extract_boxed(text):
    """Return the contents of every \\boxed{...} span in `text`.

    A brace-depth counter is used instead of a regex so that nested
    groups such as \\boxed{\\frac{1}{2}} are captured whole.
    """
    results, i = [], 0
    prefix = r'\boxed{'
    plen = len(prefix)
    while True:
        start = text.find(prefix, i)
        if start == -1:
            break  # no more \boxed{…}
        j = start + plen
        depth = 1
        # Scan forward, tracking brace depth until the matching '}' closes.
        while j < len(text) and depth:
            if text[j] == '{':
                depth += 1
            elif text[j] == '}':
                depth -= 1
            j += 1
        results.append(text[start + plen : j - 1])
        i = j
    return results
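
# Hedged illustration (added for clarity; not part of the original file):
# extract_boxed keeps nested brace groups intact, which a naive regex such as
# r"\\boxed\{(.*?)\}" would truncate at the first '}'.
def _extract_boxed_examples():
    assert extract_boxed(r"answer: \boxed{42}") == ["42"]
    assert extract_boxed(r"\boxed{\frac{1}{2}} and \boxed{7}") == [r"\frac{1}{2}", "7"]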

def get_response_mask(response_ids, eos_token_id, dtype):
    """Zero out every position from the first EOS token onward in each row."""
    batch_size, seq_len = response_ids.shape
    mask = torch.ones((batch_size, seq_len), dtype=dtype)
    for i in range(batch_size):
        for j in range(seq_len):
            if response_ids[i][j] == eos_token_id:
                mask[i][j:] = 0
                break
    return mask
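
# A minimal vectorized sketch of the same masking (an addition, not part of the
# original file): the cumulative EOS count is zero exactly at positions strictly
# before the first EOS, so this reproduces the Python loop above in one pass.
def get_response_mask_vectorized(response_ids, eos_token_id, dtype):
    seen = torch.cumsum((response_ids == eos_token_id).to(torch.long), dim=1)
    return (seen == 0).to(dtype)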

def main(args):
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    # Some models ship without a pad token; fall back to EOS so padding works.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
    model = vllm.LLM(
        model=args.model,
        tokenizer=args.model,
        # gpu_memory_utilization=0.8,
        # The run suffix doubles as the sampling seed; fall back to 0 when the
        # default empty string is used, since int("") would raise ValueError.
        seed=int(args.suffix) if args.suffix else 0,
    )
    dataset_handler = get_dataset_handler("math")
    questions, answers = dataset_handler.load_data()
    # The first example is loaded for reference; it is not injected into the prompt.
    question = questions[0]
    answer = answers[0]
    chat = [
        {
            "role": "system",
            "content": (
                "You are an expert competition-math problem setter.\n"
                "FIRST, in your private scratch-pad, think step-by-step to design a brand-new, non-trivial problem. "
                "The problem could come from any field of mathematics, including but not limited to algebra, geometry, number theory, combinatorics, prealgebra, probability, statistics, and calculus. "
                "Aim for a difficulty such that fewer than 30 % of advanced high-school students could solve it. "
                "Avoid re-using textbook clichés or famous contest problems.\n"
                "THEN, without revealing any of your private thoughts, output **exactly** the following two blocks:\n\n"
                "<question>\n"
                "{The full problem statement on one or more lines}\n"
                "</question>\n\n"
                r"\boxed{final_answer}"
                "\n\n"
                "Do NOT output anything else—no explanations, no extra markup."
            )
        },
        {
            "role": "user",
            "content": (
                "Generate one new, challenging reasoning question now. "
                "Remember to format the output exactly as instructed."
            )
        }
    ]
    if tokenizer.chat_template:
        prompt = tokenizer.apply_chat_template(
            chat,
            tokenize=False,
            add_generation_prompt=True,
            add_special_tokens=True
        )
    else:
        # Plain-text fallback for tokenizers that ship without a chat template.
        prompt = "system: " + chat[0]["content"] + '\n' + "user: " + chat[1]["content"]
    sample_params = vllm.SamplingParams(
        max_tokens=4096,
        temperature=1.0,
        top_p=0.95,
        n=1,
        stop_token_ids=[tokenizer.eos_token_id],
    )
    # Submit one copy of the prompt per requested sample; vLLM batches internally.
    completions: List[RequestOutput] = model.generate(
        [prompt] * args.num_samples, sampling_params=sample_params
    )
    results = []
    for completion in completions:
        response = completion.outputs[0].text
        try:
            questions = re.findall(r"<question>(.*?)</question>", response, re.DOTALL)
            answers = extract_boxed(response)
            if questions and answers:
                # Keep the last match in case the model emitted multiple blocks.
                question = questions[-1].strip()
                answer = answers[-1].strip()
                results.append({"question": question, "answer": answer, "score": 0})
            else:
                # Malformed output: keep the raw text and flag it with score -1.
                results.append({"question": response, "answer": "", "score": -1})
        except Exception:
            results.append({"question": response, "answer": "", "score": -1})
    out_path = f"{STORAGE_PATH}/generated_question/{args.save_name}_{args.suffix}.json"
    # Make sure the output directory exists before writing.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with open(out_path, "w") as f:
        json.dump(results, f, indent=4)
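
# Hedged self-check (an addition, not in the original file): a synthetic
# response in the expected format, exercising the same parsing as main().
def _parse_demo():
    demo = "<question>\nWhat is 2 + 3?\n</question>\n\n" + "\\boxed{5}"
    assert re.findall(r"<question>(.*?)</question>", demo, re.DOTALL)[0].strip() == "What is 2 + 3?"
    assert extract_boxed(demo) == ["5"]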
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, default="Qwen/Qwen3-4B")
parser.add_argument("--num_samples", type=int, default=1250, help="Number of samples to generate")
parser.add_argument("--suffix", type=str, default="", help="Suffix to add to the output file")
parser.add_argument("--save_name", type=str, default="", help="")
args = parser.parse_args()
main(args)
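
# Example invocation (hypothetical paths; STORAGE_PATH must point at a writable
# directory, and generated_question/ is created on demand before writing):
#
#   STORAGE_PATH=/tmp/out python question_generate.py \
#       --model Qwen/Qwen3-4B --num_samples 8 --suffix 0 --save_name qwen3_gen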