from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load the fine-tuned model and tokenizer from the local checkpoint directory
model_name = "./T5base_Question_Generation"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

def get_question(tag, difficulty, context, answer="", num_questions=1, use_beam_search=False, num_beams=3, max_length=150):
    """
    Generate questions using the fine-tuned T5 model
    
    Parameters:
    - tag: Type of question (e.g., "short answer", "multiple choice question", "true or false question")
    - difficulty: "easy", "medium", "hard"
    - context: Supporting context or passage
    - answer: Optional — if you want targeted question generation
    - num_questions: Number of diverse questions to generate
    - max_length: Max token length of generated output
    
    Returns:
    - List of generated questions as strings
    """
    # Format input text based on whether answer is provided
    answer_part = f"[{answer}]" if answer else ""
    input_text = f"<extra_id_97>{tag} <extra_id_98>{difficulty} <extra_id_99>{answer_part} {context}"

    # Tokenize input
    features = tokenizer([input_text], return_tensors='pt', truncation=True, padding=True)

    # Decide generation strategy
    if num_questions == 1:
        if use_beam_search:
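            # Beam search keeps num_beams candidate sequences and returns the highest-scoring one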
            output = model.generate(
                input_ids=features['input_ids'],
                attention_mask=features['attention_mask'],
                max_length=max_length,
                num_beams=num_beams,
                early_stopping=False
            )
        else:
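            # Greedy decoding: deterministic, picks the single most likely token at each step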
            output = model.generate(
                input_ids=features['input_ids'],
                attention_mask=features['attention_mask'],
                max_length=max_length,
                do_sample=False
            )
    else:
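        # Top-p/top-k sampling produces diverse candidates when several questions are requested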
        output = model.generate(
            input_ids=features['input_ids'],
            attention_mask=features['attention_mask'],
            max_length=max_length,
            num_return_sequences=num_questions,
            do_sample=True,
            top_p=0.95,
            top_k=50
        )

    # Decode questions
    questions = [tokenizer.decode(out, skip_special_tokens=True) for out in output]
    return questions