# eval_qwen25_lora/eval_truthful_vi.py
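"""Evaluate a causal LM (e.g. a Qwen2.5 LoRA checkpoint) on a Vietnamese truthful-QA Excel file.

Usage: python eval_truthful_vi.py <model_name_or_path> <output_jsonl>

Reads questions and ground-truth answers from INPUT_FILENAME, generates a greedy
answer for each question, and streams {"question", "answer", "gold"} records to
the output JSONL file.
"""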
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import jsonlines
import sys
from tqdm.auto import tqdm
# --- Configuration ---
if len(sys.argv) < 3:
    print(f"Usage: python {sys.argv[0]} <model_name_or_path> <output_jsonl>")
    sys.exit(1)

MODEL_NAME = sys.argv[1]
INPUT_FILENAME = "./Vietnamese truthful QA results.xlsx"
OUTPUT_FILENAME = sys.argv[2]
MAX_NEW_TOKENS = 512  # Maximum number of new tokens to generate for each answer.

# Stream results as they are produced, one JSON object per line.
writer = jsonlines.open(OUTPUT_FILENAME, "w")
# 1. Load data from an XLSX file
try:
    df = pd.read_excel(INPUT_FILENAME)
except FileNotFoundError:
    print(f"Error: The file '{INPUT_FILENAME}' was not found.")
    print("Please make sure your XLSX file is in the same directory as the script.")
    sys.exit(1)
except Exception as e:
    print(f"An error occurred while reading the Excel file: {e}")
    sys.exit(1)
# 2. Select relevant columns and validate
if "Question" not in df.columns or "Ground truth" not in df.columns:
    print("Error: Required columns 'Question' and/or 'Ground truth' not found.")
    print(f"Available columns are: {list(df.columns)}")
    sys.exit(1)
df_processed = df[["Question", "Ground truth"]].copy()
# 3. Load Model and Tokenizer
print(f"Loading model '{MODEL_NAME}' and tokenizer...")
# Set up device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Load the tokenizer and model from Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
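# NOTE: attn_implementation='flash_attention_2' requires the flash-attn package
# and a CUDA GPU; if either is unavailable, remove the argument (or use 'sdpa').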
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16, attn_implementation='flash_attention_2')
model.to(device) # Move the model to the selected device
# Set a pad token if the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id
print("Model and tokenizer loaded successfully.")
# 4. Generate Answers using the Model
answers = []
total_questions = len(df_processed)
print(f"Generating answers for {total_questions} questions...")
for i, question in enumerate(tqdm(df_processed["Question"], total=total_questions)):
    # Build a chat-formatted prompt from the question; add_generation_prompt=True
    # appends the assistant turn header so the model continues as the assistant.
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        {"role": "user", "content": question},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    input_ids = tokenizer([prompt], return_tensors="pt").to(model.device)
    # Generate text using the model.
    # do_sample=False makes the output deterministic (greedy decoding).
    output_sequences = model.generate(
        **input_ids,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )
    # Decode only the newly generated tokens (the output includes the prompt).
    full_text = tokenizer.decode(output_sequences[0][input_ids["input_ids"].shape[1]:], skip_special_tokens=True)
    answer = full_text.strip()
    gold = df_processed["Ground truth"].iloc[i]
    answers.append(answer)
    print(f"Processed question {i + 1}/{total_questions}\nAnswer: {answer}\nGold: {gold}")
    writer.write({
        "question": question,
        "answer": answer,
        "gold": gold,
    })

writer.close()
print(f"Done. Wrote {total_questions} records to '{OUTPUT_FILENAME}'.")