# eval_qwen25_lora/eval_truthful_vi.py
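"""Evaluate a causal LM (e.g. a Qwen2.5 LoRA checkpoint) on a Vietnamese truthful-QA Excel file.

Usage: python eval_truthful_vi.py <model_name_or_path> <output_jsonl>

Reads questions and ground-truth answers from INPUT_FILENAME, generates a greedy
answer for each question, and streams {"question", "answer", "gold"} records to
the output JSONL file.
"""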
import pandas as pd
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import jsonlines
import sys
from tqdm.auto import tqdm
# --- Configuration ---
if len(sys.argv) < 3:
    print(f"Usage: python {sys.argv[0]} <model_name_or_path> <output_jsonl>")
    sys.exit(1)

MODEL_NAME = sys.argv[1]
INPUT_FILENAME = "./Vietnamese truthful QA results.xlsx"
OUTPUT_FILENAME = sys.argv[2]
MAX_NEW_TOKENS = 512  # Maximum number of new tokens to generate for each answer.

# Stream results as they are produced, one JSON object per line.
writer = jsonlines.open(OUTPUT_FILENAME, "w")
# 1. Load data from an XLSX file
try:
    df = pd.read_excel(INPUT_FILENAME)
except FileNotFoundError:
    print(f"Error: The file '{INPUT_FILENAME}' was not found.")
    print("Please make sure your XLSX file is in the same directory as the script.")
    sys.exit(1)
except Exception as e:
    print(f"An error occurred while reading the Excel file: {e}")
    sys.exit(1)
# 2. Select relevant columns and validate
if "Question" not in df.columns or "Ground truth" not in df.columns:
    print("Error: Required columns 'Question' and/or 'Ground truth' not found.")
    print(f"Available columns are: {list(df.columns)}")
    sys.exit(1)
df_processed = df[["Question", "Ground truth"]].copy()
# 3. Load Model and Tokenizer
print(f"Loading model '{MODEL_NAME}' and tokenizer...")
# Set up device (use GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# Load the tokenizer and model from Hugging Face Hub
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
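# NOTE: attn_implementation='flash_attention_2' requires the flash-attn package
# and a CUDA GPU; if either is unavailable, remove the argument (or use 'sdpa').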
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.bfloat16, attn_implementation='flash_attention_2')
model.to(device) # Move the model to the selected device
# Set a pad token if the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id
print("Model and tokenizer loaded successfully.")
# 4. Generate Answers using the Model
answers = []
total_questions = len(df_processed)
print(f"Generating answers for {total_questions} questions...")
for i, question in enumerate(tqdm(df_processed["Question"], total=total_questions)):
    # Build a chat-formatted prompt from the question; add_generation_prompt=True
    # appends the assistant turn header so the model continues as the assistant.
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        {"role": "user", "content": question},
    ]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    input_ids = tokenizer([prompt], return_tensors="pt").to(model.device)
    # Generate text using the model.
    # do_sample=False makes the output deterministic (greedy decoding).
    output_sequences = model.generate(
        **input_ids,
        max_new_tokens=MAX_NEW_TOKENS,
        do_sample=False,
        pad_token_id=tokenizer.pad_token_id,
    )
    # Decode only the newly generated tokens (the output includes the prompt).
    full_text = tokenizer.decode(output_sequences[0][input_ids["input_ids"].shape[1]:], skip_special_tokens=True)
    answer = full_text.strip()
    gold = df_processed["Ground truth"].iloc[i]
    answers.append(answer)
    print(f"Processed question {i + 1}/{total_questions}\nAnswer: {answer}\nGold: {gold}")
    writer.write({
        "question": question,
        "answer": answer,
        "gold": gold,
    })

writer.close()
print(f"Done. Wrote {total_questions} records to '{OUTPUT_FILENAME}'.")