|
|
|
import json |
|
|
|
|
|
def get_query(messages, num_turns=5):
    """Serialize the last ``num_turns`` conversation turns into a single query string.

    Each turn is rendered as ``"<role>: <content>"`` on its own line, with the
    role ``"assistant"`` rewritten to ``"agent"``.  Unlike a naive in-place
    rewrite, the caller's ``messages`` dicts are NOT mutated.

    Args:
        messages: list of dicts, each with 'role' and 'content' keys.
        num_turns: number of trailing turns to include (default 5).

    Returns:
        The joined, whitespace-stripped query string ("" for empty input).
    """
    lines = []
    for item in messages[-num_turns:]:
        # Rewrite role locally instead of mutating the caller's dict.
        role = item['role'].replace("assistant", "agent")
        lines.append("{}: {}".format(role, item['content']))
    # strip() matches the original trailing-newline removal exactly.
    return "\n".join(lines).strip()
|
|
|
|
|
def get_query_with_topic(messages, topic, num_turns=3):
    """Serialize the last ``num_turns`` turns, prefixing user turns with the topic.

    User turns become ``"user: this is a question about <topic>. <content>"``;
    other turns become ``"<role>: <content>"`` with ``"assistant"`` rewritten
    to ``"agent"``.  Unlike a naive in-place rewrite, the caller's ``messages``
    dicts are NOT mutated.

    Args:
        messages: list of dicts, each with 'role' and 'content' keys.
        topic: topic string injected into every user turn.
        num_turns: number of trailing turns to include (default 3).

    Returns:
        The joined, whitespace-stripped query string ("" for empty input).
    """
    lines = []
    for item in messages[-num_turns:]:
        # Rewrite role locally instead of mutating the caller's dict.
        role = item['role'].replace("assistant", "agent")
        if role == 'user':
            lines.append("{}: this is a question about {}. {}".format(role, topic, item['content']))
        else:
            lines.append("{}: {}".format(role, item['content']))
    # strip() matches the original trailing-newline removal exactly.
    return "\n".join(lines).strip()
|
|
|
|
|
def get_data_for_evaluation(input_datapath, document_datapath, dataset_name):
    """Load evaluation conversations and their documents, grouped by document id.

    Args:
        input_datapath: path to a JSON list of items, each with 'messages',
            'document', 'ground_truth_ctx' (and 'answers' for qrecc).
        document_datapath: path to a JSON mapping of doc_id -> list of context chunks.
        dataset_name: dataset identifier; 'qrecc' triggers gold-context augmentation.

    Returns:
        (eval_data, documents) where eval_data maps doc_id to a list of
        {"query": ..., "gold_idx": ...} entries, and documents is the (possibly
        augmented, for qrecc) doc_id -> contexts mapping.
    """
    print('reading evaluation data from %s' % input_datapath)
    with open(input_datapath, "r") as f:
        input_list = json.load(f)

    print('reading documents from %s' % document_datapath)
    with open(document_datapath, "r") as f:
        documents = json.load(f)

    eval_data = {}
    for item in input_list:
        # We incorporate topic information for topiocqa and inscit datasets:
        #   query = get_query_with_topic(item['messages'], item['topic'])
        query = get_query(item['messages'])

        doc_id = item['document']
        gold_idx = item['ground_truth_ctx']['index']

        if dataset_name == 'qrecc':
            # The 'gold context' for the qrecc dataset is obtained based on the
            # word overlaps between gold answer and each context in the document,
            # which might not be the real gold context.
            #
            # To improve the evaluation quality of this dataset, we further add
            # the answer of the query into the 'gold context' to ensure the
            # 'gold context' is the most relevant chunk to the query.
            #
            # Note that this is just for the retrieval evaluation purpose, we do
            # not add answer to the context for the ChatRAG evaluation.
            answer = item['answers'][0]
            documents[doc_id][gold_idx] += " || " + answer

        # setdefault replaces the original if/else insert-or-append pattern.
        eval_data.setdefault(doc_id, []).append({"query": query, "gold_idx": gold_idx})

    return eval_data, documents
|
|