Spaces:
Sleeping
Sleeping
orionweller
committed on
Commit
·
5dfae9b
1
Parent(s):
e001df1
try to fix ndcg bug
Browse files
app.py
CHANGED
@@ -213,15 +213,13 @@ def evaluate(qrels, results, k_values):
|
|
213 |
|
214 |
metrics = {}
|
215 |
for k in k_values:
|
216 |
-
|
217 |
-
|
|
|
|
|
218 |
logger.info(f"NDCG@{k}: mean={metrics[f'NDCG@{k}']}, min={min(ndcg_scores)}, max={max(ndcg_scores)}")
|
219 |
logger.info(f"Recall@{k}: mean={metrics[f'Recall@{k}']}, min={min(recall_scores)}, max={max(recall_scores)}")
|
220 |
|
221 |
-
# Add these lines
|
222 |
-
logger.info(f"Number of queries evaluated: {len(scores)}")
|
223 |
-
logger.info(f"Sample evaluation score: {list(scores.items())[0]}")
|
224 |
-
|
225 |
return metrics
|
226 |
|
227 |
@spaces.GPU
|
|
|
213 |
|
214 |
metrics = {}
|
215 |
for k in k_values:
|
216 |
+
ndcg_scores = [query_scores[f"ndcg_cut_{k}"] for query_scores in scores.values()]
|
217 |
+
recall_scores = [query_scores[f"recall_{k}"] for query_scores in scores.values()]
|
218 |
+
metrics[f"NDCG@{k}"] = round(np.mean(ndcg_scores), 3)
|
219 |
+
metrics[f"Recall@{k}"] = round(np.mean(recall_scores), 3)
|
220 |
logger.info(f"NDCG@{k}: mean={metrics[f'NDCG@{k}']}, min={min(ndcg_scores)}, max={max(ndcg_scores)}")
|
221 |
logger.info(f"Recall@{k}: mean={metrics[f'Recall@{k}']}, min={min(recall_scores)}, max={max(recall_scores)}")
|
222 |
|
|
|
|
|
|
|
|
|
223 |
return metrics
|
224 |
|
225 |
@spaces.GPU
|