#!/usr/bin/env python3
"""
Compute top-1 accuracy for each model by comparing predictions with ground truth.
"""
import json
import os
from collections import OrderedDict
# Species mapping from demo/app.py
SPECIES_MAP = OrderedDict([
(24, "Jaguar"), # panthera onca
(10, "Ocelot"), # leopardus pardalis
(6, "Mountain Lion"), # puma concolor
(101, "Common Eland"), # tragelaphus oryx
(102, "Waterbuck"), # kobus ellipsiprymnus
])
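
# The annotations file is assumed to follow the COCO-style layout that
# load_ground_truth() reads below ('images' and 'annotations' lists); keys this
# script does not touch are not assumed. A rough sketch:
#
#   {
#     "images":      [{"id": ..., "file_name": "...", ...}, ...],
#     "annotations": [{"image_id": ..., "category_id": 24, ...}, ...]
#   }
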
def load_ground_truth():
    """Load ground truth labels from the demo annotations file."""
    with open('iwildcam_demo_annotations.json', 'r') as f:
        data = json.load(f)

    # Index images by id so each annotation resolves to its filename directly
    images_by_id = {img['id']: img for img in data['images']}

    # Create mapping from filename to true label, keeping only the demo species
    ground_truth = {}
    for annotation in data['annotations']:
        image_info = images_by_id.get(annotation['image_id'])
        if image_info:
            filename = image_info['file_name']
            true_label = SPECIES_MAP.get(annotation['category_id'], "Unknown")
            if true_label != "Unknown":
                ground_truth[filename] = true_label
    return ground_truth
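
# Each zeroshot_results_*.json file is assumed to hold a 'results' mapping from
# image filename to per-species scores (plus a 'model' field recording which
# model produced them); only 'results' is consumed here, and the highest-scoring
# species is taken as the top-1 prediction. A rough sketch (scores are placeholders):
#
#   {
#     "model": "<model name>",
#     "results": {"<file_name>": {"Jaguar": <score>, "Ocelot": <score>, ...}, ...}
#   }
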
def compute_accuracy(results_file, ground_truth):
    """Compute top-1 accuracy for a model's results."""
    with open(results_file, 'r') as f:
        data = json.load(f)
    results = data['results']

    correct = 0
    total = 0
    for filename, scores in results.items():
        if filename in ground_truth:
            # Predicted class is the species with the highest score
            predicted_class = max(scores, key=scores.get)
            true_class = ground_truth[filename]
            if predicted_class == true_class:
                correct += 1
            total += 1

    accuracy = correct / total if total > 0 else 0.0
    return accuracy, correct, total
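
# Minimal usage sketch (the filename below is hypothetical; any file matching the
# zeroshot_results_*.json pattern in the working directory would work):
#
#   gt = load_ground_truth()
#   acc, n_correct, n_total = compute_accuracy('zeroshot_results_example_model.json', gt)
#   print(f"top-1 accuracy: {acc:.4f} ({n_correct}/{n_total})")
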
def main():
    """Compute accuracy for all models."""
    print("Computing top-1 accuracy for each model...\n")

    # Load ground truth
    ground_truth = load_ground_truth()
    print(f"Loaded ground truth for {len(ground_truth)} images")

    # Find all results files in the working directory
    results_files = [f for f in os.listdir('.') if f.startswith('zeroshot_results_') and f.endswith('.json')]
    if not results_files:
        print("No results files found!")
        return
    print(f"Found {len(results_files)} results files\n")

    # Compute accuracy for each model
    accuracies = {}
    for results_file in sorted(results_files):
        try:
            accuracy, correct, total = compute_accuracy(results_file, ground_truth)

            # Recover the model name from the filename by mapping underscores back to '/'
            model_name = results_file.replace('zeroshot_results_', '').replace('.json', '').replace('_', '/')

            accuracies[model_name] = {
                'accuracy': accuracy,
                'correct': correct,
                'total': total,
            }
            print(f"{model_name}:")
            print(f" Accuracy: {accuracy:.4f} ({correct}/{total})")
            print()
        except Exception as e:
            print(f"Error processing {results_file}: {e}")
    # Summary
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)

    # Sort models by accuracy, best first
    sorted_models = sorted(accuracies.items(), key=lambda x: x[1]['accuracy'], reverse=True)
    for i, (model_name, stats) in enumerate(sorted_models, 1):
        print(f"{i}. {model_name}: {stats['accuracy']:.4f}")

    # Show some example predictions vs ground truth
    print("\n" + "=" * 60)
    print("SAMPLE PREDICTIONS (first 10 images)")
    print("=" * 60)
    if results_files:
        # Use the first model's results to show examples
        with open(results_files[0], 'r') as f:
            data = json.load(f)
        results = data['results']

        count = 0
        for filename, scores in results.items():
            if filename in ground_truth and count < 10:
                predicted_class = max(scores, key=scores.get)
                true_class = ground_truth[filename]
                confidence = scores[predicted_class]
                status = "βœ“" if predicted_class == true_class else "βœ—"
                print(f"{filename}:")
                print(f" True: {true_class}")
                print(f" Pred: {predicted_class} ({confidence:.4f}) {status}")
                print()
                count += 1
if __name__ == "__main__":
    main()