#!/usr/bin/env python3
"""
Compute top-1 accuracy for each model by comparing predictions with ground truth.
"""
import json
import os
from collections import OrderedDict
# Species mapping from demo/app.py
SPECIES_MAP = OrderedDict([
(24, "Jaguar"), # panthera onca
(10, "Ocelot"), # leopardus pardalis
(6, "Mountain Lion"), # puma concolor
(101, "Common Eland"), # tragelaphus oryx
(102, "Waterbuck"), # kobus ellipsiprymnus
])
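
# The annotations file is assumed to follow the COCO-style layout that
# load_ground_truth() reads below ('images' and 'annotations' lists); keys this
# script does not touch are not assumed. A rough sketch:
#
#   {
#     "images":      [{"id": ..., "file_name": "...", ...}, ...],
#     "annotations": [{"image_id": ..., "category_id": 24, ...}, ...]
#   }
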
def load_ground_truth():
    """Load ground truth labels from the demo annotations file."""
    with open('iwildcam_demo_annotations.json', 'r') as f:
        data = json.load(f)

    # Index images by id so each annotation resolves to its filename directly
    images_by_id = {img['id']: img for img in data['images']}

    # Create mapping from filename to true label, keeping only the demo species
    ground_truth = {}
    for annotation in data['annotations']:
        image_info = images_by_id.get(annotation['image_id'])
        if image_info:
            filename = image_info['file_name']
            true_label = SPECIES_MAP.get(annotation['category_id'], "Unknown")
            if true_label != "Unknown":
                ground_truth[filename] = true_label
    return ground_truth
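
# Each zeroshot_results_*.json file is assumed to hold a 'results' mapping from
# image filename to per-species scores (plus a 'model' field recording which
# model produced them); only 'results' is consumed here, and the highest-scoring
# species is taken as the top-1 prediction. A rough sketch (scores are placeholders):
#
#   {
#     "model": "<model name>",
#     "results": {"<file_name>": {"Jaguar": <score>, "Ocelot": <score>, ...}, ...}
#   }
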
def compute_accuracy(results_file, ground_truth):
    """Compute top-1 accuracy for a model's results."""
    with open(results_file, 'r') as f:
        data = json.load(f)
    results = data['results']

    correct = 0
    total = 0
    for filename, scores in results.items():
        if filename in ground_truth:
            # Predicted class is the species with the highest score
            predicted_class = max(scores, key=scores.get)
            true_class = ground_truth[filename]
            if predicted_class == true_class:
                correct += 1
            total += 1

    accuracy = correct / total if total > 0 else 0.0
    return accuracy, correct, total
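
# Minimal usage sketch (the filename below is hypothetical; any file matching the
# zeroshot_results_*.json pattern in the working directory would work):
#
#   gt = load_ground_truth()
#   acc, n_correct, n_total = compute_accuracy('zeroshot_results_example_model.json', gt)
#   print(f"top-1 accuracy: {acc:.4f} ({n_correct}/{n_total})")
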
def main():
    """Compute accuracy for all models."""
    print("Computing top-1 accuracy for each model...\n")

    # Load ground truth
    ground_truth = load_ground_truth()
    print(f"Loaded ground truth for {len(ground_truth)} images")

    # Find all results files in the working directory
    results_files = [f for f in os.listdir('.') if f.startswith('zeroshot_results_') and f.endswith('.json')]
    if not results_files:
        print("No results files found!")
        return
    print(f"Found {len(results_files)} results files\n")

    # Compute accuracy for each model
    accuracies = {}
    for results_file in sorted(results_files):
        try:
            accuracy, correct, total = compute_accuracy(results_file, ground_truth)

            # Recover the model name from the filename by mapping underscores back to '/'
            model_name = results_file.replace('zeroshot_results_', '').replace('.json', '').replace('_', '/')

            accuracies[model_name] = {
                'accuracy': accuracy,
                'correct': correct,
                'total': total,
            }
            print(f"{model_name}:")
            print(f" Accuracy: {accuracy:.4f} ({correct}/{total})")
            print()
        except Exception as e:
            print(f"Error processing {results_file}: {e}")
    # Summary
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)

    # Sort models by accuracy, best first
    sorted_models = sorted(accuracies.items(), key=lambda x: x[1]['accuracy'], reverse=True)
    for i, (model_name, stats) in enumerate(sorted_models, 1):
        print(f"{i}. {model_name}: {stats['accuracy']:.4f}")

    # Show some example predictions vs ground truth
    print("\n" + "=" * 60)
    print("SAMPLE PREDICTIONS (first 10 images)")
    print("=" * 60)
    if results_files:
        # Use the first model's results to show examples
        with open(results_files[0], 'r') as f:
            data = json.load(f)
        results = data['results']

        count = 0
        for filename, scores in results.items():
            if filename in ground_truth and count < 10:
                predicted_class = max(scores, key=scores.get)
                true_class = ground_truth[filename]
                confidence = scores[predicted_class]
                status = "βœ“" if predicted_class == true_class else "βœ—"
                print(f"{filename}:")
                print(f" True: {true_class}")
                print(f" Pred: {predicted_class} ({confidence:.4f}) {status}")
                print()
                count += 1
if __name__ == "__main__":
    main()