"""Compute top-1 accuracy for each model by comparing predictions with ground truth."""
import json
import os
from collections import OrderedDict
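
# Input file layout (inferred from how this script reads the files; an assumption
# about the data, not a specification):
#   - iwildcam_demo_annotations.json: COCO-style annotations with an 'annotations'
#     list of {'image_id', 'category_id'} entries and an 'images' list of
#     {'id', 'file_name'} entries.
#   - zeroshot_results_<model>.json: {'model': <name>, 'results': {<file_name>:
#     {<species name>: <score>, ...}, ...}}.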

SPECIES_MAP = OrderedDict([
    (24, "Jaguar"),
    (10, "Ocelot"),
    (6, "Mountain Lion"),
    (101, "Common Eland"),
    (102, "Waterbuck"),
])
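# Only the five species above are evaluated: annotations whose category_id is not
# in SPECIES_MAP resolve to "Unknown" and are skipped by load_ground_truth().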


def load_ground_truth():
    """Load ground truth labels from annotations."""
    with open('iwildcam_demo_annotations.json', 'r') as f:
        data = json.load(f)

    ground_truth = {}
    for annotation in data['annotations']:
        image_id = annotation['image_id']
        category_id = annotation['category_id']
        image_info = next((img for img in data['images'] if img['id'] == image_id), None)
        if image_info:
            filename = image_info['file_name']
            true_label = SPECIES_MAP.get(category_id, "Unknown")
            if true_label != "Unknown":
                ground_truth[filename] = true_label

    return ground_truth
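# The returned ground_truth dict maps image file names to species names from
# SPECIES_MAP, e.g. {"<file_name>.jpg": "Jaguar"} (placeholder key, for illustration).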


def compute_accuracy(results_file, ground_truth):
    """Compute top-1 accuracy for a model's results."""
    with open(results_file, 'r') as f:
        data = json.load(f)

    model_name = data['model']
    results = data['results']

    correct = 0
    total = 0

    for filename, scores in results.items():
        if filename in ground_truth:
            predicted_class = max(scores, key=scores.get)
            true_class = ground_truth[filename]

            if predicted_class == true_class:
                correct += 1
            total += 1

    accuracy = correct / total if total > 0 else 0.0
    return accuracy, correct, total
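# Note: top-1 accuracy here relies on exact string matches between the class names
# used as keys in each results file's score dict and the species names in
# SPECIES_MAP; images without a ground-truth entry are ignored.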


def main():
    """Compute accuracy for all models."""
    print("Computing top-1 accuracy for each model...\n")

    ground_truth = load_ground_truth()
    print(f"Loaded ground truth for {len(ground_truth)} images")

    results_files = [f for f in os.listdir('.') if f.startswith('zeroshot_results_') and f.endswith('.json')]

    if not results_files:
        print("No results files found!")
        return

    print(f"Found {len(results_files)} results files\n")
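    # Evaluate each results file and collect per-model accuracy statistics.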
    accuracies = {}
    for results_file in sorted(results_files):
        try:
            accuracy, correct, total = compute_accuracy(results_file, ground_truth)

            model_name = results_file.replace('zeroshot_results_', '').replace('.json', '').replace('_', '/')

            accuracies[model_name] = {
                'accuracy': accuracy,
                'correct': correct,
                'total': total
            }

            print(f"{model_name}:")
            print(f" Accuracy: {accuracy:.4f} ({correct}/{total})")
            print()

        except Exception as e:
            print(f"Error processing {results_file}: {e}")
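    # Rank models from best to worst top-1 accuracy for the summary.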
print("="*60) |
|
|
print("SUMMARY") |
|
|
print("="*60) |
|
|
|
|
|
|
|
|
sorted_models = sorted(accuracies.items(), key=lambda x: x[1]['accuracy'], reverse=True) |
|
|
|
|
|
for i, (model_name, stats) in enumerate(sorted_models, 1): |
|
|
print(f"{i}. {model_name}: {stats['accuracy']:.4f}") |
|
|
|
|
|
|
|
|
print("\n" + "="*60) |
|
|
print("SAMPLE PREDICTIONS (first 10 images)") |
|
|
print("="*60) |
|
|
|
|
|
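    # Show up to 10 labelled predictions from the first results file discovered
    # (os.listdir order).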
    if results_files:
        with open(results_files[0], 'r') as f:
            data = json.load(f)

        results = data['results']
        count = 0

        for filename, scores in results.items():
            if filename in ground_truth and count < 10:
                predicted_class = max(scores, key=scores.get)
                true_class = ground_truth[filename]
                confidence = scores[predicted_class]

                status = "✓" if predicted_class == true_class else "✗"

                print(f"{filename}:")
                print(f" True: {true_class}")
                print(f" Pred: {predicted_class} ({confidence:.4f}) {status}")
                print()

                count += 1


if __name__ == "__main__":
    main()