File size: 2,565 Bytes
e418977
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import pickle
from .feature_extractor import FeatureExtractor
import time
from tqdm import tqdm

def precompute_embeddings(image_dir='data/images', output_path='data/embeddings.pkl'):
    """Extract an embedding for every image in ``image_dir`` and pickle the results.

    Files are selected by extension (``.png``, ``.jpg``, ``.jpeg``, matched
    case-insensitively) and processed in sorted filename order. An image whose
    extraction raises is reported and skipped; it does not abort the run.

    Args:
        image_dir: Directory scanned (non-recursively) for image files.
        output_path: Destination of the pickle file. It holds a dict with the
            keys ``'embeddings'`` and ``'image_paths'``, two parallel lists.
            Missing parent directories are created.

    Returns:
        A tuple ``(embeddings, image_paths)`` covering only the images that
        were processed successfully; ``embeddings[i]`` is whatever
        ``FeatureExtractor.extract_features`` returned for ``image_paths[i]``.

    Raises:
        OSError: If ``image_dir`` cannot be listed or ``output_path`` cannot
            be created or written.
    """
    # Initialize the feature extractor
    extractor = FeatureExtractor()

    embeddings = []
    image_paths = []

    # os.listdir returns entries in arbitrary order; sorting makes the saved
    # embedding order reproducible between runs and machines.
    valid_images = sorted(
        f for f in os.listdir(image_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    )
    total_images = len(valid_images)

    # Create the output directory up front so a missing folder fails (or is
    # fixed) before the extraction work is done, not after it.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    print(f"\nFound {total_images} images to process")

    # Rough up-front estimate: a flat guess of ~1 second per image.
    estimated_time = total_images * 1
    print(f"Estimated time: {estimated_time//60} minutes and {estimated_time%60} seconds\n")

    start_time = time.time()
    # NOTE: the list was already filtered case-insensitively above, so no
    # second extension check happens here. A case-sensitive re-check would
    # silently drop files such as "photo.JPG".
    for position, filename in enumerate(tqdm(valid_images, desc="Processing images"), start=1):
        img_path = os.path.join(image_dir, filename)

        # Only the extraction call is expected to fail for a bad image.
        try:
            embedding = extractor.extract_features(img_path)
        except Exception as e:
            # Best effort: report the failure and continue with the next image.
            tqdm.write(f"Error processing {filename}: {e}")
            continue

        embeddings.append(embedding)
        image_paths.append(img_path)

        # Remaining-time estimate based on the average time per image so far.
        elapsed_time = time.time() - start_time
        avg_time_per_image = elapsed_time / position
        estimated_remaining_time = (total_images - position) * avg_time_per_image

        # tqdm.write prints above the progress bar instead of corrupting it.
        tqdm.write(
            f"Processing image {position}/{total_images}: {filename}"
            f" | Remaining time: {estimated_remaining_time//60:.0f}m {estimated_remaining_time%60:.0f}s"
        )

    # Save embeddings and paths
    with open(output_path, 'wb') as f:
        pickle.dump({'embeddings': embeddings, 'image_paths': image_paths}, f)

    total_time = time.time() - start_time
    print("\nProcessing complete!")
    print(f"Total time taken: {total_time//60:.0f} minutes and {total_time%60:.0f} seconds")
    print(f"Successfully processed {len(embeddings)}/{total_images} images")
    print(f"Embeddings saved to {output_path}")

    return embeddings, image_paths

# Script entry point: build the embeddings using the default paths.
# NOTE(review): this module uses a relative import (`from .feature_extractor ...`),
# so it presumably has to be launched as part of its package
# (`python -m <package>.<module>`) rather than as a bare file — confirm.
if __name__ == "__main__":
    precompute_embeddings()