Uday Chitragar committed
Commit f8731a3 · 1 Parent(s): 3632ba6
Files changed (4)
  1. Makefile +59 -0
  2. README.md +47 -2
  3. app.py +164 -0
  4. requirements.txt +17 -0
Makefile ADDED
@@ -0,0 +1,59 @@
+ # Makefile for Rice Anomaly Detection UI Project
+
+ # Variables
+ VENV_DIR = venv
+ PYTHON = python3
+ PIP = $(VENV_DIR)/bin/pip
+ APP = app.py  # Replace with the name of your main script file
+
+ # Default target
+ .PHONY: all
+ all: setup install run
+
+ # Create virtual environment
+ .PHONY: setup
+ setup:
+ 	@echo "Creating virtual environment..."
+ 	$(PYTHON) -m venv $(VENV_DIR)
+ 	@echo "Virtual environment created."
+
+ # Install dependencies
+ .PHONY: install
+ install: setup
+ 	@echo "Installing dependencies..."
+ 	$(PIP) install --upgrade pip
+ 	$(PIP) install -r requirements.txt
+ 	@echo "Dependencies installed."
+
+ # Run the Gradio app
+ .PHONY: run
+ run:
+ 	@echo "Running the Gradio app..."
+ 	$(VENV_DIR)/bin/python $(APP)
+
+ # Clean up virtual environment and generated files
+ .PHONY: clean
+ clean:
+ 	@echo "Cleaning up..."
+ 	rm -rf $(VENV_DIR)
+ 	find . -type f -name "*.pyc" -delete
+ 	find . -type d -name "__pycache__" -delete
+ 	@echo "Cleaned up."
+
+ # Rebuild everything from scratch
+ .PHONY: rebuild
+ rebuild: clean all
+
+ # Help message
+ .PHONY: help
+ help:
+ 	@echo "Makefile for Rice Anomaly Detection UI Project"
+ 	@echo ""
+ 	@echo "Targets:"
+ 	@echo "  all     : Set up environment, install dependencies, and run the app (default)"
+ 	@echo "  setup   : Create a virtual environment"
+ 	@echo "  install : Install dependencies from requirements.txt"
+ 	@echo "  run     : Run the Gradio app"
+ 	@echo "  clean   : Remove virtual environment and generated files"
+ 	@echo "  rebuild : Clean and rebuild everything"
+ 	@echo "  help    : Show this help message"
README.md CHANGED
@@ -1,2 +1,47 @@
- # anomaly_detection
- Showcase for anomaly detection using pyod
+ # Anomaly Detection UI
+
+ ![GitHub](https://img.shields.io/github/license/your-username/rice-anomaly-detection-ui?style=flat-square)
+ ![GitHub last commit](https://img.shields.io/github/last-commit/your-username/rice-anomaly-detection-ui?style=flat-square)
+ ![GitHub issues](https://img.shields.io/github/issues/your-username/rice-anomaly-detection-ui?style=flat-square)
+
+ *Detect anomalies in rice images using unsupervised and semi-supervised learning with a user-friendly Gradio interface.*
+
+ ## Overview
+
+ This project implements an anomaly detection system for rice images (Basmati vs. Jasmine) using deep learning and unsupervised learning techniques. It leverages EfficientNetB0 for feature extraction, PCA for dimensionality reduction, and PyOD models (e.g., Isolation Forest, LOF, OCSVM) for anomaly detection. The project includes a Gradio-based UI that allows users to:
+
+ - Select between unsupervised and semi-supervised modes.
+ - Choose different PyOD models for anomaly detection.
+ - Tune model parameters (e.g., `contamination`, `n_estimators`).
+ - Visualize results with a PCA scatter plot, classification metrics, and a list of detected outliers.
+
+ The project is built with Python and uses libraries like TensorFlow, scikit-learn, PyOD, OpenCV, Matplotlib, and Gradio. It’s designed to run seamlessly in GitHub Codespaces for development and testing.
+
+ ## Features
+
+ - **Anomaly Detection**: Detects Jasmine rice images as anomalies among Basmati images.
+ - **Modes**: Supports both unsupervised and semi-supervised learning.
+ - **Models**: Includes multiple PyOD models (Isolation Forest, Local Outlier Factor, One-Class SVM).
+ - **Interactive UI**: Gradio interface with dropdowns, sliders, and visualizations.
+ - **Visualization**: PCA scatter plot to visualize data distribution and detected outliers.
+ - **Metrics**: Provides a classification report, AUC score, and a list of detected outliers with filenames.
+
+ ## Prerequisites
+
+ To run this project, you’ll need:
+ - A GitHub account with access to [GitHub Codespaces](https://github.com/features/codespaces).
+ - The [Rice Image Dataset](https://www.kaggle.com/datasets/muratkokludataset/rice-image-dataset) (you’ll need to upload it to Codespaces or adjust the dataset path in `app.py`).
+
+ ## Getting Started with GitHub Codespaces
+
+ GitHub Codespaces provides a cloud-based development environment pre-configured with all the tools you need to run this project. Follow these steps to get started:
+
+ 1. **Open the Project in Codespaces**:
+    - Navigate to the repository: [your-username/rice-anomaly-detection-ui](https://github.com/your-username/rice-anomaly-detection-ui).
+    - Click the green **Code** button, then select **Open with Codespaces** > **New codespace**.
+    - Wait for the Codespace to set up (this may take a few minutes).
+
+ 2. **Set Up the Environment**:
+    - Codespaces will automatically create a virtual environment and install dependencies if a `requirements.txt` file is detected. However, to ensure everything is set up correctly, run:
+      ```bash
+      make
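
The Overview above describes the detection core as EfficientNetB0 feature extraction, PCA reduction, and a PyOD detector. The following is a minimal, illustrative sketch of that chain only; it uses random placeholder vectors in place of real image features and an assumed `contamination` of 0.05, not the project's actual data or settings:

```python
# Sketch of the pipeline the README describes: features -> PCA -> PyOD detector.
# Random vectors stand in for EfficientNetB0 pooled image features (1280-dim).
import numpy as np
from sklearn.decomposition import PCA
from pyod.models.iforest import IForest

rng = np.random.default_rng(0)
train_features = rng.normal(size=(200, 1280))  # placeholder "normal" training features
test_features = rng.normal(size=(50, 1280))    # placeholder test features

pca = PCA(n_components=20)                     # reduce dimensionality before detection
train_reduced = pca.fit_transform(train_features)
test_reduced = pca.transform(test_features)

detector = IForest(contamination=0.05)         # assumed value; app.py exposes this as a slider
detector.fit(train_reduced)
labels = detector.predict(test_reduced)        # 1 = flagged as outlier, 0 = inlier
print(f"Flagged {labels.sum()} of {len(labels)} samples as outliers")
```

`app.py` below wires these same steps to the actual rice images and a Gradio UI.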
app.py ADDED
@@ -0,0 +1,164 @@
+ import gradio as gr
+ import os
+ import numpy as np
+ import cv2
+ import random
+ from sklearn.decomposition import PCA
+ from sklearn.metrics import classification_report, roc_auc_score
+ from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input
+ from tensorflow.keras.models import Model
+ from pyod.models.iforest import IForest
+ from pyod.models.lof import LOF
+ from pyod.models.ocsvm import OCSVM
+ import matplotlib.pyplot as plt
+
+ # Paths (adjust as needed)
+ dataset_path = "/kaggle/input/rice-image-dataset/Rice_Image_Dataset"
+ basmati_path = os.path.join(dataset_path, "Basmati")
+ jasmine_path = os.path.join(dataset_path, "Jasmine")
+
+ # Load and preprocess images
+ def load_images_from_folder(folder, label, limit=None):
+     images = []
+     filenames = os.listdir(folder)
+     if limit:
+         filenames = random.sample(filenames, limit)
+     img_data = []
+     for filename in filenames:
+         img_path = os.path.join(folder, filename)
+         img = cv2.imread(img_path)
+         if img is not None:
+             img = cv2.resize(img, (128, 128))
+             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+             img = preprocess_input(img.astype(np.float32))
+             images.append(img)
+             img_data.append((img, filename, label))
+     return np.array(images), img_data
+
+ # Load data
+ all_basmati_images, all_basmati_data = load_images_from_folder(basmati_path, label=0)
+ jasmine_images, jasmine_data = load_images_from_folder(jasmine_path, label=1, limit=None)
+
+ # Training and test sets
+ basmati_train_count = int(0.2 * len(all_basmati_images))
+ basmati_train_indices = random.sample(range(len(all_basmati_images)), basmati_train_count)
+ X_train = np.array([all_basmati_images[i] for i in basmati_train_indices])
+ train_data = [all_basmati_data[i] for i in basmati_train_indices]
+
+ basmati_test_count = 200
+ basmati_test_indices = random.sample(range(len(all_basmati_images)), basmati_test_count)
+ X_test_basmati = np.array([all_basmati_images[i] for i in basmati_test_indices])
+ test_data_basmati = [all_basmati_data[i] for i in basmati_test_indices]
+
+ jasmine_test_count = 10
+ jasmine_test_images, jasmine_test_data = load_images_from_folder(jasmine_path, label=1, limit=jasmine_test_count)
+
+ X_test = np.concatenate([X_test_basmati, jasmine_test_images], axis=0)
+ test_data = test_data_basmati + jasmine_test_data
+ y_test = np.array([0] * len(X_test_basmati) + [1] * len(jasmine_test_images))
+
+ # Feature extraction
+ base_model = EfficientNetB0(weights='imagenet', include_top=False, pooling='avg', input_shape=(128, 128, 3))
+ feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)
+
+ def extract_features(images, batch_size=16):
+     return feature_extractor.predict(images, batch_size=batch_size, verbose=1)
+
+ X_train_features = extract_features(X_train)
+ X_test_features = extract_features(X_test)
+
+ # PCA
+ pca = PCA(n_components=100)
+ X_train_reduced = pca.fit_transform(X_train_features)
+ X_test_reduced = pca.transform(X_test_features)
+
+ # Main anomaly detection function
+ def run_anomaly_detection(mode, model_name, contamination, n_estimators, n_neighbors, nu):
+     # Adjust training data for semi-supervised mode
+     if mode == "Semi-supervised":
+         # Add a small portion of Jasmine to training (e.g., 5 images)
+         jasmine_train_count = 5
+         jasmine_train_images, jasmine_train_data = load_images_from_folder(jasmine_path, label=1, limit=jasmine_train_count)
+         X_train_semi = np.concatenate([X_train, jasmine_train_images], axis=0)
+         X_train_semi_features = extract_features(X_train_semi)
+         X_train_semi_reduced = pca.transform(X_train_semi_features)
+     else:
+         X_train_semi_reduced = X_train_reduced
+
+     # Initialize model based on selection
+     if model_name == "IForest":
+         outlier_detector = IForest(contamination=contamination, n_estimators=int(n_estimators))
+     elif model_name == "LOF":
+         outlier_detector = LOF(contamination=contamination, n_neighbors=int(n_neighbors))
+     else:  # OCSVM
+         outlier_detector = OCSVM(contamination=contamination, nu=nu)
+
+     # Fit and predict
+     outlier_detector.fit(X_train_semi_reduced)
+     predictions = outlier_detector.predict(X_test_reduced)
+
+     # Evaluation
+     report = classification_report(y_test, predictions)
+     try:
+         auc_score = roc_auc_score(y_test, predictions)
+         auc_text = f"AUC Score: {auc_score:.4f}"
+     except ValueError:
+         auc_text = "AUC Score could not be calculated."
+
+     # Outlier filenames
+     outlier_indices = np.where(predictions == 1)[0]
+     outlier_list = []
+     for idx in outlier_indices:
+         img, filename, label = test_data[idx]
+         rice_type = "Jasmine" if label == 1 else "Basmati"
+         outlier_list.append(f"Filename: {filename}, Actual Label: {rice_type}")
+     outlier_text = "\n".join(outlier_list) if outlier_list else "No outliers detected."
+
+     # PCA Visualization (2D)
+     pca_vis = PCA(n_components=2)
+     X_test_2d = pca_vis.fit_transform(X_test_features)
+
+     plt.figure(figsize=(10, 7))
+     plt.scatter(X_test_2d[y_test == 0, 0], X_test_2d[y_test == 0, 1], c='blue', label='Basmati', alpha=0.6, s=40)
+     plt.scatter(X_test_2d[y_test == 1, 0], X_test_2d[y_test == 1, 1], c='red', label='Jasmine', alpha=0.6, s=40)
+     plt.scatter(X_test_2d[outlier_indices, 0], X_test_2d[outlier_indices, 1],
+                 facecolors='none', edgecolors='black', linewidths=1.5, label='Outliers', s=80)
+     plt.title("PCA Projection with Outliers")
+     plt.xlabel("PCA Component 1")
+     plt.ylabel("PCA Component 2")
+     plt.legend()
+     plt.grid(True)
+     plt.tight_layout()
+
+     return report, auc_text, outlier_text, plt
+
+ # Gradio Interface
+ with gr.Blocks() as interface:
+     gr.Markdown("## Rice Anomaly Detection UI")
+
+     with gr.Row():
+         mode = gr.Dropdown(["Unsupervised", "Semi-supervised"], label="Mode")
+         model_name = gr.Dropdown(["IForest", "LOF", "OCSVM"], label="Model")
+
+     with gr.Row():
+         contamination = gr.Slider(0, 0.5, value=0.05, step=0.01, label="Contamination")
+         n_estimators = gr.Slider(100, 500, value=100, step=10, label="N Estimators (IForest)")
+         n_neighbors = gr.Slider(5, 50, value=20, step=1, label="N Neighbors (LOF)")
+         nu = gr.Slider(0, 1, value=0.1, step=0.01, label="Nu (OCSVM)")
+
+     submit_btn = gr.Button("Run Detection")
+
+     with gr.Row():
+         report_output = gr.Textbox(label="Classification Report")
+         auc_output = gr.Textbox(label="AUC Score")
+
+     outlier_output = gr.Textbox(label="Detected Outliers")
+     plot_output = gr.Plot(label="PCA Projection")
+
+     submit_btn.click(
+         fn=run_anomaly_detection,
+         inputs=[mode, model_name, contamination, n_estimators, n_neighbors, nu],
+         outputs=[report_output, auc_output, outlier_output, plot_output]
+     )
+
+ interface.launch()
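
The README points out that the hard-coded Kaggle `dataset_path` in `app.py` must be adjusted when running elsewhere (e.g., in Codespaces). One hedged way to make that adjustable, assuming a hypothetical `RICE_DATASET_DIR` environment variable that is not part of this commit:

```python
# Hypothetical tweak (not in the commit): read the dataset location from an
# environment variable so the Kaggle default can be overridden without
# editing app.py directly.
import os

dataset_path = os.environ.get(
    "RICE_DATASET_DIR",  # assumed variable name, chosen for illustration
    "/kaggle/input/rice-image-dataset/Rice_Image_Dataset",
)
basmati_path = os.path.join(dataset_path, "Basmati")
jasmine_path = os.path.join(dataset_path, "Jasmine")
```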
requirements.txt ADDED
@@ -0,0 +1,17 @@
+ # Core ML and data processing
+ numpy>=1.23.0
+ scikit-learn>=1.2.0
+ tensorflow>=2.10.0
+ pyod>=1.1.0
+
+ # Image processing
+ opencv-python>=4.7.0
+
+ # Plotting
+ matplotlib>=3.6.0
+
+ # UI
+ gradio>=4.0.0
+
+ # Optional: for better reproducibility
+ pandas>=1.5.0