Uday Chitragar committed
Commit f8731a3 · 1 Parent(s): 3632ba6
Files changed (4)
  1. Makefile +59 -0
  2. README.md +47 -2
  3. app.py +164 -0
  4. requirements.txt +17 -0
Makefile ADDED
@@ -0,0 +1,59 @@
+ # Makefile for Rice Anomaly Detection UI Project
+
+ # Variables
+ VENV_DIR = venv
+ PYTHON = python3
+ PIP = $(VENV_DIR)/bin/pip
+ APP = app.py  # Replace with the name of your main script file
+
+ # Default target
+ .PHONY: all
+ all: setup install run
+
+ # Create virtual environment
+ .PHONY: setup
+ setup:
+ 	@echo "Creating virtual environment..."
+ 	$(PYTHON) -m venv $(VENV_DIR)
+ 	@echo "Virtual environment created."
+
+ # Install dependencies
+ .PHONY: install
+ install: setup
+ 	@echo "Installing dependencies..."
+ 	$(PIP) install --upgrade pip
+ 	$(PIP) install -r requirements.txt
+ 	@echo "Dependencies installed."
+
+ # Run the Gradio app
+ .PHONY: run
+ run:
+ 	@echo "Running the Gradio app..."
+ 	$(VENV_DIR)/bin/python $(APP)
+
+ # Clean up virtual environment and generated files
+ .PHONY: clean
+ clean:
+ 	@echo "Cleaning up..."
+ 	rm -rf $(VENV_DIR)
+ 	find . -type f -name "*.pyc" -delete
+ 	find . -type d -name "__pycache__" -delete
+ 	@echo "Cleaned up."
+
+ # Rebuild everything from scratch
+ .PHONY: rebuild
+ rebuild: clean all
+
+ # Help message
+ .PHONY: help
+ help:
+ 	@echo "Makefile for Rice Anomaly Detection UI Project"
+ 	@echo ""
+ 	@echo "Targets:"
+ 	@echo "  all     : Set up environment, install dependencies, and run the app (default)"
+ 	@echo "  setup   : Create a virtual environment"
+ 	@echo "  install : Install dependencies from requirements.txt"
+ 	@echo "  run     : Run the Gradio app"
+ 	@echo "  clean   : Remove virtual environment and generated files"
+ 	@echo "  rebuild : Clean and rebuild everything"
+ 	@echo "  help    : Show this help message"
README.md CHANGED
@@ -1,2 +1,47 @@
- # anomaly_detection
- Showcase for anomaly detection using pyod
+ # Anomaly Detection UI
+
+ ![GitHub](https://img.shields.io/github/license/your-username/rice-anomaly-detection-ui?style=flat-square)
+ ![GitHub last commit](https://img.shields.io/github/last-commit/your-username/rice-anomaly-detection-ui?style=flat-square)
+ ![GitHub issues](https://img.shields.io/github/issues/your-username/rice-anomaly-detection-ui?style=flat-square)
+
+ *Detect anomalies in rice images using unsupervised and semi-supervised learning with a user-friendly Gradio interface.*
+
+ ## Overview
+
+ This project implements an anomaly detection system for rice images (Basmati vs. Jasmine) using deep learning and unsupervised learning techniques. It leverages EfficientNetB0 for feature extraction, PCA for dimensionality reduction, and PyOD models (e.g., Isolation Forest, LOF, OCSVM) for anomaly detection. The project includes a Gradio-based UI that allows users to:
+
+ - Select between unsupervised and semi-supervised modes.
+ - Choose different PyOD models for anomaly detection.
+ - Tune model parameters (e.g., `contamination`, `n_estimators`).
+ - Visualize results with a PCA scatter plot, classification metrics, and a list of detected outliers.
+
+ The project is built with Python and uses libraries like TensorFlow, scikit-learn, PyOD, OpenCV, Matplotlib, and Gradio. It’s designed to run seamlessly in GitHub Codespaces for development and testing.
+
+ ## Features
+
+ - **Anomaly Detection**: Detects Jasmine rice images as anomalies among Basmati images.
+ - **Modes**: Supports both unsupervised and semi-supervised learning.
+ - **Models**: Includes multiple PyOD models (Isolation Forest, Local Outlier Factor, One-Class SVM).
+ - **Interactive UI**: Gradio interface with dropdowns, sliders, and visualizations.
+ - **Visualization**: PCA scatter plot to visualize data distribution and detected outliers.
+ - **Metrics**: Provides a classification report, AUC score, and a list of detected outliers with filenames.
+
+ ## Prerequisites
+
+ To run this project, you’ll need:
+ - A GitHub account with access to [GitHub Codespaces](https://github.com/features/codespaces).
+ - The [Rice Image Dataset](https://www.kaggle.com/datasets/muratkokludataset/rice-image-dataset) (you’ll need to upload it to Codespaces or adjust the dataset path in `app.py`).
+
+ ## Getting Started with GitHub Codespaces
+
+ GitHub Codespaces provides a cloud-based development environment pre-configured with all the tools you need to run this project. Follow these steps to get started:
+
+ 1. **Open the Project in Codespaces**:
+    - Navigate to the repository: [your-username/rice-anomaly-detection-ui](https://github.com/your-username/rice-anomaly-detection-ui).
+    - Click the green **Code** button, then select **Open with Codespaces** > **New codespace**.
+    - Wait for the Codespace to set up (this may take a few minutes).
+
+ 2. **Set Up the Environment**:
+    - Codespaces will automatically create a virtual environment and install dependencies if a `requirements.txt` file is detected. However, to ensure everything is set up correctly, run:
+      ```bash
+      make
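
The Overview above describes the detection core as EfficientNetB0 feature extraction, PCA reduction, and a PyOD detector. The following is a minimal, illustrative sketch of that chain only; it uses random placeholder vectors in place of real image features and an assumed `contamination` of 0.05, not the project's actual data or settings:

```python
# Sketch of the pipeline the README describes: features -> PCA -> PyOD detector.
# Random vectors stand in for EfficientNetB0 pooled image features (1280-dim).
import numpy as np
from sklearn.decomposition import PCA
from pyod.models.iforest import IForest

rng = np.random.default_rng(0)
train_features = rng.normal(size=(200, 1280))  # placeholder "normal" training features
test_features = rng.normal(size=(50, 1280))    # placeholder test features

pca = PCA(n_components=20)                     # reduce dimensionality before detection
train_reduced = pca.fit_transform(train_features)
test_reduced = pca.transform(test_features)

detector = IForest(contamination=0.05)         # assumed value; app.py exposes this as a slider
detector.fit(train_reduced)
labels = detector.predict(test_reduced)        # 1 = flagged as outlier, 0 = inlier
print(f"Flagged {labels.sum()} of {len(labels)} samples as outliers")
```

`app.py` below wires these same steps to the actual rice images and a Gradio UI.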
app.py ADDED
@@ -0,0 +1,164 @@
+ import gradio as gr
+ import os
+ import numpy as np
+ import cv2
+ import random
+ from sklearn.decomposition import PCA
+ from sklearn.metrics import classification_report, roc_auc_score
+ from tensorflow.keras.applications.efficientnet import EfficientNetB0, preprocess_input
+ from tensorflow.keras.models import Model
+ from pyod.models.iforest import IForest
+ from pyod.models.lof import LOF
+ from pyod.models.ocsvm import OCSVM
+ import matplotlib.pyplot as plt
+
+ # Paths (adjust as needed)
+ dataset_path = "/kaggle/input/rice-image-dataset/Rice_Image_Dataset"
+ basmati_path = os.path.join(dataset_path, "Basmati")
+ jasmine_path = os.path.join(dataset_path, "Jasmine")
+
+ # Load and preprocess images
+ def load_images_from_folder(folder, label, limit=None):
+     images = []
+     filenames = os.listdir(folder)
+     if limit:
+         filenames = random.sample(filenames, limit)
+     img_data = []
+     for filename in filenames:
+         img_path = os.path.join(folder, filename)
+         img = cv2.imread(img_path)
+         if img is not None:
+             img = cv2.resize(img, (128, 128))
+             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+             img = preprocess_input(img.astype(np.float32))
+             images.append(img)
+             img_data.append((img, filename, label))
+     return np.array(images), img_data
+
+ # Load data
+ all_basmati_images, all_basmati_data = load_images_from_folder(basmati_path, label=0)
+ jasmine_images, jasmine_data = load_images_from_folder(jasmine_path, label=1, limit=None)
+
+ # Training and test sets
+ basmati_train_count = int(0.2 * len(all_basmati_images))
+ basmati_train_indices = random.sample(range(len(all_basmati_images)), basmati_train_count)
+ X_train = np.array([all_basmati_images[i] for i in basmati_train_indices])
+ train_data = [all_basmati_data[i] for i in basmati_train_indices]
+
+ basmati_test_count = 200
+ basmati_test_indices = random.sample(range(len(all_basmati_images)), basmati_test_count)
+ X_test_basmati = np.array([all_basmati_images[i] for i in basmati_test_indices])
+ test_data_basmati = [all_basmati_data[i] for i in basmati_test_indices]
+
+ jasmine_test_count = 10
+ jasmine_test_images, jasmine_test_data = load_images_from_folder(jasmine_path, label=1, limit=jasmine_test_count)
+
+ X_test = np.concatenate([X_test_basmati, jasmine_test_images], axis=0)
+ test_data = test_data_basmati + jasmine_test_data
+ y_test = np.array([0] * len(X_test_basmati) + [1] * len(jasmine_test_images))
+
+ # Feature extraction
+ base_model = EfficientNetB0(weights='imagenet', include_top=False, pooling='avg', input_shape=(128, 128, 3))
+ feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)
+
+ def extract_features(images, batch_size=16):
+     return feature_extractor.predict(images, batch_size=batch_size, verbose=1)
+
+ X_train_features = extract_features(X_train)
+ X_test_features = extract_features(X_test)
+
+ # PCA
+ pca = PCA(n_components=100)
+ X_train_reduced = pca.fit_transform(X_train_features)
+ X_test_reduced = pca.transform(X_test_features)
+
+ # Main anomaly detection function
+ def run_anomaly_detection(mode, model_name, contamination, n_estimators, n_neighbors, nu):
+     # Adjust training data for semi-supervised mode
+     if mode == "Semi-supervised":
+         # Add a small portion of Jasmine to training (e.g., 5 images)
+         jasmine_train_count = 5
+         jasmine_train_images, jasmine_train_data = load_images_from_folder(jasmine_path, label=1, limit=jasmine_train_count)
+         X_train_semi = np.concatenate([X_train, jasmine_train_images], axis=0)
+         X_train_semi_features = extract_features(X_train_semi)
+         X_train_semi_reduced = pca.transform(X_train_semi_features)
+     else:
+         X_train_semi_reduced = X_train_reduced
+
+     # Initialize model based on selection
+     if model_name == "IForest":
+         outlier_detector = IForest(contamination=contamination, n_estimators=int(n_estimators))
+     elif model_name == "LOF":
+         outlier_detector = LOF(contamination=contamination, n_neighbors=int(n_neighbors))
+     else:  # OCSVM
+         outlier_detector = OCSVM(contamination=contamination, nu=nu)
+
+     # Fit and predict
+     outlier_detector.fit(X_train_semi_reduced)
+     predictions = outlier_detector.predict(X_test_reduced)
+
+     # Evaluation
+     report = classification_report(y_test, predictions)
+     try:
+         auc_score = roc_auc_score(y_test, predictions)
+         auc_text = f"AUC Score: {auc_score:.4f}"
+     except ValueError:
+         auc_text = "AUC Score could not be calculated."
+
+     # Outlier filenames
+     outlier_indices = np.where(predictions == 1)[0]
+     outlier_list = []
+     for idx in outlier_indices:
+         img, filename, label = test_data[idx]
+         rice_type = "Jasmine" if label == 1 else "Basmati"
+         outlier_list.append(f"Filename: {filename}, Actual Label: {rice_type}")
+     outlier_text = "\n".join(outlier_list) if outlier_list else "No outliers detected."
+
+     # PCA Visualization (2D)
+     pca_vis = PCA(n_components=2)
+     X_test_2d = pca_vis.fit_transform(X_test_features)
+
+     plt.figure(figsize=(10, 7))
+     plt.scatter(X_test_2d[y_test == 0, 0], X_test_2d[y_test == 0, 1], c='blue', label='Basmati', alpha=0.6, s=40)
+     plt.scatter(X_test_2d[y_test == 1, 0], X_test_2d[y_test == 1, 1], c='red', label='Jasmine', alpha=0.6, s=40)
+     plt.scatter(X_test_2d[outlier_indices, 0], X_test_2d[outlier_indices, 1],
+                 facecolors='none', edgecolors='black', linewidths=1.5, label='Outliers', s=80)
+     plt.title("PCA Projection with Outliers")
+     plt.xlabel("PCA Component 1")
+     plt.ylabel("PCA Component 2")
+     plt.legend()
+     plt.grid(True)
+     plt.tight_layout()
+
+     return report, auc_text, outlier_text, plt
+
+ # Gradio Interface
+ with gr.Blocks() as interface:
+     gr.Markdown("## Rice Anomaly Detection UI")
+
+     with gr.Row():
+         mode = gr.Dropdown(["Unsupervised", "Semi-supervised"], label="Mode")
+         model_name = gr.Dropdown(["IForest", "LOF", "OCSVM"], label="Model")
+
+     with gr.Row():
+         contamination = gr.Slider(0, 0.5, value=0.05, step=0.01, label="Contamination")
+         n_estimators = gr.Slider(100, 500, value=100, step=10, label="N Estimators (IForest)")
+         n_neighbors = gr.Slider(5, 50, value=20, step=1, label="N Neighbors (LOF)")
+         nu = gr.Slider(0, 1, value=0.1, step=0.01, label="Nu (OCSVM)")
+
+     submit_btn = gr.Button("Run Detection")
+
+     with gr.Row():
+         report_output = gr.Textbox(label="Classification Report")
+         auc_output = gr.Textbox(label="AUC Score")
+
+     outlier_output = gr.Textbox(label="Detected Outliers")
+     plot_output = gr.Plot(label="PCA Projection")
+
+     submit_btn.click(
+         fn=run_anomaly_detection,
+         inputs=[mode, model_name, contamination, n_estimators, n_neighbors, nu],
+         outputs=[report_output, auc_output, outlier_output, plot_output]
+     )
+
+ interface.launch()
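
The README points out that the hard-coded Kaggle `dataset_path` in `app.py` must be adjusted when running elsewhere (e.g., in Codespaces). One hedged way to make that adjustable, assuming a hypothetical `RICE_DATASET_DIR` environment variable that is not part of this commit:

```python
# Hypothetical tweak (not in the commit): read the dataset location from an
# environment variable so the Kaggle default can be overridden without
# editing app.py directly.
import os

dataset_path = os.environ.get(
    "RICE_DATASET_DIR",  # assumed variable name, chosen for illustration
    "/kaggle/input/rice-image-dataset/Rice_Image_Dataset",
)
basmati_path = os.path.join(dataset_path, "Basmati")
jasmine_path = os.path.join(dataset_path, "Jasmine")
```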
requirements.txt ADDED
@@ -0,0 +1,17 @@
+ # Core ML and data processing
+ numpy>=1.23.0
+ scikit-learn>=1.2.0
+ tensorflow>=2.10.0
+ pyod>=1.1.0
+
+ # Image processing
+ opencv-python>=4.7.0
+
+ # Plotting
+ matplotlib>=3.6.0
+
+ # UI
+ gradio>=4.0.0
+
+ # Optional: for better reproducibility
+ pandas>=1.5.0