# Spaces: Runtime error (page-status residue captured with this file; not part of the program)
## CSCI4750/5750: homework03 submission
## load the dataset
def hw03_derive_MNIST_train_test_data():
    """Fetch the ``mnist_784`` dataset from OpenML and split it.

    The first 60000 samples become the training split and everything after
    them the test split. Labels are converted to integers.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    from sklearn.datasets import fetch_openml

    mnist = fetch_openml('mnist_784', version=1, as_frame=False)
    X, y = mnist["data"], mnist["target"]
    X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
    # np.int was only an alias of the builtin int and was removed in
    # NumPy 1.24; the builtin is the exact drop-in replacement.
    y_train = y_train.astype(int)  # convert to int
    y_test = y_test.astype(int)  # convert to int
    return X_train, X_test, y_train, y_test
# Load the data once at import time and report the shape of every split.
X_train, X_test, y_train, y_test = hw03_derive_MNIST_train_test_data()
for _caption, _array in (
    ("X_train.shape: ", X_train),
    ("X_test.shape: ", X_test),
    ("y_train.shape: ", y_train),
    ("y_test.shape: ", y_test),
):
    print(_caption, _array.shape)

# Module-level names used by the classifier code further down.
train_features, train_labels = X_train, y_train
test_feature = X_test[0]
K = 3
for _caption, _array in (
    ("train_features: ", train_features),
    ("train_labels: ", train_labels),
    ("test_feature: ", test_feature),
):
    print(_caption, _array.shape)
# Practice 5: deploy our KNN classifier as a web application with multiple outputs
import scipy | |
import gradio as gr | |
import numpy as np | |
import cv2 | |
import os | |
def get_sample_images(num_images):
    """Write the first ``num_images`` test digits to disk as example inputs.

    Each row of the module-level ``X_test`` is reshaped to 28x28, cast to
    ``uint8`` and saved as ``images_folder/local_<index>.png``.

    Args:
        num_images: Number of leading rows of ``X_test`` to export. Values
            larger than ``len(X_test)`` are clamped.

    Returns:
        A list of ``[image_path, K]`` pairs, where ``K`` is drawn at random
        from a fixed list of neighbour counts.

    Raises:
        OSError: If an image file could not be written.
    """
    outdir = "images_folder"
    # Create the folder once, up front; exist_ok avoids the
    # check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(outdir, exist_ok=True)

    k_choices = [7, 9, 11, 13, 15, 24]
    sample_images = []
    for i in range(min(num_images, len(X_test))):
        # Make it unsigned integers so it is stored as an 8-bit image.
        data = X_test[i].reshape(28, 28).astype(np.uint8)
        img_path = os.path.join(outdir, 'local_%05d.png' % (i,))
        # cv2.imwrite reports failure through its return value only.
        if not cv2.imwrite(img_path, data):
            raise OSError("could not write sample image: " + img_path)
        sample_images.append([img_path, int(np.random.choice(k_choices))])  # ["image path", "K"]
    return sample_images
# EXTRA: adapted from https://github.com/ageron/handson-ml2/blob/master/03_classification.ipynb
def plot_digits(instances, images_per_row=3):
    """Draw digit images in a grid and save the grid to ``results.png``.

    Args:
        instances: Sequence of arrays, each reshapeable to 28x28.
        images_per_row: Maximum number of images placed on one row.

    Returns:
        The path of the written file, ``'results.png'``.
    """
    import matplotlib.pyplot as plt

    size = 28
    n_instances = len(instances)
    # Never use more columns than there are images, but keep at least one
    # so the row computation below cannot divide by zero on empty input.
    images_per_row = max(1, min(n_instances, images_per_row))
    # This is equivalent to n_rows = ceil(n_instances / images_per_row):
    n_rows = (n_instances - 1) // images_per_row + 1

    fig = plt.figure(figsize=(15, 8))
    try:
        for i, instance in enumerate(instances):
            ax = fig.add_subplot(n_rows, images_per_row, i + 1)
            ax.imshow(instance.reshape(size, size), cmap="binary")
            ax.axis("off")
            ax.set_title("Neighbor " + str(i + 1), size=20)
        fig.tight_layout()
        fig.savefig('results.png', dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak one figure.
        plt.close(fig)
    return 'results.png'
## machine learning classifier
def KNN_predict(train_features, train_labels, test_feature, K):
    """Classify one sample by majority vote among its K nearest neighbours.

    Distances are Euclidean. Equidistant training points keep their original
    order, and a tied vote resolves to the smallest label.

    Args:
        train_features: Array-like with one training sample per row.
        train_labels: Labels aligned with the rows of ``train_features``.
        test_feature: The sample to classify. It is flattened before use, so
            both a ``(d,)`` vector and a ``(1, d)`` row are accepted.
        K: Number of neighbours that vote. Values larger than the number of
            training samples are clamped to it.

    Returns:
        Tuple ``(final_prediction, image_path)``: the winning label and the
        path returned by ``plot_digits`` for an image of up to 24 of the
        nearest neighbours.

    Raises:
        ValueError: If ``K`` is smaller than 1 or the training set is empty.
    """
    K = int(K)
    if K < 1:
        raise ValueError("K must be at least 1")

    train_matrix = np.asarray(train_features, dtype=np.float64)
    n_train = len(train_matrix)
    if n_train == 0:
        raise ValueError("train_features is empty")
    train_matrix = train_matrix.reshape(n_train, -1)
    labels = np.asarray(train_labels)
    query = np.asarray(test_feature, dtype=np.float64).ravel()
    K = min(K, n_train)

    ### (1) calculate distance between test feature and each of training data points
    # The subtraction allocates a fresh array, so squaring it in place is
    # safe and avoids a second full-size temporary.
    diff = train_matrix - query
    np.square(diff, out=diff)
    distances = np.sqrt(diff.sum(axis=1))

    # sort data points by distance; a stable sort keeps equidistant points
    # in their original order
    nearest = np.argsort(distances, kind="stable")[:K]

    ### get final prediction
    # np.unique returns sorted labels and argmax picks the first maximum,
    # so a tied vote goes to the smallest label.
    candidates, votes = np.unique(labels[nearest], return_counts=True)
    final_prediction = candidates[np.argmax(votes)]

    ### get neighbor images and save to local
    # at most 24 neighbors for visualization
    neighbor_imgs = train_matrix[nearest[:24]]
    image_path = plot_digits(neighbor_imgs, images_per_row=6)
    return final_prediction, image_path
### main function for gradio to call to classify image
def call_our_KNN(test_image, K=7):
    """Classify an image with the KNN classifier on the module-level data.

    Args:
        test_image: Image array holding exactly 28*28 pixel values.
        K: Number of neighbours to use; converted with ``int``.

    Returns:
        Tuple ``(prediction, image_path)`` exactly as returned by
        ``KNN_predict``.
    """
    # Flatten to a 1-D vector of 784 pixels, the same shape as one training
    # row, so the distance computation compares two plain vectors.
    test_image_flatten = np.asarray(test_image).reshape(28 * 28)
    y_pred_each, image_path = KNN_predict(train_features, train_labels, test_image_flatten, int(K))
    return y_pred_each, image_path
### generate several example cases
sample_images = get_sample_images(10)

### configure inputs/outputs
set_image = gr.inputs.Image(image_mode='L', shape=(28, 28))
set_K = gr.inputs.Slider(1, 24, default=7, step=1)
set_label = gr.outputs.Textbox(label="Predicted Digit")
set_out_images = gr.outputs.Image(label="Closest Neighbors")

### configure gradio, details can be found at https://www.gradio.app/docs/#i_slider
interface = gr.Interface(
    fn=call_our_KNN,
    inputs=[set_image, set_K],
    outputs=[set_label, set_out_images],
    examples=sample_images,
    examples_per_page=2,
    title="CSCI4750/5750(hw03): Digit classification using KNN algorithm",
    description="Click examples below for a quick demo",
    theme='huggingface',
    layout='vertical',
    live=True,
)

# Start the web app.
interface.launch(debug=True)