# Muedgar's picture
# Create app.py
# fcf60f2
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import gradio as gr
def rainPrediction(fileCSVName):
    """Train a random-forest rain classifier and return its test-set predictions.

    Loads the weatherAUS dataset, fills missing values with each column's most
    frequent value, label-encodes the non-numerical columns, trains a
    RandomForestClassifier on 80% of the rows and predicts the remaining 20%.
    The test accuracy is printed to stdout.

    Args:
        fileCSVName: Name of the dataset to use. Only "weatherAUS.csv" is
            supported.

    Returns:
        A pandas DataFrame with one row per test sample and two columns:
        'Rain Tomorrow' (the value from the dataset's last column, after
        imputation) and 'Rain Prediction' (the model's output).

    Raises:
        ValueError: If fileCSVName is not "weatherAUS.csv".
    """
    # Fail loudly on an unknown name instead of hitting an unbound `dataset`.
    if fileCSVName != "weatherAUS.csv":
        raise ValueError(
            f"Unsupported dataset {fileCSVName!r}; expected 'weatherAUS.csv'"
        )
    # NOTE(review): the file is read from the filesystem root, not the working
    # directory -- confirm this matches the deployment layout.
    dataset = pd.read_csv("/weatherAUS.csv")

    # Positional indices of the feature columns; the target is the last column.
    feature_columns = [1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21]
    X = dataset.iloc[:, feature_columns].values
    # SimpleImputer only accepts 2-D input, so the target is a column vector
    # while it is being imputed and is flattened again right after.
    Y = dataset.iloc[:, -1].values.reshape(-1, 1)

    # Replace NA with the most frequent value of the respective column.
    # NOTE(review): imputation is still fit on the full dataset (train + test).
    X = SimpleImputer(missing_values=np.nan, strategy='most_frequent').fit_transform(X)
    Y = SimpleImputer(missing_values=np.nan, strategy='most_frequent').fit_transform(Y).ravel()

    # Encode non-numerical values (e.g. W, WNW) as integers. The positions
    # refer to columns of X, not of the original dataset.
    for column in (0, 4, 6, 7, -1):
        X[:, column] = LabelEncoder().fit_transform(X[:, column])
    # The target encoder is kept so predictions can be mapped back to labels.
    target_encoder = LabelEncoder()
    Y = target_encoder.fit_transform(Y)

    # Split first so that the scaler below never sees the test rows.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0
    )

    # Feature scaling, fit on the training split only to avoid data leakage.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Train the model and predict the held-out rows.
    classifier = RandomForestClassifier(n_estimators=100, random_state=0)
    classifier.fit(X_train, Y_train)
    Y_pred = target_encoder.inverse_transform(classifier.predict(X_test))
    Y_test = target_encoder.inverse_transform(Y_test)

    # Report the accuracy on the test split.
    print(accuracy_score(Y_test, Y_pred))

    # Actual and predicted labels side by side.
    dataframe = pd.DataFrame({'Rain Tomorrow': Y_test, 'Rain Prediction': Y_pred})
    return dataframe
# Gradio UI: a text box for the dataset name in, a two-column table of actual
# vs. predicted labels out.
# NOTE(review): `gr.outputs.Dataframe` is the legacy output namespace; newer
# Gradio releases expose `gr.Dataframe` instead -- confirm the pinned version
# before upgrading.
app = gr.Interface(
    fn=rainPrediction,
    inputs="text",
    outputs=gr.outputs.Dataframe(
        headers=["Rain Tomorrow", "Rain Prediction"],
        label="All data",
    ),
)
# debug=True keeps the process attached to the server.
app.launch(debug=True)