import gradio as gr
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler


def rainPrediction(fileCSVName):
    """Train a random forest on the weather dataset and return its test predictions.

    The function loads ``/weatherAUS.csv``, imputes missing values with the
    most frequent value of each column, label-encodes the non-numeric
    columns, standardises the features, trains a
    ``RandomForestClassifier`` on 80% of the rows and predicts the
    remaining 20%. The accuracy on the held-out rows is printed to stdout.

    Args:
        fileCSVName: Name of the dataset to use. Only ``"weatherAUS.csv"``
            is supported.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'Rain Tomorrow'`` (actual
        label of each held-out row) and ``'Rain Prediction'`` (label
        predicted by the model).

    Raises:
        ValueError: If ``fileCSVName`` is not ``"weatherAUS.csv"``.
    """
    # Fail with a clear message instead of crashing later on an unbound
    # ``dataset`` variable.
    if fileCSVName != "weatherAUS.csv":
        raise ValueError(
            f"Unsupported dataset {fileCSVName!r}; expected 'weatherAUS.csv'."
        )
    dataset = pd.read_csv("/weatherAUS.csv")

    # Features are picked by column position; the last column is the target.
    feature_columns = [1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21]
    X = dataset.iloc[:, feature_columns].values
    # SimpleImputer needs a 2-D input, hence the (n, 1) column vector.
    Y = dataset.iloc[:, -1].values.reshape(-1, 1)

    # Replace missing values with the most frequent value of each column.
    # NOTE(review): the imputer and the scaler below are fitted on the whole
    # dataset before the train/test split, so test rows influence the
    # preprocessing. Kept as-is to preserve the reported results.
    imputer = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
    X = imputer.fit_transform(X)
    Y = imputer.fit_transform(Y)

    # Encode the non-numeric feature columns (positions within X) as integers.
    for column in (0, 4, 6, 7, -1):
        X[:, column] = LabelEncoder().fit_transform(X[:, column])

    # LabelEncoder expects a 1-D target; flatten to avoid a conversion warning.
    # The encoder is kept so predictions can be mapped back to the labels.
    target_encoder = LabelEncoder()
    Y = target_encoder.fit_transform(Y.ravel())

    # Standardise every feature to zero mean and unit variance.
    X = StandardScaler().fit_transform(X)

    # Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0
    )

    classifier = RandomForestClassifier(n_estimators=100, random_state=0)
    classifier.fit(X_train, Y_train)

    # Map the integer classes back to the original labels.
    Y_pred = target_encoder.inverse_transform(classifier.predict(X_test))
    Y_test = target_encoder.inverse_transform(Y_test)

    # Report the accuracy on the held-out rows.
    print(accuracy_score(Y_test, Y_pred))

    # Place actual and predicted labels side by side, one row per test sample.
    return pd.DataFrame(
        np.column_stack((Y_test, Y_pred)),
        columns=['Rain Tomorrow', 'Rain Prediction'],
    )


# Older Gradio releases expose output components under ``gr.outputs``; use
# that namespace when it exists so behaviour there is unchanged, and fall back
# to the top-level ``gr.Dataframe`` component on releases without it.
_outputs_namespace = getattr(gr, "outputs", None)
_dataframe_component = (
    _outputs_namespace.Dataframe if _outputs_namespace is not None else gr.Dataframe
)

app = gr.Interface(
    rainPrediction,
    "text",
    _dataframe_component(
        headers=["Rain Tomorrow", "Rain Prediction"],
        label="All data",
    ),
)
app.launch(debug=True)