File size: 2,719 Bytes
fcf60f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import gradio as gr
def rainPrediction(fileCSVName: str) -> pd.DataFrame:
    """Train a random-forest rain classifier and return its test-set predictions.

    The function loads the weather dataset, imputes missing values, encodes
    the categorical columns, standardises the features, trains a
    ``RandomForestClassifier`` on 80% of the rows and predicts the remaining
    20%. The accuracy on the held-out rows is printed to stdout.

    Args:
        fileCSVName: Name of the dataset to use. Only ``"weatherAUS.csv"`` is
            supported; it is read from ``/weatherAUS.csv``.

    Returns:
        A DataFrame with one row per held-out sample and two columns:
        ``'Rain Tomorrow'`` (the actual label) and ``'Rain Prediction'``
        (the label predicted by the model).

    Raises:
        ValueError: If ``fileCSVName`` is not ``"weatherAUS.csv"``.
    """
    # Fail with a clear message instead of hitting an unbound ``dataset``
    # further down when an unknown file name is entered in the UI.
    if fileCSVName != "weatherAUS.csv":
        raise ValueError(
            f"Unsupported dataset {fileCSVName!r}; expected 'weatherAUS.csv'"
        )
    dataset = pd.read_csv("/weatherAUS.csv")

    # Features are picked by column position; the target is the last column.
    feature_columns = [1, 2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 21]
    X = dataset.iloc[:, feature_columns].values
    # SimpleImputer only accepts 2-D input, hence the single-column reshape.
    Y = dataset.iloc[:, -1].values.reshape(-1, 1)

    # Replace missing entries with the most frequent value of their column.
    # NOTE(review): imputation and scaling are fitted on the full dataset
    # before the train/test split, as in the original pipeline.
    X = SimpleImputer(missing_values=np.nan, strategy='most_frequent').fit_transform(X)
    Y = SimpleImputer(missing_values=np.nan, strategy='most_frequent').fit_transform(Y)

    # Encode the non-numerical feature columns (e.g. 'W', 'WNW') as integers.
    # Positions refer to the selected feature matrix, not to the raw CSV.
    for column in (0, 4, 6, 7, -1):
        X[:, column] = LabelEncoder().fit_transform(X[:, column])

    # LabelEncoder expects a 1-D array; keep the encoder so predictions can
    # be mapped back to the original labels afterwards.
    target_encoder = LabelEncoder()
    Y = target_encoder.fit_transform(Y.ravel())

    # Feature scaling to minimize data scattering.
    X = StandardScaler().fit_transform(X)

    # Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=0
    )

    classifier = RandomForestClassifier(n_estimators=100, random_state=0)
    classifier.fit(X_train, Y_train)

    # Map the integer classes back to the original labels for reporting.
    actual = target_encoder.inverse_transform(Y_test)
    predicted = target_encoder.inverse_transform(classifier.predict(X_test))

    print(accuracy_score(actual, predicted))

    return pd.DataFrame({'Rain Tomorrow': actual, 'Rain Prediction': predicted})
  
# Web UI: a text input naming the dataset, and a two-column table as output.
app = gr.Interface(
    fn=rainPrediction,
    inputs="text",
    outputs=gr.outputs.Dataframe(
        headers=["Rain Tomorrow", "Rain Prediction"],
        label="All data",
    ),
)
# Start serving as soon as this module is executed.
app.launch(debug=True)