Canstralian committed
Commit 5d05d60 · verified · 1 Parent(s): bcf99fa

Update app.py

Files changed (1)
  1. app.py +103 -57
app.py CHANGED
@@ -1,62 +1,108 @@
  import streamlit as st
  import pandas as pd
- from sklearn.datasets import load_iris
- from sklearn.ensemble import RandomForestClassifier
  from sklearn.model_selection import train_test_split
- from sklearn.metrics import accuracy_score
+ from sklearn.ensemble import RandomForestClassifier
+ from sklearn.metrics import classification_report, confusion_matrix
+ import matplotlib.pyplot as plt
+ import seaborn as sns

  # Set the title of the app
- st.title("Iris Flower Species Prediction App")
-
- # Load the Iris dataset
- iris = load_iris()
- X = pd.DataFrame(iris.data, columns=iris.feature_names)
- y = pd.Series(iris.target, name='species')
-
- # Split the dataset into training and testing sets
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
-
- # Initialize the RandomForestClassifier
- clf = RandomForestClassifier(n_estimators=100, random_state=42)
- clf.fit(X_train, y_train)
-
- # Sidebar for user input features
- st.sidebar.header("Input Features")
-
- def user_input_features():
-     sepal_length = st.sidebar.slider('Sepal length (cm)', float(X['sepal length (cm)'].min()), float(X['sepal length (cm)'].max()), float(X['sepal length (cm)'].mean()))
-     sepal_width = st.sidebar.slider('Sepal width (cm)', float(X['sepal width (cm)'].min()), float(X['sepal width (cm)'].max()), float(X['sepal width (cm)'].mean()))
-     petal_length = st.sidebar.slider('Petal length (cm)', float(X['petal length (cm)'].min()), float(X['petal length (cm)'].max()), float(X['petal length (cm)'].mean()))
-     petal_width = st.sidebar.slider('Petal width (cm)', float(X['petal width (cm)'].min()), float(X['petal width (cm)'].max()), float(X['petal width (cm)'].mean()))
-     data = {'sepal length (cm)': sepal_length,
-             'sepal width (cm)': sepal_width,
-             'petal length (cm)': petal_length,
-             'petal width (cm)': petal_width}
-     features = pd.DataFrame(data, index=[0])
-     return features
-
- input_df = user_input_features()
-
- # Main panel
- st.subheader("User Input Features")
-
- # Display the user input features
- st.write(input_df)
-
- # Make predictions
- prediction = clf.predict(input_df)
- prediction_proba = clf.predict_proba(input_df)
-
- # Display the prediction
- st.subheader("Prediction")
- st.write(iris.target_names[prediction][0])
-
- # Display the prediction probability
- st.subheader("Prediction Probability")
- st.write(pd.DataFrame(prediction_proba, columns=iris.target_names))
-
- # Evaluate model accuracy
- y_pred = clf.predict(X_test)
- accuracy = accuracy_score(y_test, y_pred)
- st.subheader("Model Accuracy")
- st.write(f"{accuracy * 100:.2f}%")
+ st.title("Cybersecurity Model Training App")
+
+ # Sidebar for dataset upload and parameter selection
+ st.sidebar.header("Upload Dataset and Parameters")
+
+ # File uploader for dataset
+ uploaded_file = st.sidebar.file_uploader("Upload your CSV dataset", type=["csv"])
+
+ # Function to load and display dataset
+ def load_data(file):
+     data = pd.read_csv(file)
+     st.write("Dataset Preview:")
+     st.dataframe(data.head())
+     return data
+
+ # Load dataset if file is uploaded
+ if uploaded_file is not None:
+     data = load_data(uploaded_file)
+
+     # Select target variable
+     target = st.sidebar.selectbox("Select the target variable", data.columns)
+
+     # Select features
+     features = st.sidebar.multiselect("Select feature variables", [col for col in data.columns if col != target])
+
+     # Split ratio
+     test_size = st.sidebar.slider("Test size ratio", 0.1, 0.5, 0.3)
+
+     # Model selection
+     model_choice = st.sidebar.selectbox("Select Model", ["Random Forest", "Support Vector Machine", "Logistic Regression"])
+
+     # Hyperparameters
+     if model_choice == "Random Forest":
+         n_estimators = st.sidebar.slider("Number of trees in the forest", 10, 100, 50)
+         max_depth = st.sidebar.slider("Maximum depth of the tree", 1, 20, 10)
+     elif model_choice == "Support Vector Machine":
+         c_value = st.sidebar.slider("Regularization parameter (C)", 0.01, 10.0, 1.0)
+         kernel = st.sidebar.selectbox("Kernel type", ["linear", "rbf", "poly"])
+     elif model_choice == "Logistic Regression":
+         c_value = st.sidebar.slider("Inverse of regularization strength (C)", 0.01, 10.0, 1.0)
+
+     # Train model button
+     if st.sidebar.button("Train Model"):
+         if len(features) == 0:
+             st.warning("Please select at least one feature.")
+         else:
+             X = data[features]
+             y = data[target]
+
+             # Split the data
+             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
+
+             # Initialize and train the model
+             if model_choice == "Random Forest":
+                 model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
+             elif model_choice == "Support Vector Machine":
+                 from sklearn.svm import SVC
+                 model = SVC(C=c_value, kernel=kernel, probability=True, random_state=42)
+             elif model_choice == "Logistic Regression":
+                 from sklearn.linear_model import LogisticRegression
+                 model = LogisticRegression(C=c_value, max_iter=1000, random_state=42)
+
+             model.fit(X_train, y_train)
+
+             # Make predictions
+             y_pred = model.predict(X_test)
+
+             # Display evaluation metrics
+             st.subheader("Model Evaluation")
+             st.text("Classification Report:")
+             st.text(classification_report(y_test, y_pred))
+
+             # Confusion matrix
+             st.text("Confusion Matrix:")
+             cm = confusion_matrix(y_test, y_pred)
+             fig, ax = plt.subplots()
+             sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=model.classes_, yticklabels=model.classes_)
+             plt.xlabel("Predicted")
+             plt.ylabel("Actual")
+             st.pyplot(fig)
+
+             # Feature importance for Random Forest
+             if model_choice == "Random Forest":
+                 st.subheader("Feature Importance")
+                 feature_importance = pd.DataFrame({'Feature': features, 'Importance': model.feature_importances_})
+                 feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
+                 st.bar_chart(feature_importance.set_index('Feature'))
+
+ # Instructions when no file is uploaded
+ else:
+     st.write("Please upload a CSV file to get started.")
+
+ # Additional resources
+ st.sidebar.header("Additional Resources")
+ st.sidebar.markdown("""
+ - [Streamlit Documentation](https://docs.streamlit.io/)
+ - [Scikit-learn Documentation](https://scikit-learn.org/stable/user_guide.html)
+ - [Cybersecurity Datasets](https://www.kaggle.com/datasets?search=cybersecurity)
+ """)