Canstralian committed
Commit 5d05d60 · verified · 1 Parent(s): bcf99fa

Update app.py

Files changed (1)
  1. app.py +103 -57
app.py CHANGED
@@ -1,62 +1,108 @@
  import streamlit as st
  import pandas as pd
- from sklearn.datasets import load_iris
- from sklearn.ensemble import RandomForestClassifier
  from sklearn.model_selection import train_test_split
- from sklearn.metrics import accuracy_score
+ from sklearn.ensemble import RandomForestClassifier
+ from sklearn.metrics import classification_report, confusion_matrix
+ import matplotlib.pyplot as plt
+ import seaborn as sns

  # Set the title of the app
- st.title("Iris Flower Species Prediction App")
-
- # Load the Iris dataset
- iris = load_iris()
- X = pd.DataFrame(iris.data, columns=iris.feature_names)
- y = pd.Series(iris.target, name='species')
-
- # Split the dataset into training and testing sets
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
-
- # Initialize the RandomForestClassifier
- clf = RandomForestClassifier(n_estimators=100, random_state=42)
- clf.fit(X_train, y_train)
-
- # Sidebar for user input features
- st.sidebar.header("Input Features")
-
- def user_input_features():
-     sepal_length = st.sidebar.slider('Sepal length (cm)', float(X['sepal length (cm)'].min()), float(X['sepal length (cm)'].max()), float(X['sepal length (cm)'].mean()))
-     sepal_width = st.sidebar.slider('Sepal width (cm)', float(X['sepal width (cm)'].min()), float(X['sepal width (cm)'].max()), float(X['sepal width (cm)'].mean()))
-     petal_length = st.sidebar.slider('Petal length (cm)', float(X['petal length (cm)'].min()), float(X['petal length (cm)'].max()), float(X['petal length (cm)'].mean()))
-     petal_width = st.sidebar.slider('Petal width (cm)', float(X['petal width (cm)'].min()), float(X['petal width (cm)'].max()), float(X['petal width (cm)'].mean()))
-     data = {'sepal length (cm)': sepal_length,
-             'sepal width (cm)': sepal_width,
-             'petal length (cm)': petal_length,
-             'petal width (cm)': petal_width}
-     features = pd.DataFrame(data, index=[0])
-     return features
-
- input_df = user_input_features()
-
- # Main panel
- st.subheader("User Input Features")
-
- # Display the user input features
- st.write(input_df)
-
- # Make predictions
- prediction = clf.predict(input_df)
- prediction_proba = clf.predict_proba(input_df)
-
- # Display the prediction
- st.subheader("Prediction")
- st.write(iris.target_names[prediction][0])
-
- # Display the prediction probability
- st.subheader("Prediction Probability")
- st.write(pd.DataFrame(prediction_proba, columns=iris.target_names))
-
- # Evaluate model accuracy
- y_pred = clf.predict(X_test)
- accuracy = accuracy_score(y_test, y_pred)
- st.subheader("Model Accuracy")
- st.write(f"{accuracy * 100:.2f}%")
+ st.title("Cybersecurity Model Training App")
+
+ # Sidebar for dataset upload and parameter selection
+ st.sidebar.header("Upload Dataset and Parameters")
+
+ # File uploader for dataset
+ uploaded_file = st.sidebar.file_uploader("Upload your CSV dataset", type=["csv"])
+
+ # Function to load and display dataset
+ def load_data(file):
+     data = pd.read_csv(file)
+     st.write("Dataset Preview:")
+     st.dataframe(data.head())
+     return data
+
+ # Load dataset if file is uploaded
+ if uploaded_file is not None:
+     data = load_data(uploaded_file)
+
+     # Select target variable
+     target = st.sidebar.selectbox("Select the target variable", data.columns)
+
+     # Select features
+     features = st.sidebar.multiselect("Select feature variables", [col for col in data.columns if col != target])
+
+     # Split ratio
+     test_size = st.sidebar.slider("Test size ratio", 0.1, 0.5, 0.3)
+
+     # Model selection
+     model_choice = st.sidebar.selectbox("Select Model", ["Random Forest", "Support Vector Machine", "Logistic Regression"])
+
+     # Hyperparameters
+     if model_choice == "Random Forest":
+         n_estimators = st.sidebar.slider("Number of trees in the forest", 10, 100, 50)
+         max_depth = st.sidebar.slider("Maximum depth of the tree", 1, 20, 10)
+     elif model_choice == "Support Vector Machine":
+         c_value = st.sidebar.slider("Regularization parameter (C)", 0.01, 10.0, 1.0)
+         kernel = st.sidebar.selectbox("Kernel type", ["linear", "rbf", "poly"])
+     elif model_choice == "Logistic Regression":
+         c_value = st.sidebar.slider("Inverse of regularization strength (C)", 0.01, 10.0, 1.0)
+
+     # Train model button
+     if st.sidebar.button("Train Model"):
+         if len(features) == 0:
+             st.warning("Please select at least one feature.")
+         else:
+             X = data[features]
+             y = data[target]
+
+             # Split the data
+             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
+
+             # Initialize and train the model
+             if model_choice == "Random Forest":
+                 model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
+             elif model_choice == "Support Vector Machine":
+                 from sklearn.svm import SVC
+                 model = SVC(C=c_value, kernel=kernel, probability=True, random_state=42)
+             elif model_choice == "Logistic Regression":
+                 from sklearn.linear_model import LogisticRegression
+                 model = LogisticRegression(C=c_value, max_iter=1000, random_state=42)
+
+             model.fit(X_train, y_train)
+
+             # Make predictions
+             y_pred = model.predict(X_test)
+
+             # Display evaluation metrics
+             st.subheader("Model Evaluation")
+             st.text("Classification Report:")
+             st.text(classification_report(y_test, y_pred))
+
+             # Confusion matrix
+             st.text("Confusion Matrix:")
+             cm = confusion_matrix(y_test, y_pred)
+             fig, ax = plt.subplots()
+             sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=model.classes_, yticklabels=model.classes_)
+             plt.xlabel("Predicted")
+             plt.ylabel("Actual")
+             st.pyplot(fig)
+
+             # Feature importance for Random Forest
+             if model_choice == "Random Forest":
+                 st.subheader("Feature Importance")
+                 feature_importance = pd.DataFrame({'Feature': features, 'Importance': model.feature_importances_})
+                 feature_importance = feature_importance.sort_values(by='Importance', ascending=False)
+                 st.bar_chart(feature_importance.set_index('Feature'))
+
+ # Instructions when no file is uploaded
+ else:
+     st.write("Please upload a CSV file to get started.")
+
+ # Additional resources
+ st.sidebar.header("Additional Resources")
+ st.sidebar.markdown("""
+ - [Streamlit Documentation](https://docs.streamlit.io/)
+ - [Scikit-learn Documentation](https://scikit-learn.org/stable/user_guide.html)
+ - [Cybersecurity Datasets](https://www.kaggle.com/datasets?search=cybersecurity)
+ """)