Spaces:

Canstralian
/

WhiteRabbitNeo-Llama-3-WhiteRabbitNeo-8B-v2.0

Running

App Files Files Community

WhiteRabbitNeo-Llama-3-WhiteRabbitNeo-8B-v2.0 / app.py

Canstralian

Update app.py

5d05d60 verified 3 months ago

raw

history blame

4.41 kB

	import streamlit as st
	import pandas as pd
	from sklearn.model_selection import train_test_split
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import classification_report, confusion_matrix
	import matplotlib.pyplot as plt
	import seaborn as sns

	# Main page heading.
	st.title("Cybersecurity Model Training App")

	# All user inputs (dataset upload and training parameters) live in the sidebar.
	st.sidebar.header("Upload Dataset and Parameters")

	# CSV-only upload widget; the result is compared against None further down
	# to decide whether there is a dataset to work with.
	uploaded_file = st.sidebar.file_uploader(
	    "Upload your CSV dataset",
	    type=["csv"],
	)

	# Function to load and display dataset
	def load_data(file):
	    """Parse a CSV into a DataFrame and render a preview of its first rows.

	    Args:
	        file: Anything ``pandas.read_csv`` accepts; the app passes the
	            object returned by the sidebar file uploader.

	    Returns:
	        The full parsed ``pandas.DataFrame`` (not just the preview).
	    """
	    frame = pd.read_csv(file)

	    # Show a caption followed by the head of the table in the main page area.
	    st.write("Dataset Preview:")
	    preview = frame.head()
	    st.dataframe(preview)

	    return frame

	# Main application flow.
	#
	# With an uploaded file: load it, collect the training configuration from
	# the sidebar, and (when "Train Model" is pressed) fit the chosen classifier,
	# then report metrics, a confusion-matrix heatmap and, for Random Forest,
	# feature importances. Without a file: show a short instruction instead.
	if uploaded_file is not None:
	    data = load_data(uploaded_file)

	    # Select target variable
	    target = st.sidebar.selectbox("Select the target variable", data.columns)

	    # Select features (the target column is excluded from the candidates)
	    features = st.sidebar.multiselect(
	        "Select feature variables",
	        [col for col in data.columns if col != target],
	    )

	    # Share of rows held out for testing
	    test_size = st.sidebar.slider("Test size ratio", 0.1, 0.5, 0.3)

	    # Model selection
	    model_choice = st.sidebar.selectbox(
	        "Select Model",
	        ["Random Forest", "Support Vector Machine", "Logistic Regression"],
	    )

	    # Hyperparameters: only the widgets relevant to the chosen model are
	    # rendered, so only that model's variables are defined below.
	    if model_choice == "Random Forest":
	        n_estimators = st.sidebar.slider("Number of trees in the forest", 10, 100, 50)
	        max_depth = st.sidebar.slider("Maximum depth of the tree", 1, 20, 10)
	    elif model_choice == "Support Vector Machine":
	        c_value = st.sidebar.slider("Regularization parameter (C)", 0.01, 10.0, 1.0)
	        kernel = st.sidebar.selectbox("Kernel type", ["linear", "rbf", "poly"])
	    elif model_choice == "Logistic Regression":
	        c_value = st.sidebar.slider("Inverse of regularization strength (C)", 0.01, 10.0, 1.0)

	    # Train model button
	    if st.sidebar.button("Train Model"):
	        if not features:
	            st.warning("Please select at least one feature.")
	        else:
	            X = data[features]
	            y = data[target]

	            # Split the data (fixed seed keeps the split reproducible)
	            X_train, X_test, y_train, y_test = train_test_split(
	                X, y, test_size=test_size, random_state=42
	            )

	            # Initialize the model; the SVM / logistic-regression classes are
	            # imported lazily so they are only loaded when actually selected.
	            if model_choice == "Random Forest":
	                model = RandomForestClassifier(
	                    n_estimators=n_estimators, max_depth=max_depth, random_state=42
	                )
	            elif model_choice == "Support Vector Machine":
	                from sklearn.svm import SVC
	                model = SVC(C=c_value, kernel=kernel, probability=True, random_state=42)
	            elif model_choice == "Logistic Regression":
	                from sklearn.linear_model import LogisticRegression
	                model = LogisticRegression(C=c_value, max_iter=1000, random_state=42)

	            model.fit(X_train, y_train)

	            # Make predictions
	            y_pred = model.predict(X_test)

	            # Display evaluation metrics
	            st.subheader("Model Evaluation")
	            st.text("Classification Report:")
	            st.text(classification_report(y_test, y_pred))

	            # Confusion matrix
	            st.text("Confusion Matrix:")
	            cm = confusion_matrix(y_test, y_pred)
	            fig, ax = plt.subplots()
	            # Draw on the explicit Axes rather than pyplot's implicit
	            # "current" axes, so the plot always lands on the figure that is
	            # handed to st.pyplot below.
	            sns.heatmap(
	                cm,
	                annot=True,
	                fmt="d",
	                cmap="Blues",
	                xticklabels=model.classes_,
	                yticklabels=model.classes_,
	                ax=ax,
	            )
	            ax.set_xlabel("Predicted")
	            ax.set_ylabel("Actual")
	            st.pyplot(fig)
	            # Release the figure; otherwise every training run leaves another
	            # open figure registered with pyplot.
	            plt.close(fig)

	            # Feature importance for Random Forest
	            if model_choice == "Random Forest":
	                st.subheader("Feature Importance")
	                feature_importance = pd.DataFrame(
	                    {'Feature': features, 'Importance': model.feature_importances_}
	                )
	                feature_importance = feature_importance.sort_values(
	                    by='Importance', ascending=False
	                )
	                st.bar_chart(feature_importance.set_index('Feature'))

	# Instructions when no file is uploaded
	else:
	    st.write("Please upload a CSV file to get started.")

	# Sidebar footer: a heading followed by a markdown list of reference links.
	_RESOURCE_LINKS_MD = """
	- [Streamlit Documentation](https://docs.streamlit.io/)
	- [Scikit-learn Documentation](https://scikit-learn.org/stable/user_guide.html)
	- [Cybersecurity Datasets](https://www.kaggle.com/datasets?search=cybersecurity)
	"""
	st.sidebar.header("Additional Resources")
	st.sidebar.markdown(_RESOURCE_LINKS_MD)