# movies-dataset / app.py
# Ahmadkhan12's picture
# Create app.py
# 2082d86 verified
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# Load the CSV data (make sure this file is included in your Hugging Face repository)
df = pd.read_csv("Hydra-Movie-Scrape.csv")
# Combine relevant columns to create an embedding text representation
df["combined_text"] = (df["Title"].fillna('') + " " +
df["Summary"].fillna('') + " " +
df["Short Summary"].fillna('') + " " +
df["Genres"].fillna('') + " " +
df["Director"].fillna('') + " " +
df["Writers"].fillna('') + " " +
df["Cast"].fillna('')).str.strip()
# Load the Sentence Transformer model
model = SentenceTransformer("all-MiniLM-L6-v2")
# Generate embeddings for each document in the dataset
df["embedding"] = df["combined_text"].apply(lambda x: model.encode(x) if x else np.zeros(model.get_sentence_embedding_dimension()))
# Function to retrieve movies based on genre or query
def retrieve_movies(query, k=10):
# Check if the query is a genre
if query.lower() in df["Genres"].str.lower().unique():
return df[df["Genres"].str.lower().str.contains(query.lower())][["Title", "Year", "Summary", "Genres", "Director", "Cast"]].head(k)
else:
# Compute similarities if query is not a direct genre
query_embedding = model.encode(query).reshape(1, -1)
embeddings = np.vstack(df["embedding"].values)
similarities = cosine_similarity(query_embedding, embeddings).flatten()
top_k_indices = similarities.argsort()[-k:][::-1]
return df.iloc[top_k_indices][["Title", "Year", "Summary", "Genres", "Director", "Cast"]]
# Streamlit app interface
st.title("🎬 Movie Recommendation and Retrieval System")
st.markdown("""
Enter a description, genre, or keyword to find similar movies.
Use genres like **action**, **comedy**, **drama**, etc.
""")
# Sidebar for user input
st.sidebar.header("Search Settings")
query = st.sidebar.text_input("Enter your search query (e.g., 'action', 'comedy', or a specific movie title):")
num_results = st.sidebar.slider("Select number of results to display:", 1, 50, 10)
if st.sidebar.button("Search"):
if query:
results = retrieve_movies(query, k=num_results)
if results.empty:
st.warning("No movies found for the given query.")
else:
st.subheader("Top Results:")
for index, row in results.iterrows():
st.markdown(f"**Title:** {row['Title']}")
st.markdown(f"**Year:** {row['Year']}")
st.markdown(f"**Genres:** {row['Genres']}")
st.markdown(f"**Director:** {row['Director']}")
st.markdown(f"**Summary:** {row['Summary']}")
st.markdown("---")
else:
st.warning("Please enter a query.")