Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import numpy as np | |
# Columns concatenated (in this order) into the text that gets embedded.
_TEXT_COLUMNS = [
    "Title",
    "Summary",
    "Short Summary",
    "Genres",
    "Director",
    "Writers",
    "Cast",
]


@st.cache_resource
def _load_model():
    """Load the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer("all-MiniLM-L6-v2")


@st.cache_data
def _load_movies():
    """Read the movie CSV and attach ``combined_text`` and ``embedding`` columns.

    Streamlit re-executes the whole script on every widget interaction, so
    the expensive work (CSV parsing and embedding every row) is cached
    instead of being repeated on each rerun.

    Returns:
        The movie DataFrame with two extra columns: ``combined_text`` (the
        space-joined, stripped text of ``_TEXT_COLUMNS``) and ``embedding``
        (one 1-D NumPy vector per row).
    """
    # Make sure this file is included in your Hugging Face repository.
    movies = pd.read_csv("Hydra-Movie-Scrape.csv")

    # Missing values become empty strings so concatenation never yields NaN.
    combined = movies[_TEXT_COLUMNS[0]].fillna("")
    for column in _TEXT_COLUMNS[1:]:
        combined = combined + " " + movies[column].fillna("")
    movies["combined_text"] = combined.str.strip()

    encoder = _load_model()
    dimension = encoder.get_sentence_embedding_dimension()

    # Rows with no text keep an all-zero vector; every other row is encoded
    # in one batched call rather than one model invocation per row.
    vectors = np.zeros((len(movies), dimension), dtype=np.float32)
    has_text = movies["combined_text"].ne("").to_numpy()
    if has_text.any():
        texts = movies.loc[has_text, "combined_text"].tolist()
        vectors[has_text] = encoder.encode(texts)

    # Stored as one vector per row so callers can np.vstack the column.
    movies["embedding"] = list(vectors)
    return movies


# Module-level handles used by the rest of the app.
model = _load_model()
df = _load_movies()
def retrieve_movies(query, k=10):
    """Return up to ``k`` movies matching ``query``.

    If the lower-cased query equals the lower-cased ``Genres`` value of some
    row, every row whose ``Genres`` text contains the query is returned (in
    dataset order). Otherwise the query is embedded and rows are ranked by
    cosine similarity against the precomputed ``embedding`` column.

    Args:
        query: Free-text search string or a genre string.
        k: Maximum number of rows to return; values below zero are treated
            as zero.

    Returns:
        A DataFrame with the columns Title, Year, Summary, Genres, Director
        and Cast, holding at most ``k`` rows.
    """
    columns = ["Title", "Year", "Summary", "Genres", "Director", "Cast"]
    # A zero/negative k must yield no rows; the slices below honor that.
    k = max(k, 0)

    needle = query.lower()
    genres = df["Genres"].str.lower()

    if needle in set(genres.dropna()):
        # regex=False: the query is user text, not a pattern.
        # na=False: rows without a genre must not poison the boolean mask.
        matches = genres.str.contains(needle, regex=False, na=False)
        return df.loc[matches, columns].head(k)

    # Not a known genre string: fall back to semantic similarity.
    query_embedding = model.encode(query).reshape(1, -1)
    embeddings = np.vstack(df["embedding"].values)
    similarities = cosine_similarity(query_embedding, embeddings).flatten()
    # Reverse first, then take k, so k == 0 gives an empty selection
    # (``[-0:]`` would have selected every row).
    top_k_indices = similarities.argsort()[::-1][:k]
    return df.iloc[top_k_indices][columns]
# Streamlit app interface
st.title("🎬 Movie Recommendation and Retrieval System")
st.markdown("""
Enter a description, genre, or keyword to find similar movies.
Use genres like **action**, **comedy**, **drama**, etc.
""")

# Sidebar for user input
st.sidebar.header("Search Settings")
query = st.sidebar.text_input("Enter your search query (e.g., 'action', 'comedy', or a specific movie title):")
num_results = st.sidebar.slider("Select number of results to display:", 1, 50, 10)

if st.sidebar.button("Search"):
    # Surrounding whitespace would defeat the exact genre match and a
    # whitespace-only query is not a meaningful search.
    search_text = query.strip()
    if not search_text:
        st.warning("Please enter a query.")
    else:
        results = retrieve_movies(search_text, k=num_results)
        if results.empty:
            st.warning("No movies found for the given query.")
        else:
            st.subheader("Top Results:")
            # One markdown section per movie, separated by a horizontal rule.
            for _, row in results.iterrows():
                st.markdown(f"**Title:** {row['Title']}")
                st.markdown(f"**Year:** {row['Year']}")
                st.markdown(f"**Genres:** {row['Genres']}")
                st.markdown(f"**Director:** {row['Director']}")
                st.markdown(f"**Summary:** {row['Summary']}")
                st.markdown("---")