# Spaces: Sleeping (page-status text captured with the source; not part of the program)
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
import torch
from transformers import (
    AutoModel,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
# Initialize pipelines and tokenizers
@st.cache_resource
def load_components():
    """Load the tokenizer/model pair used by each pipeline.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource``: the checkpoints are
    loaded on the first call and the same objects are handed back on later
    reruns instead of being loaded again.

    Returns:
        dict: ``{"director": (tokenizer, model),
        "similarity": (tokenizer, model)}``.
    """
    # Each checkpoint id is named once so tokenizer and model cannot drift apart.
    director_checkpoint = "huggingface-course/distilbert-base-uncased-finetuned-imdb"
    similarity_checkpoint = "sentence-transformers/all-mpnet-base-v2"

    # Pipeline 1: Director analysis
    director_tokenizer = AutoTokenizer.from_pretrained(director_checkpoint)
    director_model = AutoModelForSequenceClassification.from_pretrained(
        director_checkpoint
    )

    # Pipeline 2: Semantic similarity for movie recommendation
    sim_tokenizer = AutoTokenizer.from_pretrained(similarity_checkpoint)
    sim_model = AutoModel.from_pretrained(similarity_checkpoint)

    return {
        "director": (director_tokenizer, director_model),
        "similarity": (sim_tokenizer, sim_model),
    }
# Load both pipelines and expose their parts under the module-level names
# that the analysis functions below read.
components = load_components()
director_pair = components["director"]
similarity_pair = components["similarity"]
director_tokenizer, director_model = director_pair
sim_tokenizer, sim_model = similarity_pair

# Genre mapping (translated): genre label -> integer id, in display order.
genre_mapping = {
    label: index
    for index, label in enumerate(["Action", "Comedy", "Sci-Fi", "Adventure"])
}

# Sample database, one record per movie.
# NOTE(review): the budget figures look about ten times larger than the
# commonly quoted budgets expressed in billions of USD - confirm the unit.
movie_db = pd.DataFrame(
    [
        ('Avatar', 'Sci-Fi', 2.37, 2.92),
        ('Interstellar', 'Sci-Fi', 1.65, 0.71),
        ('Jurassic Park', 'Adventure', 0.63, 1.10),
        ('Fast & Furious 7', 'Action', 1.9, 1.51),
        ('Hi, Mom', 'Comedy', 0.15, 0.83),
    ],
    columns=[
        'Title',
        'Genre',
        'Budget (Billion USD)',
        'Box Office (Billion USD)',
    ],
)
# Pipeline: Director quality analysis
def analyze_director(director):
    """Score a director name with the director classification model.

    Args:
        director: Director name as free text.

    Returns:
        dict: ``"Commercial Value"`` and ``"Artistic Quality"``, each the
        sigmoid of the first / second output logit scaled to the 0-10 range.
    """
    # truncation=True keeps over-long input within the model's maximum
    # sequence length, matching how the similarity pipeline tokenizes text.
    inputs = director_tokenizer(director, truncation=True, return_tensors="pt")
    with torch.no_grad():
        logits = director_model(**inputs).logits

    # NOTE(review): the two labels assume logits 0 and 1 carry these meanings;
    # confirm the checkpoint provides a trained two-label classification head.
    scores = torch.sigmoid(logits)[0]
    return {
        "Commercial Value": scores[0].item() * 10,
        "Artistic Quality": scores[1].item() * 10,
    }
# Pipeline: Movie recommendation
def find_similar_movies(title, genre):
    """Rank the movies of one genre by title similarity to ``title``.

    The query title and every title in ``movie_db`` are embedded in a single
    batched forward pass, and each movie is scored by cosine similarity to
    the query. ``movie_db`` itself is left unmodified; the scores are added
    to a copy.

    Args:
        title: Query movie title.
        genre: Genre label; only rows whose ``Genre`` equals it are returned.

    Returns:
        pandas.DataFrame: the matching rows of ``movie_db`` with an extra
        ``Similarity`` column, sorted by that column in descending order.
    """
    texts = [title] + movie_db['Title'].tolist()
    encoded = sim_tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        token_states = sim_model(**encoded).last_hidden_state

    # Batching pads shorter titles, so average over real tokens only; a plain
    # mean over the sequence axis would mix padding positions into the result.
    mask = encoded["attention_mask"].unsqueeze(-1).to(token_states.dtype)
    embeddings = (token_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

    # Row 0 is the query; it broadcasts against the remaining movie rows.
    query_embed, movie_embeds = embeddings[:1], embeddings[1:]
    similarities = torch.cosine_similarity(query_embed, movie_embeds, dim=1)

    # assign() returns a new frame, so the shared movie_db is not mutated.
    scored = movie_db.assign(Similarity=similarities.tolist())
    return scored[scored['Genre'] == genre].sort_values('Similarity', ascending=False)
# Streamlit Interface
# The header strings below had their emoji restored from mis-decoded text.
st.title("🎬 Movie Intelligence Dashboard")

# Inputs live in the sidebar; results render in the main area.
with st.sidebar:
    director = st.text_input("Director Name", "Christopher Nolan")
    title = st.text_input("Movie Title", "Inception 2")
    genre = st.selectbox("Genre", list(genre_mapping))

if st.button("Analyze"):
    if not director.strip() or not title.strip():
        # Blank text would still be tokenized, but the resulting scores
        # would not relate to any real input, so ask for input instead.
        st.warning("Please enter both a director name and a movie title.")
    else:
        # Director analysis
        st.header("🧑‍💼 Director Profile")
        director_scores = analyze_director(director)
        fig = px.bar(
            x=list(director_scores.keys()),
            y=list(director_scores.values()),
            range_y=[0, 10],  # scores are scaled to 0-10 by analyze_director
        )
        st.plotly_chart(fig)

        # Movie recommendation
        st.header("🎞 Recommended Movies")
        similar_movies = find_similar_movies(title, genre)
        st.dataframe(
            similar_movies[
                [
                    'Title',
                    'Genre',
                    'Budget (Billion USD)',
                    'Box Office (Billion USD)',
                    'Similarity',
                ]
            ],
            column_config={
                # Render the similarity score as a 0-1 progress bar.
                "Similarity": st.column_config.ProgressColumn(
                    format="%.2f",
                    min_value=0,
                    max_value=1,
                )
            },
        )