import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie catalogue; the code below reads its 'Title' and 'Overview' columns.
dataset = pd.read_csv("https://huggingface.co/datasets/Pabl1nho/movies-dataset/resolve/main/9000plus.csv")

# Text that gets embedded for each movie. Missing titles/overviews are replaced
# with empty strings first: concatenating a string with NaN yields NaN, which
# is not valid text input for the encoder.
dataset['full_text'] = (
    dataset['Title'].fillna("").astype(str)
    + " : "
    + dataset['Overview'].fillna("").astype(str)
)

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Encode the whole catalogue once at start-up. A plain list is passed so the
# encoder never depends on how a pandas Series resolves integer lookups.
embeddings = model.encode(
    dataset['full_text'].tolist(),
    convert_to_tensor=True,
    show_progress_bar=True,
)

# Keep one embedding per row, stored as a plain Python list of floats.
dataset['embeddings'] = embeddings.cpu().numpy().tolist()


def recommend_movies(description):
    """Return the five catalogue movies most similar to *description*.

    The description is embedded with the module-level ``model`` and compared,
    by cosine similarity, against the per-row vectors stored in
    ``dataset['embeddings']``.

    Args:
        description: Free-text description of the movie being looked for.

    Returns:
        A Markdown string with one ``**Title**: Overview`` entry per match,
        best match first, entries separated by blank lines. If the
        description is empty or whitespace-only, a short prompt asking for
        input is returned instead.
    """
    # Nothing meaningful to search for; skip the model call entirely.
    if not description or not description.strip():
        return "Please enter a description of the movie you're looking for."

    query_embedding = model.encode([description])

    # The column holds one list per row. Converting the Series directly would
    # give a 1-D object array, so stack the rows into a 2-D float matrix, which
    # is the shape cosine_similarity expects.
    corpus_embeddings = np.array(dataset['embeddings'].tolist(), dtype=np.float32)

    similarities = cosine_similarity(query_embedding, corpus_embeddings)[0]

    # Indices of the five highest scores, highest first.
    top_indices = similarities.argsort()[::-1][:5]

    top_rows = dataset.iloc[top_indices]
    return "\n\n".join(
        f"**{title}**: {overview}"
        for title, overview in zip(top_rows['Title'], top_rows['Overview'])
    )


# Web UI: a two-line text box in, Markdown-rendered recommendations out.
iface = gr.Interface(
    fn=recommend_movies,
    inputs=gr.Textbox(lines=2, placeholder="Describe the movie you're looking for..."),
    outputs="markdown",
    # The title's leading emoji had been corrupted by a wrong text decoding;
    # restored to the clapper-board character.
    title="🎬 Movie Recommendation App",
    description="Get 5 movies similar to your description based on semantic search!",
)

# Start the Gradio server as soon as the script runs.
iface.launch()