Spaces:

romnatall
/

film_recomendations

Sleeping

App Files Files Community

romnatall committed on Apr 18, 2024

Commit

f82d1d4

1 Parent(s): 4368581

new model

Browse files

Files changed (4) hide show

app.py +151 -39
data.csv +2 -2
embeddings.npy +3 -0
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -3,37 +3,107 @@ import random
 import streamlit as st
 import pandas as pd
 import numpy as np
 movies = pd.read_csv('data.csv')
-# name	description	link	year	imdb	kp	country	age	actors	genres	poster
-# 0	Уэнсдэй	В американской хоррор-комедии показана детект...	https://www.lordfilm.bot/48211-ujensdjej-2022....	2022.0	8.1	8.0	США	0	Дженна Ортега, Гвендолин Кристи, Рики Линдхоум...	Сериалы, Фильмы про подростков	https://www.lordfilm.bot/uploads/posts/2022-12.
-def display_movie_card(df, index):
-    movie = df.iloc[index]
-    col1, col2 = st.columns([1, 3])
-    with col1:
-        st.image(movie['Image'], use_column_width=True)
-        button_id = f"button_{index}"
-        full=st.button('Показать полное описание', key=button_id, help='Click to expand')
     with col2:
-        st.markdown(f"<h2 style='text-align: left;'>{movie['Name']}</h2>", unsafe_allow_html=True)
-        description = ' '.join(movie['Description'][:200].split(" ")[:-1]) + '...' if len(movie['Description']) > 200 else movie['Description']
-        if len(movie['Description']) > 200:
-            if full:
-                st.write(movie['Description'])
-            else:
-                st.write(description, unsafe_allow_html=True)
-        st.write(f"[{movie['Name']}]({movie['Link']})")
-    st.write("----------------------")
 def display_rating(rating):
-    if np.isnan(rating): # Проверяем, является ли рейтинг NaN
-        return "(╥﹏╥)" # Смайлик, обозначающий отсутствие рейтинга
     stars = int(rating / 2) # Переводим рейтинг из 0-10 в 0-5 и округляем до целого
     remainder = rating % 2 # Доля рейтинга, которая не переводится в целое количество звезд
     star_str = '🌕' * stars
@@ -41,8 +111,6 @@ def display_rating(rating):
         star_str += '🌗' # Добавляем половину звезды в виде половины луны, если есть доля больше или равная 0.5
     return star_str
 def display_movie_card(df, index):
     movie = df.iloc[index]
@@ -52,39 +120,44 @@ def display_movie_card(df, index):
         st.image(movie['poster'], use_column_width=True)
         st.write(f"Жанр: {movie['genres']}")
         st.write(f"Страна: {movie['country']}")
         st.write(f"рейтинг: {movie['age']}")
     with col2:
         year = str(int(movie['year'])) if not np.isnan(movie['year']) else ""
         st.markdown(f"<h2 style='text-align: left;'>{movie['name']} ({year})</h2>", unsafe_allow_html=True)
         description = ' '.join(movie['description'][:200].split(" ")[:-1]) + '...' if len(movie['description']) > 200 else movie['description']
-        k='num'+ str(index)
-        if k not in st.session_state:
-            st.session_state[k] = False
         e = st.empty()
-        b=False
-        if movie['description'] !=description:
-            b = st.button("раскрыть описание",key=index,)
         with e:
             if b:
                 st.write(movie['description'])
             else:
                 st.write(description)
-        st.write(f"Актеры: {movie['actors']}")
         imdb,kp = st.columns([1,2])
         with imdb:
-            st.write(f"IMDB: {display_rating(movie['imdb'])}")
         with kp:
-            st.write(f"Кинопоиск: { display_rating(movie['kp'])}")
         st.write(f"[смотреть]({movie['link']})")
     st.write("----------------------")
@@ -92,7 +165,7 @@ def display_movie_card(df, index):
 reqs= st.session_state["reqs"] if "reqs" in st.session_state else {}
 @st.cache_data
-def getnums(df,size=10,text=''):
     if text in reqs:
         return reqs[text]
     else:
@@ -101,12 +174,51 @@ def getnums(df,size=10,text=''):
         return reqs[text]
-input_search = st.text_input('Search')
-for i in getnums(movies,text=input_search):
     display_movie_card(movies, i )

 import streamlit as st
 import pandas as pd
 import numpy as np
+import torch
+from transformers import AutoTokenizer, AutoModel
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
 movies = pd.read_csv('data.csv')  # movie catalogue; cards look rows up positionally via df.iloc
+# Sidebar checkbox labelled "режим разметки" (Russian: "labelling mode").
+# When ticked, display_movie_card shows the rating controls for each card.
+toggle_state = st.sidebar.checkbox("режим разметки")
+# Free-text query; it is embedded and compared against `data` in predict_rating.
+input_search = st.text_input('Search')
+# Precomputed embedding matrix, loaded on every script run.
+# NOTE(review): presumably one row per row of data.csv, in the same order,
+# since ranking indices from `data` are used to index `movies` -- confirm.
+data = np.load('embeddings.npy')
+@st.cache_resource
+def get_embeddings():
+    tokenizer = AutoTokenizer.from_pretrained("cointegrated/rubert-tiny2")
+    model = AutoModel.from_pretrained("cointegrated/rubert-tiny2")
+    # model.cuda()
+    return model, tokenizer
+@st.cache_data
+def embed_bert_cls(text, ):
+    model, tokenizer = get_embeddings()
+    t = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
+    with torch.no_grad():
+        model_output = model(**{k: v.to(model.device) for k, v in t.items()})
+    embeddings = model_output.last_hidden_state[:, 0, :]
+    embeddings = torch.nn.functional.normalize(embeddings)
+    return embeddings[0].cpu().numpy()
+@st.cache_data
+def predict_rating(input_search):
+    emb = embed_bert_cls(input_search)
+    X=np.column_stack((data, np.tile(emb, (data.shape[0], 1))))
+    st.session_state["X"]=X
+    # from catboost import CatBoostRanker
+    # cb= CatBoostRanker()
+    # cb.load_model('model.cbm')
+    # y = cb.predict(X)
+    # import pickle
+    # with open('logreg.pkl', 'rb') as f:
+    #     logreg = pickle.load(f)
+    # y = logreg.predict(X)
+    y= cosine_similarity(data, emb.reshape(1, -1)).reshape(-1)
+    return top_indices(y, 10)
+def saverank(index, new_X,new_y):
+    dx=np.load('X.npy')
+    dy=np.load('y.npy')
+    dx=np.concatenate((dx, new_X.reshape(1,-1)))
+    dy=np.concatenate((dy,np.array([new_y])))
+    np.save('X.npy',dx)
+    np.save('y.npy',dy)
+def ask_rating(movie,index):
+    # Создаем переменную для хранения оценки
+    rating = 0
+    # Создаем горизонтальный столбец
+    col1, col2, col3, col4, col5 = st.columns(5)
+    # В каждом столбце выводим кнопку оценки
+    with col1:
+        b1 = st.button("1",key="1"+str(index))
     with col2:
+        b2 = st.button("2" ,key="2"+str(index))
+    with col3:
+        b3 = st.button("3",key="3"+str(index))
+    with col4:
+        b4 = st.button("4",key="4"+str(index))
+    with col5:
+        b5 = st.button("5",key="5"+str(index))
+    if b1:
+        rating = 1
+    if b2:
+        rating = 2
+    if b3:
+        rating = 3
+    if b4:
+        rating = 4
+    if b5:
+        rating = 5
+    if rating>0:
+        saverank(index,st.session_state["X"][index],rating)
 def display_rating(rating):
     stars = int(rating / 2) # Переводим рейтинг из 0-10 в 0-5 и округляем до целого
     remainder = rating % 2 # Доля рейтинга, которая не переводится в целое количество звезд
     star_str = '🌕' * stars
         star_str += '🌗' # Добавляем половину звезды в виде половины луны, если есть доля больше или равная 0.5
     return star_str
 def display_movie_card(df, index):
     movie = df.iloc[index]
         st.image(movie['poster'], use_column_width=True)
         st.write(f"Жанр: {movie['genres']}")
         st.write(f"Страна: {movie['country']}")
         st.write(f"рейтинг: {movie['age']}")
+        st.write(st.session_state["pred"][index])
     with col2:
         year = str(int(movie['year'])) if not np.isnan(movie['year']) else ""
         st.markdown(f"<h2 style='text-align: left;'>{movie['name']} ({year})</h2>", unsafe_allow_html=True)
         description = ' '.join(movie['description'][:200].split(" ")[:-1]) + '...' if len(movie['description']) > 200 else movie['description']
         e = st.empty()
+        b=toggle_state
+        if movie['description'] !=description and not toggle_state:
+            b = st.button("раскрыть описание",key=index)
         with e:
             if b:
                 st.write(movie['description'])
             else:
                 st.write(description)
+        if toggle_state:
+            ask_rating(movie,index)
+            input = st.text_input(' ',key = "search"+str(index))
+            if input:
+                emb = embed_bert_cls(input)
+                fullemb = np.concatenate(( st.session_state["X"][index,:312], emb))
+                saverank(index,fullemb,5)
+        st.write(f"Актеры: {movie['actors']}")
         imdb,kp = st.columns([1,2])
         with imdb:
+            st.write(f"IMDB: {display_rating(movie['imdb'])}" if not np.isnan(movie['imdb']) else "")
         with kp:
+            st.write(f"Кинопоиск: { display_rating(movie['kp'])}" if not np.isnan(movie['kp']) else "")
         st.write(f"[смотреть]({movie['link']})")
     st.write("----------------------")
 reqs= st.session_state["reqs"] if "reqs" in st.session_state else {}
 @st.cache_data
+def getnums(df,size=0,text=''):
     if text in reqs:
         return reqs[text]
     else:
         return reqs[text]
+def top_indices(array, n):
+    # Получаем индексы элементов, отсортированных по убыванию
+    st.session_state["pred"] = array
+    sorted_indices = np.argsort(array)[::-1]
+    # Выбираем первые n индексов
+    top_n_indices = sorted_indices[:n]
+    return top_n_indices
+# Render one card per top-ranked movie for the current search text.
+for i in predict_rating(input_search):
     display_movie_card(movies, i )
+def ask_rating(movie):
+    # Создаем переменную для хранения оценки
+    rating = 0
+    # Создаем горизонтальный столбец
+    col1, col2, col3, col4, col5 = st.columns(5)
+    # В каждом столбце выводим кнопку оценки
+    with col1:
+        b1 = st.button("1")
+    with col2:
+        b2 = st.button("2")
+    with col3:
+        b3 = st.button("3")
+    with col4:
+        b4 = st.button("4")
+    with col5:
+        b5 = st.button("5")
+    if b1:
+        rating = 1
+    if b2:
+        rating = 2
+    if b3:
+        rating = 3
+    if b4:
+        rating = 4
+    if b5:
+        rating = 5
+    return rating

data.csv CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c9f8fc3da12290a08172ceddd079ad89a06d7e8041f7e800a61661a4fedcefa5
-size 34501073

 version https://git-lfs.github.com/spec/v1
+oid sha256:b3880998a33fa7f246482272f6c0e8270c6d759ee594a94030cf9d722373f604
+size 34515511

embeddings.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f6ebe9af14012e5d2572f995ef84a2f43f07f0235a09e79312ade95b02179d0
+size 36520352

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
 numpy
 pandas
 streamlit

 numpy
 pandas
+scikit_learn
 streamlit
+torch
+transformers