Spaces:
Sleeping
Sleeping
romnatall
committed on
Commit
·
f82d1d4
1
Parent(s):
4368581
new model
Browse files- app.py +151 -39
- data.csv +2 -2
- embeddings.npy +3 -0
- requirements.txt +3 -0
app.py
CHANGED
@@ -3,37 +3,107 @@ import random
|
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
|
|
|
|
|
|
|
|
6 |
|
7 |
movies = pd.read_csv('data.csv')
|
8 |
|
|
|
|
|
9 |
|
10 |
-
# name description link year imdb kp country age actors genres poster
|
11 |
-
# 0 Уэнсдэй В американской хоррор-комедии показана детект... https://www.lordfilm.bot/48211-ujensdjej-2022.... 2022.0 8.1 8.0 США 0 Дженна Ортега, Гвендолин Кристи, Рики Линдхоум... Сериалы, Фильмы про подростков https://www.lordfilm.bot/uploads/posts/2022-12.
|
12 |
|
13 |
-
def display_movie_card(df, index):
|
14 |
-
movie = df.iloc[index]
|
15 |
-
col1, col2 = st.columns([1, 3])
|
16 |
|
17 |
-
|
18 |
-
|
19 |
-
button_id = f"button_{index}"
|
20 |
-
full=st.button('Показать полное описание', key=button_id, help='Click to expand')
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
with col2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
st.markdown(f"<h2 style='text-align: left;'>{movie['Name']}</h2>", unsafe_allow_html=True)
|
25 |
-
description = ' '.join(movie['Description'][:200].split(" ")[:-1]) + '...' if len(movie['Description']) > 200 else movie['Description']
|
26 |
-
if len(movie['Description']) > 200:
|
27 |
-
if full:
|
28 |
-
st.write(movie['Description'])
|
29 |
-
else:
|
30 |
-
st.write(description, unsafe_allow_html=True)
|
31 |
-
st.write(f"[{movie['Name']}]({movie['Link']})")
|
32 |
-
st.write("----------------------")
|
33 |
|
34 |
def display_rating(rating):
|
35 |
-
|
36 |
-
return "(╥﹏╥)" # Смайлик, обозначающий отсутствие рейтинга
|
37 |
stars = int(rating / 2) # Переводим рейтинг из 0-10 в 0-5 и округляем до целого
|
38 |
remainder = rating % 2 # Доля рейтинга, которая не переводится в целое количество звезд
|
39 |
star_str = '🌕' * stars
|
@@ -41,8 +111,6 @@ def display_rating(rating):
|
|
41 |
star_str += '🌗' # Добавляем половину звезды в виде половины луны, если есть доля больше или равная 0.5
|
42 |
return star_str
|
43 |
|
44 |
-
|
45 |
-
|
46 |
def display_movie_card(df, index):
|
47 |
|
48 |
movie = df.iloc[index]
|
@@ -52,39 +120,44 @@ def display_movie_card(df, index):
|
|
52 |
st.image(movie['poster'], use_column_width=True)
|
53 |
|
54 |
st.write(f"Жанр: {movie['genres']}")
|
55 |
-
|
56 |
st.write(f"Страна: {movie['country']}")
|
57 |
st.write(f"рейтинг: {movie['age']}")
|
58 |
-
|
59 |
|
60 |
with col2:
|
61 |
year = str(int(movie['year'])) if not np.isnan(movie['year']) else ""
|
62 |
st.markdown(f"<h2 style='text-align: left;'>{movie['name']} ({year})</h2>", unsafe_allow_html=True)
|
63 |
-
|
64 |
-
|
65 |
description = ' '.join(movie['description'][:200].split(" ")[:-1]) + '...' if len(movie['description']) > 200 else movie['description']
|
66 |
|
67 |
-
|
68 |
-
if k not in st.session_state:
|
69 |
-
st.session_state[k] = False
|
70 |
|
71 |
e = st.empty()
|
72 |
-
b=
|
73 |
-
if movie['description'] !=description:
|
74 |
-
b = st.button("раскрыть описание",key=index
|
|
|
75 |
with e:
|
76 |
if b:
|
77 |
st.write(movie['description'])
|
78 |
else:
|
79 |
st.write(description)
|
80 |
-
st.write(f"Актеры: {movie['actors']}")
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
imdb,kp = st.columns([1,2])
|
83 |
with imdb:
|
84 |
-
st.write(f"IMDB: {display_rating(movie['imdb'])}")
|
85 |
with kp:
|
86 |
-
st.write(f"Кинопоиск: { display_rating(movie['kp'])}")
|
87 |
-
|
88 |
st.write(f"[смотреть]({movie['link']})")
|
89 |
st.write("----------------------")
|
90 |
|
@@ -92,7 +165,7 @@ def display_movie_card(df, index):
|
|
92 |
reqs= st.session_state["reqs"] if "reqs" in st.session_state else {}
|
93 |
|
94 |
@st.cache_data
|
95 |
-
def getnums(df,size=
|
96 |
if text in reqs:
|
97 |
return reqs[text]
|
98 |
else:
|
@@ -101,12 +174,51 @@ def getnums(df,size=10,text=''):
|
|
101 |
return reqs[text]
|
102 |
|
103 |
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
-
|
|
|
|
|
107 |
display_movie_card(movies, i )
|
108 |
|
109 |
|
110 |
|
111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
|
|
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
5 |
import numpy as np
|
6 |
+
import torch
|
7 |
+
from transformers import AutoTokenizer, AutoModel
|
8 |
+
import numpy as np
|
9 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
10 |
|
11 |
movies = pd.read_csv('data.csv')
|
12 |
|
13 |
+
toggle_state = st.sidebar.checkbox("режим разметки")
|
14 |
+
input_search = st.text_input('Search')
|
15 |
|
|
|
|
|
16 |
|
|
|
|
|
|
|
17 |
|
18 |
+
data = np.load('embeddings.npy')
|
19 |
+
|
|
|
|
|
20 |
|
21 |
+
|
22 |
+
@st.cache_resource
def get_embeddings():
    """Load the ``cointegrated/rubert-tiny2`` model and its tokenizer.

    Decorated with ``st.cache_resource``, so the loaded objects are reused
    instead of being re-created on each call.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    checkpoint = "cointegrated/rubert-tiny2"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AutoModel.from_pretrained(checkpoint)
    # GPU placement is intentionally left disabled (was: model.cuda()).
    return model, tokenizer
|
28 |
+
|
29 |
+
@st.cache_data
def embed_bert_cls(text):
    """Embed ``text`` with the model returned by ``get_embeddings``.

    The text is tokenized (with padding and truncation), run through the
    model without gradient tracking, and the hidden state at the first token
    position is taken as the embedding (the CLS slot, per the function name).
    The result is passed through ``torch.nn.functional.normalize``.

    Args:
        text: Input passed straight to the tokenizer.

    Returns:
        The normalized embedding of the first batch item as a NumPy array.
    """
    model, tokenizer = get_embeddings()
    encoded = tokenizer(text, padding=True, truncation=True, return_tensors='pt')
    # Move every tokenizer output onto the same device as the model.
    model_inputs = {name: tensor.to(model.device) for name, tensor in encoded.items()}
    with torch.no_grad():
        output = model(**model_inputs)
    first_token_state = output.last_hidden_state[:, 0, :]
    normalized = torch.nn.functional.normalize(first_token_state)
    return normalized[0].cpu().numpy()
|
38 |
+
|
39 |
+
|
40 |
+
def predict_rating(input_search):
    """Rank all movies against a search query and return the ten best rows.

    Deliberately NOT decorated with ``st.cache_data``. This function writes
    ``st.session_state["X"]`` and, through ``top_indices``,
    ``st.session_state["pred"]``. A cached call skips the function body on a
    cache hit, so those writes would not happen: a new session would find the
    keys missing, and an existing session would keep the values of a
    different query. The expensive step (the text embedding) is still cached
    inside ``embed_bert_cls``.

    Args:
        input_search: Query text typed by the user.

    Returns:
        Whatever ``top_indices(scores, 10)`` returns: the row indices of the
        ten highest cosine similarities between the query embedding and the
        rows of the module-level ``data`` array.
    """
    emb = embed_bert_cls(input_search)

    # One feature row per movie: the movie embedding followed by the query
    # embedding. Kept in the session so the labeling UI can save rows from it.
    st.session_state["X"] = np.column_stack(
        (data, np.tile(emb, (data.shape[0], 1)))
    )

    # NOTE: earlier experiments scored X with a CatBoostRanker ('model.cbm')
    # and a pickled logistic regression ('logreg.pkl'); the active scorer is
    # plain cosine similarity against the precomputed embeddings.
    y = cosine_similarity(data, emb.reshape(1, -1)).reshape(-1)

    return top_indices(y, 10)
|
60 |
+
|
61 |
+
|
62 |
+
def saverank(index, new_X, new_y):
    """Append one labeled example to the on-disk training set.

    The feature vector is appended as a new row of ``X.npy`` and the label as
    a new element of ``y.npy`` (both in the current working directory). If a
    file does not exist yet, it is created; previously the very first call
    crashed with ``FileNotFoundError``.

    Args:
        index: Row index of the movie being labeled. Unused here; kept for
            interface compatibility with existing callers.
        new_X: Feature vector for the example; flattened to a single row.
        new_y: Label (rating) for the example.
    """
    row = np.asarray(new_X).reshape(1, -1)
    label = np.array([new_y])

    # Each file is handled on its own so existing data is never discarded
    # just because the other file is missing.
    try:
        dx = np.load('X.npy')
    except FileNotFoundError:
        dx = np.empty((0, row.shape[1]), dtype=row.dtype)
    try:
        dy = np.load('y.npy')
    except FileNotFoundError:
        dy = np.empty((0,), dtype=label.dtype)

    np.save('X.npy', np.concatenate((dx, row)))
    np.save('y.npy', np.concatenate((dy, label)))
|
69 |
+
|
70 |
+
def ask_rating(movie, index):
    """Show five rating buttons (1-5) and save the rating that was clicked.

    The buttons are laid out in five side-by-side columns. When one of them
    is pressed, the row ``st.session_state["X"][index]`` is stored together
    with the chosen rating via ``saverank``.

    Args:
        movie: The movie record being rated (not used by this function).
        index: Row index of the movie; used to build unique widget keys and
            to pick the feature row that gets saved.
    """
    # 0 means "no button pressed during this run".
    rating = 0

    for score, column in enumerate(st.columns(5), start=1):
        with column:
            pressed = st.button(str(score), key=f"{score}{index}")
        if pressed:
            rating = score

    if rating > 0:
        saverank(index, st.session_state["X"][index], rating)
|
102 |
+
|
103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
|
105 |
def display_rating(rating):
|
106 |
+
|
|
|
107 |
stars = int(rating / 2) # Переводим рейтинг из 0-10 в 0-5 и округляем до целого
|
108 |
remainder = rating % 2 # Доля рейтинга, которая не переводится в целое количество звезд
|
109 |
star_str = '🌕' * stars
|
|
|
111 |
star_str += '🌗' # Добавляем половину звезды в виде половины луны, если есть доля больше или равная 0.5
|
112 |
return star_str
|
113 |
|
|
|
|
|
114 |
def display_movie_card(df, index):
|
115 |
|
116 |
movie = df.iloc[index]
|
|
|
120 |
st.image(movie['poster'], use_column_width=True)
|
121 |
|
122 |
st.write(f"Жанр: {movie['genres']}")
|
|
|
123 |
st.write(f"Страна: {movie['country']}")
|
124 |
st.write(f"рейтинг: {movie['age']}")
|
125 |
+
st.write(st.session_state["pred"][index])
|
126 |
|
127 |
with col2:
|
128 |
year = str(int(movie['year'])) if not np.isnan(movie['year']) else ""
|
129 |
st.markdown(f"<h2 style='text-align: left;'>{movie['name']} ({year})</h2>", unsafe_allow_html=True)
|
|
|
|
|
130 |
description = ' '.join(movie['description'][:200].split(" ")[:-1]) + '...' if len(movie['description']) > 200 else movie['description']
|
131 |
|
132 |
+
|
|
|
|
|
133 |
|
134 |
e = st.empty()
|
135 |
+
b=toggle_state
|
136 |
+
if movie['description'] !=description and not toggle_state:
|
137 |
+
b = st.button("раскрыть описание",key=index)
|
138 |
+
|
139 |
with e:
|
140 |
if b:
|
141 |
st.write(movie['description'])
|
142 |
else:
|
143 |
st.write(description)
|
|
|
144 |
|
145 |
+
|
146 |
+
|
147 |
+
if toggle_state:
|
148 |
+
ask_rating(movie,index)
|
149 |
+
input = st.text_input(' ',key = "search"+str(index))
|
150 |
+
if input:
|
151 |
+
emb = embed_bert_cls(input)
|
152 |
+
fullemb = np.concatenate(( st.session_state["X"][index,:312], emb))
|
153 |
+
saverank(index,fullemb,5)
|
154 |
+
|
155 |
+
st.write(f"Актеры: {movie['actors']}")
|
156 |
imdb,kp = st.columns([1,2])
|
157 |
with imdb:
|
158 |
+
st.write(f"IMDB: {display_rating(movie['imdb'])}" if not np.isnan(movie['imdb']) else "")
|
159 |
with kp:
|
160 |
+
st.write(f"Кинопоиск: { display_rating(movie['kp'])}" if not np.isnan(movie['kp']) else "")
|
|
|
161 |
st.write(f"[смотреть]({movie['link']})")
|
162 |
st.write("----------------------")
|
163 |
|
|
|
165 |
reqs= st.session_state["reqs"] if "reqs" in st.session_state else {}
|
166 |
|
167 |
@st.cache_data
|
168 |
+
def getnums(df,size=0,text=''):
|
169 |
if text in reqs:
|
170 |
return reqs[text]
|
171 |
else:
|
|
|
174 |
return reqs[text]
|
175 |
|
176 |
|
177 |
+
def top_indices(array, n):
    """Return the indices of the ``n`` largest values, best first.

    As a side effect, the full score array is stored in
    ``st.session_state["pred"]``.

    Args:
        array: Array of scores.
        n: Number of indices to return.

    Returns:
        The first ``n`` entries of the descending-order index array.
    """
    st.session_state["pred"] = array
    # argsort is ascending, so reverse it to put the highest scores first.
    descending = np.argsort(array)[::-1]
    return descending[:n]
|
184 |
+
|
185 |
|
186 |
+
|
187 |
+
|
188 |
+
for i in predict_rating(input_search):
|
189 |
display_movie_card(movies, i )
|
190 |
|
191 |
|
192 |
|
193 |
|
194 |
+
def ask_rating(movie):
    """Show five rating buttons (1-5) and return the rating that was clicked.

    NOTE(review): this second ``ask_rating`` is defined after the rendering
    loop and nothing in the visible code calls it. It looks like a leftover
    of the two-argument version defined earlier; confirm before relying on it.

    Args:
        movie: The movie record being rated (not used by this function).

    Returns:
        The number of the pressed button, or 0 if none was pressed.
    """
    rating = 0

    # One button per column, labeled 1 through 5.
    for score, column in enumerate(st.columns(5), start=1):
        with column:
            pressed = st.button(str(score))
        if pressed:
            rating = score

    return rating
|
224 |
|
data.csv
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3880998a33fa7f246482272f6c0e8270c6d759ee594a94030cf9d722373f604
|
3 |
+
size 34515511
|
embeddings.npy
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f6ebe9af14012e5d2572f995ef84a2f43f07f0235a09e79312ade95b02179d0
|
3 |
+
size 36520352
|
requirements.txt
CHANGED
@@ -1,3 +1,6 @@
|
|
1 |
numpy
|
2 |
pandas
|
|
|
3 |
streamlit
|
|
|
|
|
|
1 |
numpy
|
2 |
pandas
|
3 |
+
scikit_learn
|
4 |
streamlit
|
5 |
+
torch
|
6 |
+
transformers
|