Spaces:
Sleeping
Sleeping
romnatall
committed on
Commit
·
fe4db0a
1
Parent(s):
4eb227a
ребаланс весов
Browse files
- app.py +21 -7
- data/books_model (2).ipynb +43 -2
- data/vectorizer_actors.pkl +3 -0
app.py
CHANGED
@@ -19,6 +19,8 @@ input_search = st.text_input('Search')
|
|
19 |
|
20 |
|
21 |
|
|
|
|
|
22 |
data = np.load('data/embeddings_bert.npy')
|
23 |
|
24 |
def top_indices(array, n,upsc=False):
|
@@ -44,6 +46,8 @@ def embed_bert_cls(text, ):
|
|
44 |
model_output = model(**{k: v.to(model.device) for k, v in t.items()})
|
45 |
embeddings = model_output.last_hidden_state[:, 0, :]
|
46 |
embeddings = torch.nn.functional.normalize(embeddings)
|
|
|
|
|
47 |
return embeddings[0].cpu().numpy()
|
48 |
|
49 |
@st.cache_resource
|
@@ -53,21 +57,31 @@ def getmodels():
|
|
53 |
logreg = pickle.load(f)
|
54 |
with open('data/tf_idf_vectorizer.pkl', 'rb') as f:
|
55 |
vectorizer = pickle.load(f)
|
56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
|
58 |
@st.cache_data
|
59 |
def predict_rating(input_search):
|
60 |
|
61 |
|
62 |
-
logreg, vectorizer=getmodels()
|
63 |
|
64 |
emb = embed_bert_cls(input_search)
|
65 |
X=np.column_stack((data, np.tile(emb, (data.shape[0], 1))))
|
66 |
|
67 |
user_tfidf = vectorizer.transform([input_search])
|
68 |
-
|
69 |
-
|
70 |
-
|
|
|
71 |
similarity_scores_desc = cosine_similarity(user_tfidf, tfidf_matrix)
|
72 |
similarity_scores_name = cosine_similarity(user_tfidf, tfidf_matrix2)
|
73 |
|
@@ -75,10 +89,10 @@ def predict_rating(input_search):
|
|
75 |
y_emb = cosine_similarity(data, emb.reshape(1, -1)).reshape(-1)
|
76 |
|
77 |
|
78 |
-
y=(similarity_scores_desc*0.9+similarity_scores_name*0.
|
79 |
st.session_state["pred"]=y
|
80 |
|
81 |
-
return top_indices(y,
|
82 |
|
83 |
|
84 |
|
|
|
19 |
|
20 |
|
21 |
|
22 |
+
|
23 |
+
|
24 |
data = np.load('data/embeddings_bert.npy')
|
25 |
|
26 |
def top_indices(array, n,upsc=False):
|
|
|
46 |
model_output = model(**{k: v.to(model.device) for k, v in t.items()})
|
47 |
embeddings = model_output.last_hidden_state[:, 0, :]
|
48 |
embeddings = torch.nn.functional.normalize(embeddings)
|
49 |
+
|
50 |
+
|
51 |
return embeddings[0].cpu().numpy()
|
52 |
|
53 |
@st.cache_resource
|
|
|
57 |
logreg = pickle.load(f)
|
58 |
with open('data/tf_idf_vectorizer.pkl', 'rb') as f:
|
59 |
vectorizer = pickle.load(f)
|
60 |
+
|
61 |
+
with open('data/vectorizer_actors.pkl', 'rb') as f:
|
62 |
+
vectorizer_actors = pickle.load(f)
|
63 |
+
|
64 |
+
tfidf_matrix = vectorizer.transform(movies['description'])
|
65 |
+
tfidf_matrix2 = vectorizer.transform(movies['name'])
|
66 |
+
tfidf_actors = vectorizer_actors.transform(movies['actors'].fillna(''))
|
67 |
+
|
68 |
+
|
69 |
+
return logreg, vectorizer,vectorizer_actors ,tfidf_matrix,tfidf_matrix2,tfidf_actors
|
70 |
|
71 |
@st.cache_data
|
72 |
def predict_rating(input_search):
|
73 |
|
74 |
|
75 |
+
logreg, vectorizer,vectorizer_actors,tfidf_matrix,tfidf_matrix2,tfidf_actors=getmodels()
|
76 |
|
77 |
emb = embed_bert_cls(input_search)
|
78 |
X=np.column_stack((data, np.tile(emb, (data.shape[0], 1))))
|
79 |
|
80 |
user_tfidf = vectorizer.transform([input_search])
|
81 |
+
user_actors = vectorizer_actors.transform([input_search])
|
82 |
+
|
83 |
+
similarity_actors=cosine_similarity(user_actors, tfidf_actors).reshape(-1)
|
84 |
+
|
85 |
similarity_scores_desc = cosine_similarity(user_tfidf, tfidf_matrix)
|
86 |
similarity_scores_name = cosine_similarity(user_tfidf, tfidf_matrix2)
|
87 |
|
|
|
89 |
y_emb = cosine_similarity(data, emb.reshape(1, -1)).reshape(-1)
|
90 |
|
91 |
|
92 |
+
y=(similarity_scores_desc*0.9+similarity_scores_name*0.0083+y_emb*0.9+similarity_actors*0.5).reshape(-1)
|
93 |
st.session_state["pred"]=y
|
94 |
|
95 |
+
return top_indices(y, 20,upsc=False)
|
96 |
|
97 |
|
98 |
|
data/books_model (2).ipynb
CHANGED
@@ -355,6 +355,47 @@
|
|
355 |
"data.head()"
|
356 |
]
|
357 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
358 |
{
|
359 |
"cell_type": "code",
|
360 |
"execution_count": 3,
|
@@ -418,12 +459,12 @@
|
|
418 |
},
|
419 |
{
|
420 |
"cell_type": "code",
|
421 |
-
"execution_count":
|
422 |
"metadata": {},
|
423 |
"outputs": [],
|
424 |
"source": [
|
425 |
"import pickle\n",
|
426 |
-
"with open('
|
427 |
" pickle.dump(vectorizer, f)"
|
428 |
]
|
429 |
},
|
|
|
355 |
"data.head()"
|
356 |
]
|
357 |
},
|
358 |
+
{
|
359 |
+
"cell_type": "code",
|
360 |
+
"execution_count": 6,
|
361 |
+
"metadata": {},
|
362 |
+
"outputs": [
|
363 |
+
{
|
364 |
+
"data": {
|
365 |
+
"text/plain": [
|
366 |
+
"0 В американской хоррор-комедии показана детект...\n",
|
367 |
+
"1 Перестройка уже шагнула с кремлевских трибун ...\n",
|
368 |
+
"2 В Городе Стихий обитатели огня, воды, земли и...\n",
|
369 |
+
"3 Свои незабываемые каникулы, в которых есть ме...\n",
|
370 |
+
"4 Увлекательные приключения скандинавского бога...\n",
|
371 |
+
" ... \n",
|
372 |
+
"28443 Петер фон Кант - успешный режиссёр. Его прият...\n",
|
373 |
+
"28444 Объединившись с бывшим полицейским Зоуи Кэсс,...\n",
|
374 |
+
"28445 13 перетекающих из одного в другой эпизодов, ...\n",
|
375 |
+
"28446 Стремясь спасти свою сестру Софию, попавшую п...\n",
|
376 |
+
"28447 Три друга, Арав, Бхушан и Сунил, оказываются ...\n",
|
377 |
+
"Name: description, Length: 28448, dtype: object"
|
378 |
+
]
|
379 |
+
},
|
380 |
+
"execution_count": 6,
|
381 |
+
"metadata": {},
|
382 |
+
"output_type": "execute_result"
|
383 |
+
}
|
384 |
+
],
|
385 |
+
"source": [
|
386 |
+
"data['description']"
|
387 |
+
]
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"cell_type": "code",
|
391 |
+
"execution_count": 8,
|
392 |
+
"metadata": {},
|
393 |
+
"outputs": [],
|
394 |
+
"source": [
|
395 |
+
"vectorizer = TfidfVectorizer()\n",
|
396 |
+
"a=vectorizer.fit_transform(data['actors'].fillna(''))\n"
|
397 |
+
]
|
398 |
+
},
|
399 |
{
|
400 |
"cell_type": "code",
|
401 |
"execution_count": 3,
|
|
|
459 |
},
|
460 |
{
|
461 |
"cell_type": "code",
|
462 |
+
"execution_count": 9,
|
463 |
"metadata": {},
|
464 |
"outputs": [],
|
465 |
"source": [
|
466 |
"import pickle\n",
|
467 |
+
"with open('vectorizer_actors.pkl', 'wb') as f:\n",
|
468 |
" pickle.dump(vectorizer, f)"
|
469 |
]
|
470 |
},
|
data/vectorizer_actors.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a171b209102d3999b7c6a5f91f26f02d7506c870ad740cb3c87b4a03593c4f68
|
3 |
+
size 2967595
|