dharak003 committed on
Commit
8a65e53
·
verified ·
1 Parent(s): dfa56e9

Upload 6 files

Browse files
Files changed (6) hide show
  1. app.py +131 -0
  2. data/cosmetics.csv +0 -0
  3. data/image_1.jpg +0 -0
  4. data/image_2.jpg +0 -0
  5. data/test.py +1 -0
  6. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.manifold import TSNE
5
+ from scipy.spatial.distance import cdist
6
+
7
# --- Page header and introductory copy ---
st.markdown('<style>h1{color: white;}</style>', unsafe_allow_html=True)
st.title('Call on Doc Skin Care Product Recommender')
for intro_line in (
    'Find the Right Skin Care for you',
    "Hi there! If you have a skincare product you currently like I can help you find a similar one based on the ingredients.",
    'Please select a product below so I can recommend similar ones',
):
    st.write(intro_line)

# --- Product catalogue ---
df = pd.read_csv("./data/cosmetics.csv")

# --- Drill down: category -> brand -> product ---
# Each select box only offers values present in the subset chosen so far.
category_options = df['Label'].unique()
category = st.selectbox(label='Select a product category', options=category_options)
category_subset = df[df['Label'] == category]

brand_options = sorted(category_subset['Brand'].unique())
brand = st.selectbox(label='Select a brand', options=brand_options)
category_brand_subset = category_subset[category_subset['Brand'] == brand]

product_options = sorted(category_brand_subset['Name'].unique())
product = st.selectbox(label='Select the product', options=product_options)
25
+
26
+ #skin_type = st.selectbox(label='Select your skin type', options= ['Combination',
27
+ # 'Dry', 'Normal', 'Oily', 'Sensitive'] )
28
+
29
+ ## Helper functions
30
+ # Define the oh_encoder function
31
def oh_encoder(tokens):
    """Encode a list of ingredient tokens as a 0/1 vector.

    Relies on two module-level names that the script fills in before
    calling this function: ``N`` (length of the returned vector) and
    ``ingredient_idx`` (mapping from ingredient token to its position).

    Args:
        tokens: Iterable of ingredient strings; each must be a key of
            ``ingredient_idx`` (an unknown token raises ``KeyError``).

    Returns:
        A NumPy array of length ``N`` with 1 at the position of every
        ingredient in ``tokens`` and 0 elsewhere.
    """
    encoded = np.zeros(N)
    # Resolve each ingredient to its position and flag it.
    for position in (ingredient_idx[name] for name in tokens):
        encoded[position] = 1
    return encoded
39
+
40
def closest_point(point, points):
    """Return the element of ``points`` that lies nearest to ``point``.

    Distances are computed with ``cdist`` using its default metric; when
    several candidates tie, the first one in ``points`` wins.
    """
    distances = cdist([point], points)
    nearest = distances.argmin()
    return points[nearest]
43
+
44
+
45
# --- Recommendation flow -------------------------------------------------
# 1. Build a product x ingredient 0/1 matrix for the selected category.
# 2. On button press, project it to 2-D with t-SNE and rank the other
#    products by their distance to the selected product in that plane.

if category is not None:
    category_subset = df[df['Label'] == category]

# Stays None until a matrix has been built for an actual product selection,
# so the button handler below can tell whether it has anything to work with.
A = None

if product is not None:
    # Reset index so row labels match the row positions of the matrix A.
    category_subset = category_subset.reset_index(drop=True)

    # Tokenize every ingredient list and give each distinct ingredient a
    # column index in order of first appearance.
    ingredient_idx = {}
    corpus = []
    for ingredients in category_subset['Ingredients']:
        tokens = ingredients.lower().split(', ')
        corpus.append(tokens)
        for ingredient in tokens:
            if ingredient not in ingredient_idx:
                ingredient_idx[ingredient] = len(ingredient_idx)

    # Number of products (rows) and distinct ingredients (columns).
    # N and ingredient_idx are read by oh_encoder as module-level names.
    M = len(category_subset)
    N = len(ingredient_idx)

    # Document-term matrix: one one-hot-encoded row per product.
    A = np.zeros((M, N))
    for row, tokens in enumerate(corpus):
        A[row, :] = oh_encoder(tokens)

model_run = st.button('Find similar products!')

if model_run and A is None:
    # Nothing was selected, so there is no matrix to run the model on.
    st.warning('Please select a product first.')

if model_run and A is not None:

    st.write('Based on the ingredients of the product you selected')
    st.write('here are the top 10 products that are the most similar :sparkles:')

    # Run the model
    model = TSNE(n_components=2, learning_rate=150, random_state=42)
    tsne_features = model.fit_transform(A)

    # Make X, Y columns
    category_subset['X'] = tsne_features[:, 0]
    category_subset['Y'] = tsne_features[:, 1]

    # Match on brand as well as name: the category subset spans every brand,
    # and a name alone could select a same-named product from another brand.
    is_target = (category_subset['Name'] == product) & (category_subset['Brand'] == brand)
    target = category_subset[is_target]
    target_row = target.index[0]

    target_x = target['X'].values[0]
    target_y = target['Y'].values[0]

    # Scalar Euclidean distance of every product to the target in t-SNE space.
    category_subset['distance'] = np.hypot(
        category_subset['X'] - target_x,
        category_subset['Y'] - target_y,
    )

    # Remove the selected product itself, then sort closest-first (ascending).
    top_matches = category_subset.drop(index=target_row).sort_values(by=['distance'])

    # Compute ingredients in common (exact, case-sensitive match after
    # splitting on commas and trimming surrounding spaces).
    c1_set = {x.strip(' ') for x in target['Ingredients'].values[0].split(",")}
    top_matches['Ingredients in common'] = [
        c1_set.intersection(x.strip(' ') for x in ingredients.split(","))
        for ingredients in top_matches['Ingredients']
    ]

    # Select relevant columns
    top_matches = top_matches[['Label', 'Brand', 'Name', 'Price', 'Ingredients', 'Ingredients in common']]
    top_matches = top_matches.reset_index(drop=True)
    # Number the rows from 1 so the displayed index reads as a rank.
    top_matches.index = top_matches.index + 1

    st.dataframe(top_matches.head(10))
130
+
131
+
data/cosmetics.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/image_1.jpg ADDED
data/image_2.jpg ADDED
data/test.py ADDED
@@ -0,0 +1 @@
 
 
1
+ testing
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ numpy==1.18.1
2
+ pandas==1.0.1
3
+ scipy==1.4.1
4
+ streamlit==0.69.2
5
+ scikit_learn==0.23.2