Spaces:

bacancydataprophets
/

Skin_Care_Product_Recommender

Sleeping

App Files Files Community

dharak003 committed on Apr 2

Commit

8a65e53

verified ·

1 Parent(s): dfa56e9

Upload 6 files

Browse files

Files changed (6) hide show

app.py +131 -0
data/cosmetics.csv +0 -0
data/image_1.jpg +0 -0
data/image_2.jpg +0 -0
data/test.py +1 -0
requirements.txt +5 -0

app.py ADDED Viewed

	@@ -0,0 +1,131 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+from sklearn.manifold import TSNE
+from scipy.spatial.distance import cdist
+# Page header: inject CSS that colours h1 headings white, then show the intro copy.
+st.markdown('<style>h1{color: white;}</style>', unsafe_allow_html=True)
+st.title('Call on Doc Skin Care Product Recommender')
+st.write('Find the Right Skin Care for you')
+st.write("Hi there! If you have a skincare product you currently like I can help you find a similar one based on the ingredients.")
+st.write('Please select a product below so I can recommend similar ones')
+# Product catalogue, read from the CSV shipped next to the app.
+df = pd.read_csv("./data/cosmetics.csv")
+# Step 1: pick a category (options appear in the order they occur in the CSV).
+category_options = df['Label'].unique()
+category = st.selectbox(label='Select a product category', options=category_options)
+category_subset = df.loc[df['Label'] == category]
+# Step 2: pick a brand among those present in the chosen category.
+brand_options = sorted(category_subset['Brand'].unique())
+brand = st.selectbox(label='Select a brand', options=brand_options)
+category_brand_subset = category_subset.loc[category_subset['Brand'] == brand]
+# Step 3: pick one product of that brand.
+product_options = sorted(category_brand_subset['Name'].unique())
+product = st.selectbox(label='Select the product', options=product_options)
+# Disabled skin-type selector, kept for reference:
+#skin_type = st.selectbox(label='Select your skin type', options= ['Combination',
+#       'Dry', 'Normal', 'Oily', 'Sensitive'] )
+## Helper functions
+# Define the oh_encoder function
+def oh_encoder(tokens, index=None, size=None):
+    """Encode one product's ingredient tokens as a binary vector.
+
+    Args:
+        tokens: iterable of ingredient names; each must be a key of `index`.
+        index: mapping from ingredient name to column position. When omitted,
+            the module-level `ingredient_idx` is used.
+        size: length of the returned vector. When omitted, the module-level
+            `N` is used if `index` was omitted too, otherwise `len(index)`.
+
+    Returns:
+        A 1-D float array of zeros with a 1 at the position of every
+        ingredient in `tokens` (repeated ingredients still yield a single 1).
+
+    Raises:
+        KeyError: if a token is not present in the index.
+    """
+    if index is None:
+        # Backward-compatible path: fall back to the script's global vocabulary.
+        index = ingredient_idx
+        if size is None:
+            size = N
+    elif size is None:
+        size = len(index)
+    x = np.zeros(size)
+    for ingredient in tokens:
+        # Flag the column that belongs to this ingredient
+        x[index[ingredient]] = 1
+    return x
+def closest_point(point, points):
+    """Return the entry of `points` that lies nearest to `point`.
+
+    Distances come from `cdist` with its default metric; when several
+    entries tie, the one that appears first in `points` is returned.
+    """
+    distances = cdist([point], points)
+    nearest = distances.argmin()
+    return points[nearest]
+# Build the product x ingredient matrix `A` for the selected category.
+# `category_subset` already holds the rows of the chosen category (it is
+# filtered right after the category select box), so it is not filtered again.
+if product is not None:
+    #skincare_type = category_subset[category_subset[str(skin_type)] == 1]
+    # Re-index to 0..M-1 so row labels line up with the rows of `A`
+    category_subset = category_subset.reset_index(drop=True)
+    # Vocabulary (ingredient -> column), per-product token lists, next free column
+    ingredient_idx = {}
+    corpus = []
+    idx = 0
+    # Tokenize every product's ingredient list
+    for ingredients in category_subset['Ingredients']:
+        # A missing ingredient cell is read as NaN (a float); treat it as
+        # "no ingredients" instead of crashing on .lower()
+        if isinstance(ingredients, str):
+            tokens = ingredients.lower().split(', ')
+        else:
+            tokens = []
+        corpus.append(tokens)
+        for ingredient in tokens:
+            if ingredient not in ingredient_idx:
+                ingredient_idx[ingredient] = idx
+                idx += 1
+    # Number of products (rows) and distinct ingredients (columns)
+    M = len(category_subset)
+    N = len(ingredient_idx)
+    # Document-term matrix: A[i, j] == 1 when product i lists ingredient j
+    A = np.zeros((M, N))
+    for i, tokens in enumerate(corpus):
+        A[i, :] = oh_encoder(tokens)
+def _ingredient_set(ingredients):
+    """Return the lower-cased, whitespace-stripped ingredient names of a comma-separated string."""
+    if not isinstance(ingredients, str):
+        # Missing ingredient cell (NaN): nothing to compare
+        return set()
+    return {part.strip().lower() for part in ingredients.split(',') if part.strip()}
+model_run = st.button('Find similar products!')
+# `A` is only built when a product is selected, so require one before running.
+if model_run and product is not None:
+    st.write('Based on the ingredients of the product you selected')
+    st.write('here are the top 10 products that are the most similar :sparkles:')
+    # Project the ingredient matrix to 2-D; the fixed seed keeps results repeatable
+    model = TSNE(n_components=2, learning_rate=150, random_state=42)
+    tsne_features = model.fit_transform(A)
+    # Make X, Y columns
+    category_subset['X'] = tsne_features[:, 0]
+    category_subset['Y'] = tsne_features[:, 1]
+    # Identify the selected product by brand AND name: names alone are not
+    # guaranteed to be unique across brands within a category.
+    is_target = (category_subset['Name'] == product) & (category_subset['Brand'] == brand)
+    target_pos = np.flatnonzero(is_target.values)[0]
+    target_label = category_subset.index[target_pos]
+    # Euclidean distance from the target to every product, as plain floats
+    # (one cdist call instead of one 1x1 array per row)
+    target_xy = tsne_features[target_pos]
+    category_subset['distance'] = cdist([target_xy], tsne_features, metric='euclidean')[0]
+    # Closest products first
+    top_matches = category_subset.sort_values(by=['distance'])
+    # Remove the selected product itself by label rather than assuming it
+    # sorted first (another product can sit at distance 0 as well)
+    top_matches = top_matches.drop(index=target_label)
+    # Compute ingredients in common, compared case-insensitively to match the
+    # lower-casing used when the ingredient matrix was built
+    target_set = _ingredient_set(category_subset['Ingredients'].iloc[target_pos])
+    top_matches['Ingredients in common'] = [target_set.intersection(_ingredient_set(other)) for other in top_matches['Ingredients']]
+    # Select relevant columns
+    top_matches = top_matches[['Label', 'Brand', 'Name', 'Price', 'Ingredients', 'Ingredients in common']]
+    top_matches = top_matches.reset_index(drop=True)
+    # Number the displayed rows from 1
+    top_matches.index = top_matches.index + 1
+    st.dataframe(top_matches.head(10))

data/cosmetics.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/image_1.jpg ADDED Viewed

data/image_2.jpg ADDED Viewed

data/test.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ testing

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+numpy==1.18.1
+pandas==1.0.1
+scipy==1.4.1
+streamlit==0.69.2
+scikit_learn==0.23.2