Spaces:

bacancydataprophets
/

Skin_Care_Product_Recommender

Sleeping

App Files Files Community

Skin_Care_Product_Recommender / app.py

dharak003

Upload 6 files

8a65e53 verified 3 months ago

raw

history blame contribute delete

4.39 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.manifold import TSNE
	from scipy.spatial.distance import cdist

	# Page header: force the H1 colour via injected CSS, then show the title
	# and the introductory copy in display order.
	st.markdown('<style>h1{color: white;}</style>', unsafe_allow_html=True)
	st.title('Call on Doc Skin Care Product Recommender')

	intro_lines = (
	    'Find the Right Skin Care for you',
	    "Hi there! If you have a skincare product you currently like I can help you find a similar one based on the ingredients.",
	    'Please select a product below so I can recommend similar ones',
	)
	for intro_line in intro_lines:
	    st.write(intro_line)
	# Read the product catalogue that feeds every selector below.
	df = pd.read_csv("./data/cosmetics.csv")

	# Narrow the catalogue one step at a time: category -> brand -> product.
	category_choices = df['Label'].unique()
	category = st.selectbox(label='Select a product category', options=category_choices)
	in_category = df['Label'] == category
	category_subset = df[in_category]

	# Brands offered are only those present in the chosen category.
	brand_choices = sorted(category_subset['Brand'].unique())
	brand = st.selectbox(label='Select a brand', options=brand_choices)
	of_brand = category_subset['Brand'] == brand
	category_brand_subset = category_subset[of_brand]

	# Products offered are only those of the chosen brand within the category.
	product_choices = sorted(category_brand_subset['Name'].unique())
	product = st.selectbox(label='Select the product', options=product_choices)

	# Skin-type selector is currently disabled:
	#skin_type = st.selectbox(label='Select your skin type', options= ['Combination',
	# 'Dry', 'Normal', 'Oily', 'Sensitive'] )

	## Helper functions
	# Define the oh_encoder function
	def oh_encoder(tokens, ingredient_index=None, n_ingredients=None):
	    """Encode a list of ingredient tokens as a binary (multi-hot) vector.

	    Args:
	        tokens: Iterable of ingredient names for a single product.
	        ingredient_index: Optional mapping of ingredient name -> column
	            position. When omitted, the module-level ``ingredient_idx``
	            dictionary is used (the original behaviour).
	        n_ingredients: Optional length of the output vector. When omitted,
	            it is the module-level ``N`` if the global index is used, or
	            ``len(ingredient_index)`` if an explicit index was passed.

	    Returns:
	        A 1-D ``numpy`` float array with 1 at the position of every
	        ingredient in ``tokens`` and 0 elsewhere.

	    Raises:
	        KeyError: If a token is missing from the ingredient index.
	    """
	    if ingredient_index is None:
	        # Fall back to the vocabulary built by the script at module level.
	        ingredient_index = ingredient_idx
	        if n_ingredients is None:
	            n_ingredients = N
	    elif n_ingredients is None:
	        n_ingredients = len(ingredient_index)

	    x = np.zeros(n_ingredients)
	    for ingredient in tokens:
	        # Flag the column that belongs to this ingredient.
	        x[ingredient_index[ingredient]] = 1
	    return x

	def closest_point(point, points):
	    """Return the element of ``points`` that is nearest to ``point``.

	    Distances are computed with ``scipy.spatial.distance.cdist`` using its
	    default metric.

	    Args:
	        point: A single coordinate sequence, e.g. ``(x, y)``.
	        points: A non-empty sequence of coordinate sequences of the same
	            dimensionality as ``point``.

	    Returns:
	        The item of ``points`` (as stored, not a copy) with the smallest
	        distance to ``point``; the first one wins on ties.

	    Raises:
	        ValueError: If ``points`` is empty.
	    """
	    if len(points) == 0:
	        # cdist/argmin would fail with a less helpful message here.
	        raise ValueError("points must contain at least one candidate")
	    distances = cdist([point], points)
	    return points[distances.argmin()]


	# Build the product x ingredient matrix for the selected category.
	# Nothing is built until a product has been chosen.
	if product is not None:
	    # Re-derive the category rows and give them a clean 0..M-1 index.
	    category_subset = df[df['Label'] == category].reset_index(drop=True)
	    #skincare_type = category_subset[category_subset[str(skin_type)] == 1]

	    # Display data frame
	    #st.dataframe(category_subset)

	    # ingredient_idx maps each distinct ingredient to its column number;
	    # corpus keeps the token list of every product, in row order.
	    ingredient_idx = {}
	    corpus = []

	    # Tokenization: lower-case the ingredient string and split on ', '.
	    # NOTE(review): assumes every 'Ingredients' value is a string; a missing
	    # value would fail on .lower() - confirm against the CSV.
	    for ingredients in category_subset['Ingredients']:
	        tokens = ingredients.lower().split(', ')
	        corpus.append(tokens)
	        for ingredient in tokens:
	            # First occurrence gets the next free column number.
	            ingredient_idx.setdefault(ingredient, len(ingredient_idx))

	    # Number of products (rows) and distinct ingredients (columns).
	    M = len(category_subset)
	    N = len(ingredient_idx)

	    # Document-term matrix: A[r, c] == 1 when product r lists ingredient c.
	    A = np.zeros((M, N))
	    for row, tokens in enumerate(corpus):
	        A[row, :] = oh_encoder(tokens)

	model_run = st.button('Find similar products!')

	# Recommend only when the button was pressed and a product is selected
	# (the ingredient matrix A is only built for a selected product).
	if model_run and product is not None:

	    st.write('Based on the ingredients of the product you selected')
	    st.write('here are the top 10 products that are the most similar :sparkles:')

	    # Project the ingredient matrix to 2-D with t-SNE. The perplexity is
	    # 30 (scikit-learn's documented default) unless the category has 30 or
	    # fewer products, where it is lowered to stay below the sample count.
	    n_products = len(category_subset)
	    model = TSNE(
	        n_components=2,
	        learning_rate=150,
	        random_state=42,
	        perplexity=min(30.0, max(n_products - 1, 1)),
	    )
	    tsne_features = model.fit_transform(A)

	    # Make X, Y columns
	    category_subset['X'] = tsne_features[:, 0]
	    category_subset['Y'] = tsne_features[:, 1]

	    # First row whose name matches the selected product.
	    target = category_subset[category_subset['Name'] == product]
	    target_label = target.index[0]
	    target_point = target[['X', 'Y']].to_numpy()[:1]

	    # Euclidean distance from the target to every product, computed in one
	    # call so the column holds plain floats.
	    all_points = category_subset[['X', 'Y']].to_numpy()
	    category_subset['distance'] = cdist(target_point, all_points, metric='euclidean')[0]

	    # Remove the selected product itself, then order nearest-first.
	    top_matches = category_subset.drop(index=target_label).sort_values(by=['distance'])

	    # Ingredients shared with the target: split on ',' and strip spaces.
	    target_ingredients = {
	        name.strip(' ') for name in target['Ingredients'].values[0].split(",")
	    }
	    top_matches['Ingredients in common'] = [
	        target_ingredients.intersection(name.strip(' ') for name in listing.split(","))
	        for listing in top_matches['Ingredients']
	    ]

	    # Select relevant columns
	    top_matches = top_matches[['Label', 'Brand', 'Name', 'Price', 'Ingredients', 'Ingredients in common']]
	    # Number the rows from 1 so the displayed index reads as a rank.
	    top_matches.index = pd.RangeIndex(1, len(top_matches) + 1)

	    st.dataframe(top_matches.head(10))