Spaces:
Runtime error
Runtime error
File size: 5,303 Bytes
69abbc0 5511f86 2db6e66 03bdce4 69abbc0 5511f86 69abbc0 03bdce4 2db6e66 03bdce4 2db6e66 03bdce4 69abbc0 2db6e66 03bdce4 69abbc0 5511f86 69abbc0 2db6e66 053c69e 69abbc0 2db6e66 69abbc0 5511f86 69abbc0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import streamlit as st
from pathlib import Path
import json
from support_functions import HealthseaSearch
def visualize_dataset() -> None:
    """Render the Healthsea dataset explorer page with Streamlit.

    The page is built top to bottom in this order:

    1. Load the four JSON files under ``data/`` (paths are relative to the
       current working directory) and build a ``HealthseaSearch`` from them.
    2. Show an introduction and a disclaimer.
    3. Show four KPI tiles (products, reviews, conditions, benefits).
    4. Show two expanders listing all conditions and all benefits.
    5. Read a search term and a result count from the user.
    6. Show the product table for the search term, KPI tiles for the matched
       aspect and, when the aspect has aliases, pass the aspect vectors to
       ``search_engine.pyvis``.
    7. Show the substance table and its KPI tile.

    Returns:
        None. All output goes to the Streamlit page.
    """
    # Configuration
    health_aspect_path = Path("data/health_aspects.json")
    product_path = Path("data/products.json")
    condition_path = Path("data/condition_vectors.json")
    benefit_path = Path("data/benefit_vectors.json")

    # Load data
    # NOTE(review): ``st.cache`` is deprecated in recent Streamlit releases in
    # favour of ``st.cache_data`` / ``st.cache_resource``; migrate once the
    # Streamlit version used by this app is confirmed.
    @st.cache(allow_output_mutation=True)
    def load_data(
        _health_aspect_path: Path,
        _product_path: Path,
        _condition_path: Path,
        _benefit_path: Path,
    ):
        """Parse the four JSON files and return their contents as a 4-tuple.

        The files are opened as UTF-8 explicitly so that parsing does not
        depend on the platform's default locale encoding.
        """
        with open(_health_aspect_path, encoding="utf-8") as reader:
            health_aspects = json.load(reader)
        with open(_product_path, encoding="utf-8") as reader:
            products = json.load(reader)
        with open(_condition_path, encoding="utf-8") as reader:
            conditions = json.load(reader)
        with open(_benefit_path, encoding="utf-8") as reader:
            benefits = json.load(reader)
        return health_aspects, products, conditions, benefits

    # Functions
    def kpi(n, text):
        """Return the HTML snippet for one KPI tile: value ``n`` over ``text``."""
        return (
            "\n"
            "<div class='kpi'>\n"
            f"<h1 class='kpi_header'>{n}</h1>\n"
            f"<span>{text}</span>\n"
            "</div>\n"
        )

    def central_text(text):
        """Return ``text`` wrapped in an ``<h2 class='central_text'>`` element."""
        return f"<h2 class='central_text'>{text}</h2>"

    def round_to_k(value):
        """Format ``value`` in thousands with one decimal, e.g. 933240 -> '933.2k'."""
        return f"{round(value / 1000, 1)}k"

    # Loading data
    health_aspects, products, conditions, benefits = load_data(
        health_aspect_path, product_path, condition_path, benefit_path
    )
    search_engine = HealthseaSearch(health_aspects, products, conditions, benefits)

    # Introduction and disclaimer
    st.info(
        "This app showcases a dataset of up to one million reviews that was analyzed by the Healthsea pipeline. You can search for any health aspect, whether it's a disease (e.g. joint pain) or a positive state of health (e.g. energy), the app will output a list of products and substances.\n"
        "These products have a score which is calculated by the content of their reviews."
    )
    st.warning(
        "Please note that Healthsea is a research project and a proof-of-concept that presents a technical approach on analyzing user-generated reviews.\n"
        "The results produced by Healthsea should not be used as a foundation for treating health problems and neither do we want to advocate that supplementary products are able to solve all health issues."
    )
    st.markdown("---")

    # KPI
    st.markdown(central_text("📊 Dataset"), unsafe_allow_html=True)
    kpi_products, kpi_reviews, kpi_condition, kpi_benefit = st.columns(4)

    # Hard-coded: the review count is not derived from any of the loaded files.
    review_count = 933240

    kpi_products.markdown(
        kpi(round_to_k(len(products)), "Products"), unsafe_allow_html=True
    )
    kpi_reviews.markdown(
        kpi(round_to_k(review_count), "Reviews"), unsafe_allow_html=True
    )
    kpi_condition.markdown(
        kpi(round_to_k(len(conditions)), "Conditions"), unsafe_allow_html=True
    )
    kpi_benefit.markdown(
        kpi(round_to_k(len(benefits)), "Benefits"), unsafe_allow_html=True
    )
    st.markdown("---")

    # Expander
    show_conditions, show_benefits = st.columns(2)
    with show_conditions.expander("Top mentioned Conditions"):
        st.write(search_engine.get_all_conditions_df())
    with show_benefits.expander("Top mentioned Benefits"):
        st.write(search_engine.get_all_benefits_df())
    st.markdown("---")

    # Search
    search = st.text_input(label="Search for an health aspect", value="joint pain")
    n = st.slider("Show top n results", min_value=10, max_value=1000, value=25)
    st.markdown("---")

    # Products
    st.markdown(central_text("🧊 Products"), unsafe_allow_html=True)
    st.info(
        "The products are scored based on what reviewers say. Additional variables in the scoring function are product rating, helpful count and whether the review is considered 'fake'. "
    )

    # DataFrame
    st.write(search_engine.get_products_df(search, n))

    # KPI & Alias
    # Looked up once and reused below for the alias, product and substance
    # counts — assumes get_aspect() is deterministic for a given query
    # (TODO confirm against HealthseaSearch).
    aspect = search_engine.get_aspect(search)
    aspect_alias = aspect["alias"]
    kpi_product_mentions, kpi_alias = st.columns(2)
    kpi_product_mentions.markdown(
        kpi(len(aspect["products"]), "Products"), unsafe_allow_html=True
    )

    # NOTE(review): everything down to the pyvis() call is treated as the body
    # of this `if`, since the info text only applies when similar aspects
    # exist — confirm against the intended layout.
    if len(aspect_alias) > 0:
        kpi_alias.markdown(
            kpi(len(aspect_alias), "Similar health aspects"),
            unsafe_allow_html=True,
        )

        # (name, vector) pairs: the searched aspect first, then each alias.
        main_aspect = search_engine.get_aspect_meta(search)
        vectors = [(main_aspect["name"], main_aspect["vector"])]
        for alias in aspect_alias:
            alias_meta = search_engine.get_aspect_meta(alias)
            vectors.append((alias_meta["name"], alias_meta["vector"]))

        st.markdown("\n")
        st.info(
            "To improve the search, the table also shows results of other health aspects with a high similarity"
        )
        search_engine.pyvis(vectors)

    st.markdown("---")

    # Substances
    st.markdown(central_text("🎯 Substances"), unsafe_allow_html=True)
    st.info("The scores of the substances are based on the products")

    # DataFrame
    st.write(search_engine.get_substances_df(search, n))

    # Second column is intentionally left empty so the tile takes half width.
    kpi_substances, _ = st.columns(2)
    kpi_substances.markdown(
        kpi(len(aspect["substance"]), "Substances"),
        unsafe_allow_html=True,
    )
|