File size: 4,270 Bytes
69abbc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import streamlit as st
from pathlib import Path
import json
from support_functions import HealthseaSearch

def visualize_dataset():
    """Render the dataset explorer section of the Streamlit page.

    Reads the four JSON files under ``data/``, builds a ``HealthseaSearch``
    from them and writes, in order: dataset-level KPIs, a search box with a
    "top n" slider, the products table for the searched aspect together with
    its KPIs (plus the output of ``tsne_plot`` when the aspect has aliases),
    and the substances table with its KPI.

    Takes no arguments and returns ``None``; everything is emitted through
    ``st`` calls.
    """
    # Configuration
    health_aspect_path = Path("data/health_aspects.json")
    product_path = Path("data/products.json")
    condition_path = Path("data/condition_vectors.json")
    benefit_path = Path("data/benefit_vectors.json")

    # Load data
    # NOTE(review): st.cache is deprecated in recent Streamlit releases in
    # favour of st.cache_data / st.cache_resource; kept as-is because the
    # Streamlit version this app is pinned to is not visible here.
    @st.cache(allow_output_mutation=True)
    def load_data(
        _health_aspect_path: Path,
        _product_path: Path,
        _condition_path: Path,
        _benefit_path: Path,
    ):
        """Parse the four JSON files and return their contents as a 4-tuple.

        The tuple order is (health_aspects, products, conditions, benefits).
        """
        loaded = []
        for path in (
            _health_aspect_path,
            _product_path,
            _condition_path,
            _benefit_path,
        ):
            # Be explicit about the encoding so parsing does not depend on
            # the platform's locale default.
            with open(path, encoding="utf-8") as reader:
                loaded.append(json.load(reader))
        return tuple(loaded)

    # Functions
    def kpi(n, text):
        """Return the HTML snippet for one KPI tile: value ``n`` over ``text``."""
        html = f"""
        <div class='kpi'>
            <h1 class='kpi_header'>{n}</h1>
            <span>{text}</span>
        </div>
        """
        return html

    def central_text(text):
        """Return ``text`` wrapped in the ``central_text`` heading markup."""
        html = f"""<h2 class='central_text'>{text}</h2>"""
        return html

    # Loading data
    health_aspects, products, conditions, benefits = load_data(
        health_aspect_path, product_path, condition_path, benefit_path
    )
    search_engine = HealthseaSearch(health_aspects, products, conditions, benefits)

    # KPI

    st.markdown("""---""")

    st.markdown(central_text("🎀 Dataset"), unsafe_allow_html=True)

    kpi_products, kpi_reviews, kpi_condition, kpi_benefit = st.columns(4)

    kpi_products.markdown(kpi(len(products), "Products"), unsafe_allow_html=True)
    # Hard-coded review count. It is passed as a string because the float
    # literal 933.240 is rendered as "933.24", dropping the trailing zero.
    kpi_reviews.markdown(kpi("933.240", "Reviews"), unsafe_allow_html=True)
    kpi_condition.markdown(kpi(len(conditions), "Conditions"), unsafe_allow_html=True)
    kpi_benefit.markdown(kpi(len(benefits), "Benefits"), unsafe_allow_html=True)

    st.markdown("""---""")

    # Search
    search = st.text_input(label="Search for an health aspect", value="joint pain")
    n = st.slider("Show top n results", min_value=10, max_value=1000, value=25)

    st.markdown("""---""")
    st.markdown(central_text("🧃 Products"), unsafe_allow_html=True)

    # DataFrame
    st.write(search_engine.get_products_df(search, n))

    # KPI & Alias
    # Look the aspect and its metadata up once and reuse them below instead
    # of repeating the same calls for every KPI.
    # NOTE(review): assumes get_aspect / get_aspect_meta are read-only
    # lookups that return the same data for the same search term - confirm.
    aspect = search_engine.get_aspect(search)
    aspect_alias = aspect["alias"]
    aspect_meta = search_engine.get_aspect_meta(search)

    # "Mentions" and "Products" are always shown; the third column only
    # exists when the aspect has aliases.
    kpi_columns = st.columns(3 if aspect_alias else 2)
    kpi_columns[0].markdown(
        kpi(aspect_meta["frequency"], "Mentions"),
        unsafe_allow_html=True,
    )
    kpi_columns[1].markdown(
        kpi(len(aspect["products"]), "Products"),
        unsafe_allow_html=True,
    )

    if aspect_alias:
        kpi_columns[2].markdown(
            kpi(len(aspect_alias), "Similar health aspects"),
            unsafe_allow_html=True,
        )

        # The searched aspect comes first, followed by each alias in order.
        vectors = [(aspect_meta["name"], aspect_meta["vector"])]
        for alias in aspect_alias:
            alias_meta = search_engine.get_aspect_meta(alias)
            vectors.append((alias_meta["name"], alias_meta["vector"]))
        st.markdown("\n")
        st.write(search_engine.tsne_plot(vectors))

    st.markdown("""---""")

    # Substances
    st.markdown(central_text("🍯 Substances"), unsafe_allow_html=True)

    # DataFrame
    st.write(search_engine.get_substances_df(search, n))
    # The first column is intentionally left empty; the KPI goes in the second.
    _, kpi_substances = st.columns(2)
    kpi_substances.markdown(
        kpi(len(aspect["substance"]), "Substances"),
        unsafe_allow_html=True,
    )