CausalDrugsKG_Dashboard

Sleeping

App Files Community

zavavan committed on Mar 24

Commit

0f6df01

verified ·

1 Parent(s): 06d2d25

Update dashboard.py

Browse files

Files changed (1) hide show

dashboard.py +136 -429

dashboard.py CHANGED Viewed

@@ -1,8 +1,5 @@
-import json
-import warnings
-from bokeh.models import DatetimeTicker, DatetimeTickFormatter
 warnings.filterwarnings("ignore")
 import io
 import os
@@ -34,25 +31,22 @@ from bokeh.resources import INLINE
 from holoviews.operation.timeseries import rolling, rolling_outlier_std
 hv.extension('bokeh')
 ## LOAD DATASETS
-data_folder = './data'
-country_name_df = pd.read_csv(os.path.join(data_folder, 'country_name_map.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)
-country_name_map = dict(zip(country_name_df.Country_Code, country_name_df.Country_Name))
-total_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'total_publications_time_indexed.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)
-country_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'country_publications_time_indexed.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)
-## AECO topic over time html file:
-AECO_topics_over_time_file_path = '/assets/optimized_merged_AECO_topics_over_time_2D_gpt_labels.html'
-AECO_topics_dendogram_file_path = '/assets/topic_hierarchy_optimal_params.htm'
-AECO_topic_map_path = '/assets/document_datamap_ver2.html'
 regions = ['eu', 'us', 'eu_us']
@@ -61,19 +55,19 @@ sorted_ent_type_freq_map_us=dict()
 sorted_ent_type_freq_map_eu_us=dict()
 def read_top_ent_types():
-    reader = csv.reader(open(os.path.join(data_folder, 'sorted_ent_type_freq_map_eu.tsv'), 'r'))
     for i,row in enumerate(reader):
         if i < 20:
             k, v = row
             sorted_ent_type_freq_map_eu[k] = int(v)
     del sorted_ent_type_freq_map_eu['Entity']
-    reader = csv.reader(open(os.path.join(data_folder, 'sorted_ent_type_freq_map_us.tsv'), 'r'))
     for i, row in enumerate(reader):
         if i < 20:
             k, v = row
             sorted_ent_type_freq_map_us[k] = int(v)
     del sorted_ent_type_freq_map_us['Entity']
-    reader = csv.reader(open(os.path.join(data_folder, 'sorted_ent_type_freq_map_eu_us.tsv'), 'r'))
     for i, row in enumerate(reader):
         if i < 20:
             k, v = row
@@ -86,9 +80,12 @@ top_type_filtered_eu = ['DBpedia:Country', 'DBpedia:Organisation', 'DBpedia:Comp
 top_type_filtered_us = ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:Disease', 'DBpedia:ChemicalSubstance', 'DBpedia:Person', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Region', 'DBpedia:MonoclonalAntibody', 'DBpedia:City', 'DBpedia:Biomolecule']
 top_type_filtered_eu_us =  ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:ChemicalSubstance', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Person', 'DBpedia:Disease', 'DBpedia:MonoclonalAntibody', 'DBpedia:GovernmentAgency', 'DBpedia:Biomolecule', 'DBpedia:Gene']
 def read_top_ent_maps():
-    reader = csv.reader(open(os.path.join(data_folder, 'sorted_ent_freq_map_eu.tsv'), 'r'), delimiter='\t')
     for row in reader:
         k,v = row
         lista = ast.literal_eval(v)
@@ -98,7 +95,7 @@ def read_top_ent_maps():
         dizionario = sorted(dizionario.items(), key=lambda x: x[1], reverse=True)
         ent_freq_maps_eu[k]=dizionario
-    reader = csv.reader(open(os.path.join(data_folder, 'sorted_ent_freq_map_us.tsv'), 'r'), delimiter='\t')
     for row in reader:
         k, v = row
         lista = ast.literal_eval(v)
@@ -108,7 +105,7 @@ def read_top_ent_maps():
         dizionario = sorted(dizionario.items(), key=lambda x: x[1], reverse=True)
         ent_freq_maps_us[k] = dizionario
-    reader = csv.reader(open(os.path.join(data_folder, 'sorted_ent_freq_map_eu_us.tsv'), 'r'), delimiter='\t')
     for row in reader:
         k, v = row
         lista = ast.literal_eval(v)
@@ -127,15 +124,15 @@ read_top_ent_maps()
 def read_type_filtered_triples():
     for t in top_type_filtered_eu:
-        df = pd.read_csv(data_folder+'/filtered_rows/eu/'+t.replace(':','_')+'.tsv', sep="	", header=0)
         df.drop(columns=['Unnamed: 0'], inplace=True)
         top_type_filtered_triples_eu[t]=df
     for t in top_type_filtered_us:
-        df = pd.read_csv(data_folder+'/filtered_rows/us/'+t.replace(':','_')+'.tsv', sep="	")
         df.drop(columns=['Unnamed: 0'], inplace=True)
         top_type_filtered_triples_us[t]=df
     for t in top_type_filtered_eu_us:
-        df = pd.read_csv(data_folder+'/filtered_rows/eu_us/'+t.replace(':','_')+'.tsv', sep="	")
         df.drop(columns=['Unnamed: 0'], inplace=True)
         top_type_filtered_triples_eu_us[t]=df
@@ -147,238 +144,83 @@ top_type_filtered_triples_eu_us = dict()
 read_type_filtered_triples()
-grouping_filtered = pd.read_csv(os.path.join(data_folder, 'dna_relations.tsv'), sep="	")
-def load_topic2toptasks():
-    with open(os.path.join(data_folder+'/time_series','topic2toptasks.json'), "r", encoding="utf-8") as file:
-        mapping = json.load(file)
-    return mapping
-def loadTaskMethodTimeSeries(topic,task):
-    task_method_ts = pd.read_csv(os.path.join(data_folder+'/time_series', f"""{topic}_{task}_time_series.csv"""),
-                                                  header=0, sep=',', lineterminator='\n', low_memory=False)
-    task_method_ts.set_index(task_method_ts.columns[0], inplace=True)
-    return task_method_ts
-def loadTaskTimeSeries(topic):
-#cluster_{cluster_id}_TASK_time_series.csv
-    task_ts = pd.read_csv(os.path.join(data_folder+'/time_series', f"""cluster_{topic}_TASK_time_series.csv"""),
-                                                  header=0, sep=',', lineterminator='\n', low_memory=False)
-    task_ts.set_index(task_ts.columns[0], inplace=True)
-    return task_ts
-def loadMethodTimeSeries(topic):
-    method_ts = pd.read_csv(os.path.join(data_folder+'/time_series', f"""cluster_{topic}_METHOD_time_series.csv"""),
-                                                  header=0, sep=',', lineterminator='\n', low_memory=False)
-    method_ts.set_index(method_ts.columns[0], inplace=True)
-    return method_ts
-################################# CREATE CHARTS ############################
 ################################# CREATE CHARTS ############################
-# Hook function to customize x-axis for Bokeh
-def customize_x_axis_bokeh(plot, element):
-    bokeh_plot = plot.state
-    bokeh_plot.xaxis.formatter = DatetimeTickFormatter(months='%m%Y')
-    bokeh_plot.xaxis.ticker.desired_num_ticks = 12
-def create_publication_curve_chart():
-    country_name_df = pd.read_csv(os.path.join(data_folder, 'country_name_map.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)
-    country_name_map = dict(zip(country_name_df.Country_Code, country_name_df.Country_Name))
-    #country_name_map
-    total_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'total_publications_time_indexed.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)
-    total_publications_time_indexed['month_bin'] = pd.to_datetime(total_publications_time_indexed['month_bin'])
-    country_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'country_publications_time_indexed.tsv'),  header=0, sep='\t', lineterminator='\n', low_memory=False)
-    total_publications_time_indexed.id = np.log1p(total_publications_time_indexed.id)
-    country_publications_time_indexed = country_publications_time_indexed.applymap(lambda x: np.log1p(x) if np.issubdtype(type(x), np.number) else x)
-    curve_total = hv.Curve((total_publications_time_indexed.month_bin, total_publications_time_indexed.id), 'Time', 'Publication Counts (log)',label='Total')
     #Overlay the line plots
-    overlay = curve_total
-    curve_countries = []
-    for country in country_name_map.keys():
-       overlay = overlay * hv.Curve((total_publications_time_indexed.month_bin, country_publications_time_indexed[country]), label=country_name_map[country])
-    overlay.opts(show_legend=True,legend_position='right', width=1400, height=900, hooks=[customize_x_axis_bokeh])
     return overlay
-macro_topics_mapping = {"Energy Efficiency and Thermal Comfort in Building Environments":0,
-                        "Indoor Air Quality and Sustainable Air Conditioning Systems":1,
-                        "Urban Development Strategies and Sustainable City Planning":2,
-                        "Enhancing Child-Friendly Urban Spaces Through Design":3,
-                        "Smart city development and urban data management":4,
-                        "Urban Resilience and Green Infrastructure in Climate Change Planning":5,
-                        "Architectural Integration of Solar Photovoltaic Systems in Buildings":6,
-                        "Preservation and Evolution of Traditional Architecture in Modern Contexts":7,
-                        "Sustainable Building Construction and Design with Environmental Assessment":8,
-                        "Landscape Planning and Design Theory":9,
-                        "Urban Sound Environment Research in Architectural Design":10,
-                        "Sustainable Construction Materials and Technologies":11,
-                        "Utilizing BIM in Construction and Building Information Modeling Industry":12,
-                        "Urban Agriculture and Sustainable Food Systems":13,
-                        "Sustainable Bridge Design and Construction":14,
-                        "Investigation of Cavity Dynamics and Heat Transfer in Various Flow Scenarios":15}
-macro_topics_active_subset = ["Energy Efficiency and Thermal Comfort in Building Environments","Architectural Integration of Solar Photovoltaic Systems in Buildings","Utilizing BIM in Construction and Building Information Modeling Industry"]
-def load_institute_network(topic, **kwargs):
-    if topic=='Energy Efficiency and Thermal Comfort in Building Environments':
-        html = """<iframe src="https://tinyurl.com/2d4gl4tl" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Indoor Air Quality and Sustainable Air Conditioning Systems':
-        html = """<iframe src="https://app.vosviewer.com/?json=https%3A%2F%2Fdrive.google.com%2Fuc%3Fid%3D1rqPx3X_9Hnv9mTq2bMCbWWh5VIOw9CRh" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Urban Development Strategies and Sustainable City Planning':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Enhancing Child-Friendly Urban Spaces Through Design':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Smart city development and urban data management':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Urban Resilience and Green Infrastructure in Climate Change Planning':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Architectural Integration of Solar Photovoltaic Systems in Buildings':
-        html = """<iframe src="https://tinyurl.com/2a2ha2r8" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Preservation and Evolution of Traditional Architecture in Modern Contexts':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Sustainable Building Construction and Design with Environmental Assessment':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Landscape Planning and Design Theory':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Urban Sound Environment Research in Architectural Design':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Sustainable Construction Materials and Technologies':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Utilizing BIM in Construction and Building Information Modeling Industry':
-        html = """<iframe src="https://app.vosviewer.com/?json=https%3A%2F%2Fdrive.google.com%2Fuc%3Fid%3D1V-Cto19dxV_GR3MtNP6Yk642CnTQkjEK" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Urban Agriculture and Sustainable Food Systems':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Sustainable Bridge Design and Construction':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Investigation of Cavity Dynamics and Heat Transfer in Various Flow Scenarios':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-def load_country_network(topic, **kwargs):
-    if topic=='Energy Efficiency and Thermal Comfort in Building Environments':
-        html = """<iframe src="https://tinyurl.com/2b7sqbdc" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Indoor Air Quality and Sustainable Air Conditioning Systems':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Urban Development Strategies and Sustainable City Planning':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Enhancing Child-Friendly Urban Spaces Through Design':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Smart city development and urban data management':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Urban Resilience and Green Infrastructure in Climate Change Planning':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Architectural Integration of Solar Photovoltaic Systems in Buildings':
-        html = """<iframe src="https://tinyurl.com/29mkxzep" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Preservation and Evolution of Traditional Architecture in Modern Contexts':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Sustainable Building Construction and Design with Environmental Assessment':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Landscape Planning and Design Theory':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Urban Sound Environment Research in Architectural Design':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Sustainable Construction Materials and Technologies':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Utilizing BIM in Construction and Building Information Modeling Industry':
-        html = """<iframe src="https://tinyurl.com/2ynebkcr" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Urban Agriculture and Sustainable Food Systems':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Sustainable Bridge Design and Construction':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-    elif topic=='Investigation of Cavity Dynamics and Heat Transfer in Various Flow Scenarios':
-        html = """<iframe src="" width="1000" height="800"></iframe>"""
-        html_pane = pn.pane.HTML(html)
-        return html_pane
-def create_overlay_plot(subject_df):
-    overlay = hv.Overlay()
-    for obj_column in subject_df.columns:
-        overlay *= hv.Curve((subject_df.index, subject_df[obj_column]), 'Time', 'Frequency', label=obj_column)
-    overlay.opts(
-        show_legend=True,
-        legend_position='right',
-        width=1400,
-        height=900
-    )
     return overlay
 ############################# WIDGETS & CALLBACK ###########################################
 def filter_data0(df, min_value):
@@ -437,20 +279,15 @@ def generate_radio_buttons(value):
 # https://tabler-icons.io/
-button0 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
-button1 = pn.widgets.Button(name="Publication Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
-button2 = pn.widgets.Button(name="Topic Map", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
-button3 = pn.widgets.Button(name="AECO Macro Topic Hierarchy", button_type="warning",  icon="chart-dots-3", styles={"width": "100%"})
-button4 = pn.widgets.Button(name="AECO Macro Topics Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
-button5 = pn.widgets.Button(name="Research Collaboration Networks: Institutes", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
-button6 = pn.widgets.Button(name="Research Collaboration Networks: Countries", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
-button7 = pn.widgets.Button(name="Research Tasks and Methods Trends", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
 region1 = pn.widgets.RadioButtonGroup(name='### Select News Region', options=regions)
-macro_topics_button = pn.widgets.Select(name='Select Macro Topic', value='Energy Efficiency and Thermal Comfort in Building Environments', options=macro_topics_active_subset)
 # Initial RadioButtonGroup
 radio_buttons_regions =  pn.widgets.RadioButtonGroup(options=regions,value='eu',name='Select region')
@@ -467,6 +304,7 @@ def update_radio_group(event):
 # bind the function to the widget(s)
 # Bind the selected value of the first RadioButtonGroup to update the second RadioButtonGroup
 radio_buttons_regions.param.watch(update_radio_group, 'value')
@@ -487,227 +325,96 @@ region_radio_button.param.watch(update_holomap, 'value')
 def show_page(page_key):
     main_area.clear()
     main_area.append(mapping[page_key])
-button0.on_click(lambda event: show_page("Page0"))
 button1.on_click(lambda event: show_page("Page1"))
 button2.on_click(lambda event: show_page("Page2"))
 button3.on_click(lambda event: show_page("Page3"))
 button4.on_click(lambda event: show_page("Page4"))
 button5.on_click(lambda event: show_page("Page5"))
-button6.on_click(lambda event: show_page("Page6"))
-button7.on_click(lambda event: show_page("Page7"))
 ### CREATE PAGE LAYOUTS
-def CreatePage0():
     return pn.Column(pn.pane.Markdown("""
-## Introduction
-This is a dashboard for a Data Analytics project regarding research publications in the AECO domain. The source data consists of around 267k English-language research papers gathered from the openalex.org graph database, covering a timeframe from 2011 through early 2024.
 ---------------------------
-## Publication Trends
-In the "Publication Trends" panel we show monthly time series of the total number of publications and the number of publications per country (both in log scale), for the top 20 countries by number of publications in the dataset.
-## Topic Map
-In the "Topic Map" panel we show a UMAP reduced 2-dimensional visualization of the optimized 52 topic clusters of AECO research papers, embedded using Sentence Transformer model, with the descriptive labels overlayed on the clusters being generated by LLama 2 Large Language Model. Each point in the space represent a paper from the dataset.
-Hovering over it has the paper title popping up, while clicking on it redirects to the corresponding OpenAlex paper entry page.
-## AECO Macro Topic Hierarchy
-The "AECO Macro Topic Hierarchy" panel allows to explore the dendrogram representation of the optimized clustering, with the leaves of the tree representing the 51 clusters, the intermediate nodes representing merged clusters and the height of the merging (distance from the leaves) indicating topic
-similarity as based on the cosine distance matrix between topic embeddings.
-## AECO Macro Topic Trends
-The "AECO Macro Topic Trends" panel shows the evolution over time of the 16 AECO macro topics by plotting the semi-annual time series of the absolute numbers of publications per topic.
-## Research Collaboration Networks: Institutes
-The "Research Collaboration Networks: Institutes" panel contains VOSViewer-generated network representations of the research institutions co-authorship connections, for each of the 16 macro clusters. By selecting a macro-cluster from by the drop-down menu, a graph is loaded whose nodes represent research institutions, edges represent co-authorships relations (with edge thickness being proportional to the frequency of the relations) and the color code clustering highlights the partition of the graph in highly interconnected node groups.
-Open the VOSViewer left panel to customize the visualization and/or search for a target institution in the graph.
-## Research Collaboration Networks: Countries
-The "Research Collaboration Networks: Countries" panel contains VOSViewer-generated network representations of the authors/institutions' country co-authorship connections, for each of the 16 macro clusters. By selecting a macro-cluster from by the drop-down menu, a graph is loaded whose nodes represent authors/institutions' countries, edges represent co-authorships relations (with edge thickness being proportional to the frequency of the relations) and the color code clustering highlights the partition of the graph in highly interconnected node  groups.
-Open the VOSViewer left panel to customize the visualization and/or search for a target country in the graph.
-""", width=1000), align="center")
-def CreatePage1():
-    return pn.Column(
-        pn.pane.Markdown("## Publication Trends "),
-        create_publication_curve_chart(),
-        align="center",
-    )
 def CreatePage2():
-    # Load the HTML content from the local file
-    #with open(AECO_topics_over_time_file_path, 'r', encoding='utf-8') as file:
-    #    html_content = file.read()
-    # Use an iframe to load the local HTML file
-    iframe_html = f'<iframe src="{AECO_topic_map_path}" width="1400px" height="1200px"></iframe>'
-    # Create an HTML pane to render the content
-    html_pane = pn.pane.HTML(iframe_html , sizing_mode='stretch_both')
-    return pn.Column(pn.pane.Markdown(" ## AECO Topic Map "), html_pane, align="center")
-def CreatePage3():
-    # Load the HTML content from the local file
-    #with open(AECO_topics_over_time_file_path, 'r', encoding='utf-8') as file:
-    #    html_content = file.read()
-    # Use an iframe to load the local HTML file
-    iframe_html = f'<iframe src="{AECO_topics_dendogram_file_path}" width="1400px" height="1200px"></iframe>'
-    # Create an HTML pane to render the content
-    html_pane = pn.pane.HTML(iframe_html , sizing_mode='stretch_both')
-    return pn.Column(pn.pane.Markdown(" ## AECO Macro Topics Dendogram "), html_pane, align="center")
-def CreatePage4():
-    # Load the HTML content from the local file
-    #with open(AECO_topics_over_time_file_path, 'r', encoding='utf-8') as file:
-    #    html_content = file.read()
-    # Use an iframe to load the local HTML file
-    iframe_html = f'<iframe src="{AECO_topics_over_time_file_path}" width="1400px" height="1200px"></iframe>'
-    # Create an HTML pane to render the content
-    html_pane = pn.pane.HTML(iframe_html , sizing_mode='stretch_both')
-    return pn.Column(pn.pane.Markdown(" ## AECO Macro Topics "), html_pane, align="center")
-def CreatePage5():
     return pn.Column(
-    macro_topics_button,
-       pn.bind(load_institute_network, macro_topics_button),
         align="center",
     )
-def CreatePage6():
     return pn.Column(
-    macro_topics_button,
-       pn.bind(load_country_network, macro_topics_button),
         align="center",
     )
-def CreatePage7():
     return pn.Column(
-    macro_topics_button,
-       pn.bind(load_Task_Method_trends, macro_topics_button),
-        align="center",
-    )
-def load_Task_Method_trends(topic, **kwargs):
-    task_data = dict()
-    # Check if macro_topics_mapping exists
-    if topic not in macro_topics_mapping:
-        raise ValueError(f"Topic '{topic}' not found in macro_topics_mapping")
-    macro_topic_str = str(macro_topics_mapping[topic])
-    #load the tasks and methods timeseries dataframe for the selected topic
-    task_ts_df = loadTaskTimeSeries(macro_topic_str)
-    method_ts_df = loadMethodTimeSeries(macro_topic_str)
-    task_overlay = create_overlay_plot(task_ts_df)
-    method_overlay = create_overlay_plot(method_ts_df)
-    # Recolor and relabel elements in overlays
-    task_overlay_colored = hv.Overlay([
-        curve.opts(color='red', legend_label=curve.label if curve.label else f"Series {i + 1}")
-        for i, curve in enumerate(task_overlay)
-    ])
-    method_overlay_colored = hv.Overlay([
-        curve.opts(color='blue', legend_label=curve.label if curve.label else f"Series {i + 1}")
-        for i, curve in enumerate(method_overlay)
-    ])
-    # Merge overlays into one plot
-    merged_overlay = task_overlay_colored * method_overlay_colored
-    merged_overlay.opts(
-        show_legend=True,
-        legend_position='right',
-        width=1400,
-        height=900
-    )
     return pn.Column(
-        merged_overlay
-    )
-def load_Task_Method_triple_trends(topic, **kwargs):
-    task_data = dict()
-    # Check if macro_topics_mapping exists
-    if topic not in macro_topics_mapping:
-        raise ValueError(f"Topic '{topic}' not found in macro_topics_mapping")
-    macro_topic_str = str(macro_topics_mapping[topic])
-    topic2toptasks = load_topic2toptasks()
-    #load the task_timeseries dataframe for each of the task mapped from the selected topic in the dict topic2toptasks
-    for task in topic2toptasks.get(macro_topic_str, []):
-        task_data[task] = loadTaskMethodTimeSeries(macro_topic_str, task)
-    # Create Panel UI with dropdown selection
-    task_dropdown = pn.widgets.Select(
-        name="Select TASK",
-        options=[key for key in task_data.keys()] if task_data else ["No available tasks"]
-    )
-    #@pn.depends(task_dropdown.param.value)
-    def load_task_overlays(task):
-        if task not in task_data:
-            return hv.Text(0.5, 0.5, "No data available", halign="center")
-        else:
-            task_method_df = task_data.get(task)
-            return create_overlay_plot(task_method_df)
-    return pn.Column(
-        task_dropdown,
-        pn.bind(load_task_overlays, task_dropdown)
-    )
 mapping = {
-    "Page0": CreatePage0(),
     "Page1": CreatePage1(),
     "Page2": CreatePage2(),
     "Page3": CreatePage3(),
     "Page4": CreatePage4(),
     "Page5": CreatePage5(),
-    "Page6": CreatePage6(),
-    "Page7": CreatePage7()
 }
 #################### SIDEBAR LAYOUT ##########################
-sidebar = pn.Column(pn.pane.Markdown("## Panels"),button0,button1,button2,button3,button4,button5,button6,button7,
-                    #button5,
-                    #button6,
-                    styles={"width": "100%", "padding": "15px"})
 #################### MAIN AREA LAYOUT ##########################
-main_area = pn.Column(mapping["Page0"], styles={"width":"100%"})
 ###################### APP LAYOUT ##############################
 template = pn.template.BootstrapTemplate(
-    title=" AECO Tech Dashboard",
     sidebar=[sidebar],
     main=[main_area],
     header_background="black",
-    #site="Charting the Landscape of AECO Research",
     theme=pn.template.DarkTheme,
-    sidebar_width=330, ## Default is 330
     busy_indicator=pn.indicators.BooleanStatus(value=True),
 )

+import warnings
 warnings.filterwarnings("ignore")
 import io
 import os
 from holoviews.operation.timeseries import rolling, rolling_outlier_std
 hv.extension('bokeh')
 ## LOAD DATASETS
+dna_folder = './data'
+#### full data unfiltered:
+dna_articles_unfiltered_eu_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_eu_time_indexed_resampled.tsv'),sep='\t',header=0)
+dna_articles_unfiltered_us_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_us_time_indexed_resampled.tsv'),sep='\t',header=0)
+dna_articles_unfiltered_eu_us_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_eu_us_time_indexed_resampled.tsv'),sep='\t',header=0)
+#### classifier filtered articles:
+dh_ration_df_eu = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_eu.tsv'),sep='\t',header=0)
+dh_ration_df_us = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_us.tsv'),sep='\t',header=0)
+dh_ration_df_eu_us = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_eu_us.tsv'),sep='\t',header=0)
 regions = ['eu', 'us', 'eu_us']
 sorted_ent_type_freq_map_eu_us=dict()
 def read_top_ent_types():
+    reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_type_freq_map_eu.tsv'), 'r'))
     for i,row in enumerate(reader):
         if i < 20:
             k, v = row
             sorted_ent_type_freq_map_eu[k] = int(v)
     del sorted_ent_type_freq_map_eu['Entity']
+    reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_type_freq_map_us.tsv'), 'r'))
     for i, row in enumerate(reader):
         if i < 20:
             k, v = row
             sorted_ent_type_freq_map_us[k] = int(v)
     del sorted_ent_type_freq_map_us['Entity']
+    reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_type_freq_map_eu_us.tsv'), 'r'))
     for i, row in enumerate(reader):
         if i < 20:
             k, v = row
 # Entity types whose per-type triple files read_type_filtered_triples() loads
 # for the US region (':' is replaced by '_' to form the file name).
 top_type_filtered_us = ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:Disease', 'DBpedia:ChemicalSubstance', 'DBpedia:Person', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Region', 'DBpedia:MonoclonalAntibody', 'DBpedia:City', 'DBpedia:Biomolecule']
 # Same role for the combined EU-US region.
 top_type_filtered_eu_us =  ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:ChemicalSubstance', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Person', 'DBpedia:Disease', 'DBpedia:MonoclonalAntibody', 'DBpedia:GovernmentAgency', 'DBpedia:Biomolecule', 'DBpedia:Gene']
 # Per-region article tables (tab-separated, header row). generate_entity_curves()
 # reindexes the yearly entity counts onto their index and divides by their
 # `ids` column.
+dna_healthtech_articles_eu_time_indexed_resampled=pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_eu_time_indexed_resampled.tsv'),sep='\t',header=0)
+dna_healthtech_articles_us_time_indexed_resampled=pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_us_time_indexed_resampled.tsv'),sep='\t',header=0)
+dna_healthtech_articles_eu_us_time_indexed_resampled=pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_eu_us_time_indexed_resampled.tsv'),sep='\t',header=0)
 # Load the per-region entity frequency files into the module-level
 # ent_freq_maps_* dicts. Each tab-separated row is a (key, value) pair whose
 # value is a Python literal parsed with ast.literal_eval; the entry stored per
 # key is a list of (item, number) pairs sorted by the number, largest first.
 # Local names are Italian: `lista` = list, `dizionario` = dictionary.
 # NOTE(review): `dizionario` is read before any assignment visible here; a
 # step turning `lista` into a dict seems to be missing from this view.
 # NOTE(review): the eu_us loop stops after literal_eval in this view, so the
 # function looks truncated -- check against the full file.
 # NOTE(review): the handles returned by open() are never closed explicitly.
 def read_top_ent_maps():
+    reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_freq_map_eu.tsv'), 'r'), delimiter='\t')
     for row in reader:
         k,v = row
         lista = ast.literal_eval(v)
         dizionario = sorted(dizionario.items(), key=lambda x: x[1], reverse=True)
         ent_freq_maps_eu[k]=dizionario
+    reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_freq_map_us.tsv'), 'r'), delimiter='\t')
     for row in reader:
         k, v = row
         lista = ast.literal_eval(v)
         dizionario = sorted(dizionario.items(), key=lambda x: x[1], reverse=True)
         ent_freq_maps_us[k] = dizionario
+    reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_freq_map_eu_us.tsv'), 'r'), delimiter='\t')
     for row in reader:
         k, v = row
         lista = ast.literal_eval(v)
 def read_type_filtered_triples():
     """Load the per-type triple tables of every region into the module-level maps.

     For each entity type listed for a region, the matching TSV below
     ``dna_folder + '/filtered_rows/<region>/'`` (type name with ':' replaced
     by '_') is read, its 'Unnamed: 0' column is dropped, and the frame is
     stored under the type name in that region's
     ``top_type_filtered_triples_*`` dict.
     """
     region_tables = (
         ('/filtered_rows/eu/', top_type_filtered_eu, top_type_filtered_triples_eu),
         ('/filtered_rows/us/', top_type_filtered_us, top_type_filtered_triples_us),
         ('/filtered_rows/eu_us/', top_type_filtered_eu_us, top_type_filtered_triples_eu_us),
     )
     for subdir, type_names, triples_by_type in region_tables:
         for type_name in type_names:
             tsv_path = dna_folder + subdir + type_name.replace(':', '_') + '.tsv'
             # header=0 is what pandas infers anyway when no column names are passed.
             frame = pd.read_csv(tsv_path, sep="\t", header=0)
             frame.drop(columns=['Unnamed: 0'], inplace=True)
             triples_by_type[type_name] = frame
 # Fill the per-region triple tables at import time, before any chart is built.
 read_type_filtered_triples()
 # Relations table, read from a tab-separated file in the data folder.
 grouping_filtered = pd.read_csv(
     os.path.join(dna_folder, 'dna_relations.tsv'),
     sep="\t",
 )
 ################################# CREATE CHARTS ############################
def create_curve_chart():
    """Build the overlaid Digital Health news-ratio curves for EU, US and EU-US.

    Each curve plots, against the index of the region's ``dh_ration_df_*``
    table, that table's ``ids`` column divided by the ``ids`` column of the
    region's unfiltered article table.

    Returns:
        The three curves overlaid, with the legend shown at the top left on a
        1200x600 canvas.
    """
    def ratio_curve(filtered, unfiltered, label):
        # x: index of the filtered table; y: filtered ids / unfiltered ids.
        ratio = filtered.ids / unfiltered.ids
        return hv.Curve((filtered.index, ratio), 'Time', 'Digital Health News Ratio', label=label)

    eu_curve = ratio_curve(dh_ration_df_eu, dna_articles_unfiltered_eu_time_indexed_resampled, 'EU')
    us_curve = ratio_curve(dh_ration_df_us, dna_articles_unfiltered_us_time_indexed_resampled, 'US')
    eu_us_curve = ratio_curve(dh_ration_df_eu_us, dna_articles_unfiltered_eu_us_time_indexed_resampled, 'EU-US')

    # Multiplying HoloViews elements overlays them on shared axes.
    combined = eu_curve * us_curve * eu_us_curve
    combined.opts(show_legend=True, legend_position='top_left', width=1200, height=600)
    return combined
def create_bar_charts(region, **kwargs):
    """Return the entity-type frequency bar chart for one region.

    Args:
        region: 'eu', 'us' or 'eu_us'; selects which
            ``sorted_ent_type_freq_map_*`` dict is plotted.
        **kwargs: accepted and ignored.

    Returns:
        An ``hv.Bars`` element (1200x600, x labels rotated 45 degrees), or
        ``None`` when ``region`` is not one of the three known codes.
    """
    if region == 'eu':
        freq_by_type = sorted_ent_type_freq_map_eu
    elif region == 'us':
        freq_by_type = sorted_ent_type_freq_map_us
    elif region == 'eu_us':
        freq_by_type = sorted_ent_type_freq_map_eu_us
    else:
        return None

    bars = hv.Bars(freq_by_type, hv.Dimension('Entity Types'), 'Frequency')
    return bars.opts(framewise=True, xrotation=45, width=1200, height=600)
+# Define a function to generate Curve based on selected values
def generate_entity_curves(region_value, type_value, **kwargs):
    """Overlay one yearly occurrence-ratio curve per key entity of a given type.

    For every entry of the region's entity frequency map for ``type_value``,
    the triples of that type in which the entity is the subject or the object
    are counted per year, aligned to the index of the region's article table,
    and divided by that table's ``ids`` column.

    All three regions run the same pipeline, so the yearly resampling step is
    applied to every region rather than only to 'eu'.

    Args:
        region_value: 'eu', 'us' or 'eu_us'.
        type_value: entity type key into the region's frequency map and
            triple tables.
        **kwargs: accepted and ignored.

    Returns:
        An ``hv.Overlay`` of the per-entity curves, or ``None`` when
        ``region_value`` is not one of the three known codes.
    """
    region_tables = {
        'eu': (ent_freq_maps_eu,
               top_type_filtered_triples_eu,
               dna_healthtech_articles_eu_time_indexed_resampled),
        'us': (ent_freq_maps_us,
               top_type_filtered_triples_us,
               dna_healthtech_articles_us_time_indexed_resampled),
        'eu_us': (ent_freq_maps_eu_us,
                  top_type_filtered_triples_eu_us,
                  dna_healthtech_articles_eu_us_time_indexed_resampled),
    }
    if region_value not in region_tables:
        return None
    ent_freq_maps, triples_by_type, articles = region_tables[region_value]

    # Loop-invariant: the triple table of the selected type.
    triples = triples_by_type[type_value]

    curves = []
    for ent in ent_freq_maps[type_value]:
        # Each entry is indexable; element 0 is the entity identifier.
        entity = ent[0]
        involved = (triples['subjEntityLinks'] == entity) | (triples['objEntityLinks'] == entity)
        entity_triples = triples[involved]

        time_indexed = entity_triples.set_index(pd.DatetimeIndex(entity_triples['timestamp']))
        del time_indexed['timestamp']

        # Count rows per year, then align to the article table's index so the
        # division below is element-wise; missing periods count as 0.
        yearly = time_indexed.resample("Y").count()
        yearly = yearly.reindex(articles.index, fill_value=0)

        curve = hv.Curve(
            (yearly.index, yearly['doc_id'] / articles['ids']),
            'Time',
            'Key Entity Occurrence',
            label=entity,
        )
        curve.opts(autorange='y')
        curves.append(curve)

    overlay = hv.Overlay(curves)
    overlay.opts(legend_muted=False, legend_cols=4, show_legend=True,
                 legend_position='top_left', fontsize={'legend': 13},
                 width=1200, height=800)
    return overlay
 ############################# WIDGETS & CALLBACK ###########################################
 def filter_data0(df, min_value):
 # https://tabler-icons.io/
# Sidebar navigation buttons, one per page, each styled to 100% width.
# Icon names follow the tabler icon set.
+button1 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
+button2 = pn.widgets.Button(name="Health Tech News Ratio", button_type="warning",  icon="chart-histogram", styles={"width": "100%"})
+button3 = pn.widgets.Button(name="Top Entity Types", button_type="warning", icon="chart-bar", styles={"width": "100%"})
+button4 = pn.widgets.Button(name="Top Key Entities", button_type="warning", icon="chart-dots-filled", styles={"width": "100%"})
+button5 = pn.widgets.Button(name="Entity Chord Diagrams", button_type="warning", icon="chart-dots-filled", styles={"width": "100%"})
# Region selector used by CreatePage3(), where it is bound to create_bar_charts().
 region1 = pn.widgets.RadioButtonGroup(name='### Select News Region', options=regions)
 # Initial RadioButtonGroup
 radio_buttons_regions =  pn.widgets.RadioButtonGroup(options=regions,value='eu',name='Select region')
 # bind the function to the widget(s)
# generate_entity_curves() receives the current values of the region and type
# selectors. NOTE(review): radio_buttons_types is defined outside this view.
+dmap2 = hv.DynamicMap(pn.bind(generate_entity_curves, radio_buttons_regions,radio_buttons_types))
 # Bind the selected value of the first RadioButtonGroup to update the second RadioButtonGroup
# NOTE(review): update_radio_group is defined outside this view.
 radio_buttons_regions.param.watch(update_radio_group, 'value')
 def show_page(page_key):
     """Replace the content of the main area with the page stored under ``page_key``.

     Args:
         page_key: key into the module-level ``mapping`` of page layouts.

     Raises:
         KeyError: if ``page_key`` is not in ``mapping``; the main area is
             left untouched in that case.
     """
     # Resolve the page before clearing so a bad key cannot blank the main area.
     page = mapping[page_key]
     main_area.clear()
     main_area.append(page)
 # Wire each sidebar button to its page; the key is bound as a default
 # argument so every callback keeps its own page key.
 for _button, _page_key in (
     (button1, "Page1"),
     (button2, "Page2"),
     (button3, "Page3"),
     (button4, "Page4"),
     (button5, "Page5"),
 ):
     _button.on_click(lambda event, _page_key=_page_key: show_page(_page_key))
 ### CREATE PAGE LAYOUTS
def CreatePage1():
    """Build the "Introduction" page.

    Returns:
        A centered ``pn.Column`` holding one 800-px-wide Markdown pane with
        the project overview and a short description of each dashboard panel.
    """
    # The Markdown text stays flush-left: indented lines would render as a code
    # block. A blank line precedes the dash rule so it is a horizontal rule and
    # not a setext underline that turns the paragraph above it into a heading.
    intro_markdown = """
This is a dashboard for a News Analysis project regarding Digital Health technology. The source data consists of around 7.8 million English-language news articles gathered from the **Dow Jones Data, News, and Analytics (DNA)**
platform (https://www.dowjones.com/professional/developer-platform/) covering a timeframe from September 1987 through December 2023. The news items text content is copyrighted and cannot be shared within this project.

Some of the data analytics visualizations shown here come from a Knowledge Graph automatically extracted from DNA news sources. A Virtuoso SPARQL endpoint to this graph (named 'DHNEWS KG') is set up at the
URL: https://api-vast.jrc.service.ec.europa.eu/sparql/

---------------------------

## 1. Health Tech News Ratio
In the Health Tech News Ratio panel we present the month-sampled time series depicting the proportion of 97k news articles concerning Digital Health, as identified by a text classifier, out of the total number of English language DNA news articles pertaining to Europe and the US.

## 2. Top Entity Types
The Top Entity Types bar plots in the dashboard show the predominant DBpedia-inherited entity types within the graph for triples tagged with Europe, US, and EU-US region codes via their article support.

## 3. Top Key Entities
The Top Key Entities plots track the occurrence of several key entities per year, where occurrence means the entity is either the Subject or Object of an extracted triple in the KG.

## 4. Entity Chord Diagrams
Entity Chord Diagrams represent the most frequently connected entity pairs within the KG through chord illustrations, serving as both Subjects and Objects of predicative triples.
The size of the chords corresponds to the support of the depicted relations.
"""
    return pn.Column(pn.pane.Markdown(intro_markdown, width=800), align="center")
 def CreatePage2():
     """Build the "Health Tech News Ratio" page: a heading above the ratio curves.

     Returns:
         A centered ``pn.Column`` with the heading pane and the chart from
         ``create_curve_chart()``.
     """
     heading = pn.pane.Markdown("## Health Tech News Ratio ")
     ratio_chart = create_curve_chart()
     return pn.Column(heading, ratio_chart, align="center")
def CreatePage3():
    """Build the "Top Entity Types" page.

    Returns:
        A centered ``pn.Column`` with the ``region1`` selector above a bar
        chart bound to it, so the chart follows the selected region.
    """
    region_bars = pn.bind(create_bar_charts, region1)
    return pn.Column(region1, region_bars, align="center")
def CreatePage4():
    """Build the "Top Key Entities" page.

    Returns:
        A centered ``pn.Column`` with a heading and, below it, a row holding
        the region/type selectors (stacked) next to the ``dmap2`` curves.
    """
    heading = pn.pane.Markdown("## Top Key Entities ")
    selectors = pn.Column(radio_buttons_regions, radio_buttons_types)
    content = pn.Row(selectors, dmap2)
    return pn.Column(heading, content, align="center")
def CreatePage5():
    """Build the "Entity Chord Diagrams" page.

    Returns:
        A centered ``pn.Column`` with a heading and, below it, a row holding
        ``region_radio_button`` next to the output of ``filter_region`` bound
        to that selector.
    """
    heading = pn.pane.Markdown("## Entity Chord Diagrams ")
    region_view = pn.bind(filter_region, region_radio_button)
    content = pn.Row(region_radio_button, region_view)
    return pn.Column(heading, content, align="center")
 # Every page is built once, here; show_page() swaps these instances into the
 # main area.
 mapping = {
     page_key: build_page()
     for page_key, build_page in (
         ("Page1", CreatePage1),
         ("Page2", CreatePage2),
         ("Page3", CreatePage3),
         ("Page4", CreatePage4),
         ("Page5", CreatePage5),
     )
 }

 #################### SIDEBAR LAYOUT ##########################
 sidebar = pn.Column(
     pn.pane.Markdown("## Pages"),
     button1,
     button2,
     button3,
     button4,
     button5,
     styles={"width": "100%", "padding": "15px"},
 )

 #################### MAIN AREA LAYOUT ##########################
 # The introduction page is what the main area shows first.
 main_area = pn.Column(mapping["Page1"], styles={"width":"100%"})

 ###################### APP LAYOUT ##############################
 template = pn.template.BootstrapTemplate(
     title=" Digital Health in the News: Analytics Dashboard ",
     # site="Charting the Landscape of Digital Health",  (currently disabled)
     header_background="black",
     theme=pn.template.DarkTheme,
     sidebar=[sidebar],
     sidebar_width=250,  # Default is 330
     main=[main_area],
     busy_indicator=pn.indicators.BooleanStatus(value=True),
 )