Spaces:
Sleeping
Sleeping
Upload dashboard.py
Browse files- dashboard.py +45 -15
dashboard.py
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
import warnings
|
|
|
|
|
|
|
2 |
warnings.filterwarnings("ignore")
|
3 |
import io
|
4 |
import os
|
@@ -154,11 +157,12 @@ grouping_filtered = pd.read_csv(os.path.join(data_folder, 'dna_relations.tsv'),
|
|
154 |
def customize_x_axis_bokeh(plot, element):
|
155 |
bokeh_plot = plot.state
|
156 |
bokeh_plot.xaxis.major_label_orientation = 45 # Rotate x-axis labels
|
|
|
157 |
|
158 |
def create_publication_curve_chart():
|
159 |
country_name_df = pd.read_csv(os.path.join(data_folder, 'country_name_map.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
|
160 |
country_name_map = dict(zip(country_name_df.Country_Code, country_name_df.Country_Name))
|
161 |
-
country_name_map
|
162 |
total_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'total_publications_time_indexed.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
|
163 |
country_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'country_publications_time_indexed.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
|
164 |
total_publications_time_indexed.id = np.log1p(total_publications_time_indexed.id)
|
@@ -169,7 +173,7 @@ def create_publication_curve_chart():
|
|
169 |
curve_countries = []
|
170 |
for country in country_name_map.keys():
|
171 |
overlay = overlay * hv.Curve((total_publications_time_indexed.month_bin, country_publications_time_indexed[country]), label=country_name_map[country])
|
172 |
-
overlay.opts(show_legend=True,legend_position='right', width=
|
173 |
return overlay
|
174 |
|
175 |
|
@@ -374,7 +378,7 @@ def generate_radio_buttons(value):
|
|
374 |
button0 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
|
375 |
button1 = pn.widgets.Button(name="Publication Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
|
376 |
button2 = pn.widgets.Button(name="Topic Map", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
|
377 |
-
button3 = pn.widgets.Button(name="AECO Macro
|
378 |
button4 = pn.widgets.Button(name="AECO Macro Topics Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
|
379 |
button5 = pn.widgets.Button(name="Research Collaboration Networks: Institutes", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
|
380 |
button6 = pn.widgets.Button(name="Research Collaboration Networks: Countries", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
|
@@ -435,23 +439,49 @@ button6.on_click(lambda event: show_page("Page6"))
|
|
435 |
def CreatePage0():
|
436 |
return pn.Column(pn.pane.Markdown("""
|
437 |
|
438 |
-
|
439 |
-
|
|
|
|
|
|
|
440 |
|
441 |
---------------------------
|
442 |
|
443 |
-
##
|
444 |
-
|
445 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
446 |
|
447 |
-
## AECO Macro Topics
|
448 |
-
In the AECO Macro Topics panel we present the 6-month-sampled time series depicting the number of published research papers
|
449 |
-
for the 16 macro-topics automatically detected by an optimized BERTopic model and post-processed for manual topic merging.
|
450 |
|
|
|
|
|
|
|
|
|
|
|
451 |
|
452 |
-
### Research Collaboration Networks: Institutes
|
453 |
|
454 |
-
### Research Collaboration Networks: Authors
|
455 |
""", width=800), align="center")
|
456 |
|
457 |
def CreatePage1():
|
@@ -522,13 +552,13 @@ mapping = {
|
|
522 |
}
|
523 |
|
524 |
#################### SIDEBAR LAYOUT ##########################
|
525 |
-
sidebar = pn.Column(pn.pane.Markdown("##
|
526 |
#button5,
|
527 |
#button6,
|
528 |
styles={"width": "100%", "padding": "15px"})
|
529 |
|
530 |
#################### MAIN AREA LAYOUT ##########################
|
531 |
-
main_area = pn.Column(mapping["
|
532 |
|
533 |
###################### APP LAYOUT ##############################
|
534 |
template = pn.template.BootstrapTemplate(
|
|
|
1 |
import warnings
|
2 |
+
|
3 |
+
from bokeh.models import DatetimeTicker
|
4 |
+
|
5 |
warnings.filterwarnings("ignore")
|
6 |
import io
|
7 |
import os
|
|
|
157 |
def customize_x_axis_bokeh(plot, element):
    """Plot hook that restyles the x-axis of the rendered Bokeh figure.

    Rotates the major tick labels by 45 degrees and replaces the x-axis
    ticker with a ``DatetimeTicker`` that aims for about six ticks.

    Args:
        plot: Plot wrapper handed to the hook; its ``state`` attribute is
            used as the underlying Bokeh figure.
        element: The element being rendered. Unused here, but kept because
            it is part of the hook call signature.
    """
    import math  # local import: only needed for the degree -> radian conversion

    x_axis = plot.state.xaxis
    # Bokeh interprets a numeric label orientation as an angle in radians,
    # so a bare 45 would not give a 45-degree tilt; convert explicitly.
    x_axis.major_label_orientation = math.radians(45)
    # NOTE(review): assumes the x-axis carries datetime values — confirm
    # against the data passed to the curves using this hook.
    x_axis.ticker = DatetimeTicker(desired_num_ticks=6)
|
161 |
|
162 |
def create_publication_curve_chart():
|
163 |
country_name_df = pd.read_csv(os.path.join(data_folder, 'country_name_map.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
|
164 |
country_name_map = dict(zip(country_name_df.Country_Code, country_name_df.Country_Name))
|
165 |
+
#country_name_map
|
166 |
total_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'total_publications_time_indexed.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
|
167 |
country_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'country_publications_time_indexed.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
|
168 |
total_publications_time_indexed.id = np.log1p(total_publications_time_indexed.id)
|
|
|
173 |
curve_countries = []
|
174 |
for country in country_name_map.keys():
|
175 |
overlay = overlay * hv.Curve((total_publications_time_indexed.month_bin, country_publications_time_indexed[country]), label=country_name_map[country])
|
176 |
+
overlay.opts(show_legend=True,legend_position='right', width=1400, height=600, hooks=[customize_x_axis_bokeh])
|
177 |
return overlay
|
178 |
|
179 |
|
|
|
378 |
button0 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
|
379 |
button1 = pn.widgets.Button(name="Publication Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
|
380 |
button2 = pn.widgets.Button(name="Topic Map", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
|
381 |
+
button3 = pn.widgets.Button(name="AECO Macro Topic Hierarchy", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
|
382 |
button4 = pn.widgets.Button(name="AECO Macro Topics Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
|
383 |
button5 = pn.widgets.Button(name="Research Collaboration Networks: Institutes", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
|
384 |
button6 = pn.widgets.Button(name="Research Collaboration Networks: Countries", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
|
|
|
439 |
def CreatePage0():
|
440 |
return pn.Column(pn.pane.Markdown("""
|
441 |
|
442 |
+
|
443 |
+
## Introduction
|
444 |
+
|
445 |
+
This is a dashboard for a Data Analytics project regarding research publications in the AECO domain. The source data consists of around
|
446 |
+
267k English-language research papers gathered from the openalex.org graph database, covering a timeframe from 2011 through early 2024.
|
447 |
|
448 |
---------------------------
|
449 |
|
450 |
+
## Publication Trends
|
451 |
+
|
452 |
+
In the "Publication Trends" panel we show monthly time series of the total number of publications and the number of publications per country (both on a log scale), for the top 20
|
453 |
+
countries by number of publications in the dataset.
|
454 |
+
|
455 |
+
|
456 |
+
## Topic Map
|
457 |
+
In the "Topic Map" panel we show a UMAP-reduced 2-dimensional visualization of the 52 optimized topic clusters of AECO research papers, embedded using a Sentence Transformer model,
|
458 |
+
with the descriptive labels overlaid on the clusters being generated by the Llama 2 Large Language Model. Each point in the space represents a paper from the dataset.
|
459 |
+
Hovering over a point pops up the paper title, while clicking on it redirects to the corresponding OpenAlex paper entry page.
|
460 |
+
|
461 |
+
|
462 |
+
## AECO Macro Topic Hierarchy
|
463 |
+
The "AECO Macro Topic Hierarchy" panel allows you to explore the dendrogram representation of the optimized clustering, with the leaves of the tree
|
464 |
+
representing the 51 clusters, the intermediate nodes representing merged clusters and the height of the merging (distance from the leaves) indicating topic
|
465 |
+
similarity, based on the cosine distance matrix between topic embeddings.
|
466 |
+
|
467 |
+
|
468 |
+
## AECO Macro Topic Trends
|
469 |
+
The "AECO Macro Topic Trends" panel shows the evolution over time of the 16 AECO macro topics by plotting the semi-annual time series of the absolute numbers of publications per topic.
|
470 |
+
|
471 |
+
## Research Collaboration Networks: Institutes
|
472 |
+
The "Research Collaboration Networks: Institutes" panel contains VOSViewer-generated network representations of the research institutions' co-authorship connections, for each of the 16 macro
|
473 |
+
clusters. By selecting a macro-cluster from the drop-down menu, a graph is loaded whose nodes represent research institutions, edges represent co-authorship relations (with edge
|
474 |
+
thickness being proportional to the frequency of the relations) and the color-coded clustering highlights the partition of the graph into highly interconnected node groups.
|
475 |
+
Open the VOSViewer left panel to customize the visualization and/or search for a target institution in the graph.
|
476 |
|
|
|
|
|
|
|
477 |
|
478 |
+
## Research Collaboration Networks: Countries
|
479 |
+
The "Research Collaboration Networks: Countries" panel contains VOSViewer-generated network representations of the authors/institutions' country co-authorship connections, for each of
|
480 |
+
the 16 macro clusters. By selecting a macro-cluster from the drop-down menu, a graph is loaded whose nodes represent authors/institutions' countries, edges represent co-authorship
|
481 |
+
relations (with edge thickness being proportional to the frequency of the relations) and the color-coded clustering highlights the partition of the graph into highly interconnected node
|
482 |
+
groups. Open the VOSViewer left panel to customize the visualization and/or search for a target country in the graph.
|
483 |
|
|
|
484 |
|
|
|
485 |
""", width=800), align="center")
|
486 |
|
487 |
def CreatePage1():
|
|
|
552 |
}
|
553 |
|
554 |
#################### SIDEBAR LAYOUT ##########################
|
555 |
+
sidebar = pn.Column(pn.pane.Markdown("## Panels"),button0,button1,button2,button3,button4,button5,button6,
|
556 |
#button5,
|
557 |
#button6,
|
558 |
styles={"width": "100%", "padding": "15px"})
|
559 |
|
560 |
#################### MAIN AREA LAYOUT ##########################
|
561 |
+
main_area = pn.Column(mapping["Page0"], styles={"width":"100%"})
|
562 |
|
563 |
###################### APP LAYOUT ##############################
|
564 |
template = pn.template.BootstrapTemplate(
|