zavavan commited on
Commit
5d5bc80
·
verified ·
1 Parent(s): adfe2a3

Upload dashboard.py

Browse files
Files changed (1) hide show
  1. dashboard.py +45 -15
dashboard.py CHANGED
@@ -1,4 +1,7 @@
1
  import warnings
 
 
 
2
  warnings.filterwarnings("ignore")
3
  import io
4
  import os
@@ -154,11 +157,12 @@ grouping_filtered = pd.read_csv(os.path.join(data_folder, 'dna_relations.tsv'),
154
  def customize_x_axis_bokeh(plot, element):
155
  bokeh_plot = plot.state
156
  bokeh_plot.xaxis.major_label_orientation = 45 # Rotate x-axis labels
 
157
 
158
  def create_publication_curve_chart():
159
  country_name_df = pd.read_csv(os.path.join(data_folder, 'country_name_map.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
160
  country_name_map = dict(zip(country_name_df.Country_Code, country_name_df.Country_Name))
161
- country_name_map
162
  total_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'total_publications_time_indexed.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
163
  country_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'country_publications_time_indexed.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
164
  total_publications_time_indexed.id = np.log1p(total_publications_time_indexed.id)
@@ -169,7 +173,7 @@ def create_publication_curve_chart():
169
  curve_countries = []
170
  for country in country_name_map.keys():
171
  overlay = overlay * hv.Curve((total_publications_time_indexed.month_bin, country_publications_time_indexed[country]), label=country_name_map[country])
172
- overlay.opts(show_legend=True,legend_position='right', width=1200, height=500, hooks=[customize_x_axis_bokeh])
173
  return overlay
174
 
175
 
@@ -374,7 +378,7 @@ def generate_radio_buttons(value):
374
  button0 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
375
  button1 = pn.widgets.Button(name="Publication Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
376
  button2 = pn.widgets.Button(name="Topic Map", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
377
- button3 = pn.widgets.Button(name="AECO Macro Topics Hierarchy", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
378
  button4 = pn.widgets.Button(name="AECO Macro Topics Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
379
  button5 = pn.widgets.Button(name="Research Collaboration Networks: Institutes", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
380
  button6 = pn.widgets.Button(name="Research Collaboration Networks: Countries", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
@@ -435,23 +439,49 @@ button6.on_click(lambda event: show_page("Page6"))
435
  def CreatePage0():
436
  return pn.Column(pn.pane.Markdown("""
437
 
438
- This is a dashboard for a Research Analysis project regarding research and technology in the AECO domain. The source data consists of around
439
- 276k English-language research papers gathered from the openalex.org graph database, covering a timeframe from 2011 through 2024.
 
 
 
440
 
441
  ---------------------------
442
 
443
- ## AECO Topic Map
444
- In the AECO Topic Map panel we show the 6-month-sampled time series depicting the number of published research papers
445
- for the 16 macro-topics automatically detected by an optimized BerTopic model and ppst-processed for manual topic merging.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
 
447
- ## AECO Macro Topics
448
- In the AECO Macro Topics panel we present the 6-month-sampled time series depicting the number of published research papers
449
- for the 16 macro-topics automatically detected by an optimized BerTopic model and ppst-processed for manual topic merging.
450
 
 
 
 
 
 
451
 
452
- ### Research Collaboration Networks: Institutes
453
 
454
- ### Research Collaboration Networks: Authors
455
  """, width=800), align="center")
456
 
457
  def CreatePage1():
@@ -522,13 +552,13 @@ mapping = {
522
  }
523
 
524
  #################### SIDEBAR LAYOUT ##########################
525
- sidebar = pn.Column(pn.pane.Markdown("## Pages"),button0,button1,button2,button3,button4,button5,button6,
526
  #button5,
527
  #button6,
528
  styles={"width": "100%", "padding": "15px"})
529
 
530
  #################### MAIN AREA LAYOUT ##########################
531
- main_area = pn.Column(mapping["Page1"], styles={"width":"100%"})
532
 
533
  ###################### APP LAYOUT ##############################
534
  template = pn.template.BootstrapTemplate(
 
1
  import warnings
2
+
3
+ from bokeh.models import DatetimeTicker
4
+
5
  warnings.filterwarnings("ignore")
6
  import io
7
  import os
 
157
  def customize_x_axis_bokeh(plot, element):
158
  bokeh_plot = plot.state
159
  bokeh_plot.xaxis.major_label_orientation = 45 # Rotate x-axis labels
160
+ bokeh_plot.xaxis.ticker = DatetimeTicker(desired_num_ticks=6)
161
 
162
  def create_publication_curve_chart():
163
  country_name_df = pd.read_csv(os.path.join(data_folder, 'country_name_map.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
164
  country_name_map = dict(zip(country_name_df.Country_Code, country_name_df.Country_Name))
165
+ #country_name_map
166
  total_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'total_publications_time_indexed.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
167
  country_publications_time_indexed = pd.read_csv(os.path.join(data_folder, 'country_publications_time_indexed.tsv'), header=0, sep='\t', lineterminator='\n', low_memory=False)
168
  total_publications_time_indexed.id = np.log1p(total_publications_time_indexed.id)
 
173
  curve_countries = []
174
  for country in country_name_map.keys():
175
  overlay = overlay * hv.Curve((total_publications_time_indexed.month_bin, country_publications_time_indexed[country]), label=country_name_map[country])
176
+ overlay.opts(show_legend=True,legend_position='right', width=1400, height=600, hooks=[customize_x_axis_bokeh])
177
  return overlay
178
 
179
 
 
378
  button0 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
379
  button1 = pn.widgets.Button(name="Publication Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
380
  button2 = pn.widgets.Button(name="Topic Map", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
381
+ button3 = pn.widgets.Button(name="AECO Macro Topic Hierarchy", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
382
  button4 = pn.widgets.Button(name="AECO Macro Topics Trends", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
383
  button5 = pn.widgets.Button(name="Research Collaboration Networks: Institutes", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
384
  button6 = pn.widgets.Button(name="Research Collaboration Networks: Countries", button_type="warning", icon="chart-dots-3", styles={"width": "100%"})
 
439
  def CreatePage0():
440
  return pn.Column(pn.pane.Markdown("""
441
 
442
+
443
+ ## Introduction
444
+
445
+ This is a dashboard for a Data Analytics project regarding research publications in the AECO domain. The source data consists of around
446
+ 267k English-language research papers gathered from the openalex.org graph database, covering a timeframe from 2011 through early 2024.
447
 
448
  ---------------------------
449
 
450
+ ## Publication Trends
451
+
452
+ In the "Publication Trends" panel we show monthly time series of the total number of publications and the number of publications per country (both in log scale), for the top 20
453
+ countries by number of publications in the dataset.
454
+
455
+
456
+ ## Topic Map
457
+ In the "Topic Map" panel we show a UMAP reduced 2-dimensional visualization of the optimized 52 topic clusters of AECO research papers, embedded using a Sentence Transformer model,
458
+ with the descriptive labels overlaid on the clusters being generated by the Llama 2 Large Language Model. Each point in the space represents a paper from the dataset.
459
+ Hovering over a point shows the paper title, while clicking on it redirects to the corresponding OpenAlex paper entry page.
460
+
461
+
462
+ ## AECO Macro Topic Hierarchy
463
+ The "AECO Macro Topic Hierarchy" panel lets you explore the dendrogram representation of the optimized clustering, with the leaves of the tree
464
+ representing the 51 clusters, the intermediate nodes representing merged clusters and the height of the merging (distance from the leaves) indicating topic
465
+ similarity as based on the cosine distance matrix between topic embeddings.
466
+
467
+
468
+ ## AECO Macro Topic Trends
469
+ The "AECO Macro Topic Trends" panel shows the evolution over time of the 16 AECO macro topics by plotting the semi-annual time series of the absolute numbers of publications per topic.
470
+
471
+ ## Research Collaboration Networks: Institutes
472
+ The "Research Collaboration Networks: Institutes" panel contains VOSViewer-generated network representations of the research institutions' co-authorship connections, for each of the 16 macro
473
+ clusters. By selecting a macro-cluster from the drop-down menu, a graph is loaded whose nodes represent research institutions, edges represent co-authorship relations (with edge
474
+ thickness being proportional to the frequency of the relations) and the color code clustering highlights the partition of the graph in highly interconnected node groups.
475
+ Open the VOSViewer left panel to customize the visualization and/or search for a target institution in the graph.
476
 
 
 
 
477
 
478
+ ## Research Collaboration Networks: Countries
479
+ The "Research Collaboration Networks: Countries" panel contains VOSViewer-generated network representations of the authors/institutions' country co-authorship connections, for each of
480
+ the 16 macro clusters. By selecting a macro-cluster from the drop-down menu, a graph is loaded whose nodes represent authors/institutions' countries, edges represent co-authorship
481
+ relations (with edge thickness being proportional to the frequency of the relations) and the color code clustering highlights the partition of the graph in highly interconnected node
482
+ groups. Open the VOSViewer left panel to customize the visualization and/or search for a target country in the graph.
483
 
 
484
 
 
485
  """, width=800), align="center")
486
 
487
  def CreatePage1():
 
552
  }
553
 
554
  #################### SIDEBAR LAYOUT ##########################
555
+ sidebar = pn.Column(pn.pane.Markdown("## Panels"),button0,button1,button2,button3,button4,button5,button6,
556
  #button5,
557
  #button6,
558
  styles={"width": "100%", "padding": "15px"})
559
 
560
  #################### MAIN AREA LAYOUT ##########################
561
+ main_area = pn.Column(mapping["Page0"], styles={"width":"100%"})
562
 
563
  ###################### APP LAYOUT ##############################
564
  template = pn.template.BootstrapTemplate(