zavavan commited on
Commit
257cd77
·
verified ·
1 Parent(s): bc746fa

Update dashboard.py

Browse files
Files changed (1) hide show
  1. dashboard.py +65 -277
dashboard.py CHANGED
@@ -33,192 +33,44 @@ hv.extension('bokeh')
33
 
34
  ## LOAD DATASETS
35
 
36
- dna_folder = './data'
37
-
38
-
39
- #### full data unfiltered:
40
-
41
- dna_articles_unfiltered_eu_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_eu_time_indexed_resampled.tsv'),sep='\t',header=0)
42
- dna_articles_unfiltered_us_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_us_time_indexed_resampled.tsv'),sep='\t',header=0)
43
- dna_articles_unfiltered_eu_us_time_indexed_resampled = pd.read_csv(os.path.join(dna_folder, 'dna_articles_unfiltered_eu_us_time_indexed_resampled.tsv'),sep='\t',header=0)
44
-
45
- #### classifier filtered articles:
46
-
47
- dh_ration_df_eu = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_eu.tsv'),sep='\t',header=0)
48
- dh_ration_df_us = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_us.tsv'),sep='\t',header=0)
49
- dh_ration_df_eu_us = pd.read_csv(os.path.join(dna_folder, 'dh_ration_df_eu_us.tsv'),sep='\t',header=0)
50
-
51
- regions = ['eu', 'us', 'eu_us']
52
-
53
- sorted_ent_type_freq_map_eu=dict()
54
- sorted_ent_type_freq_map_us=dict()
55
- sorted_ent_type_freq_map_eu_us=dict()
56
-
57
- def read_top_ent_types():
58
- reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_type_freq_map_eu.tsv'), 'r'))
59
- for i,row in enumerate(reader):
60
- if i < 20:
61
- k, v = row
62
- sorted_ent_type_freq_map_eu[k] = int(v)
63
- del sorted_ent_type_freq_map_eu['Entity']
64
- reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_type_freq_map_us.tsv'), 'r'))
65
- for i, row in enumerate(reader):
66
- if i < 20:
67
- k, v = row
68
- sorted_ent_type_freq_map_us[k] = int(v)
69
- del sorted_ent_type_freq_map_us['Entity']
70
- reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_type_freq_map_eu_us.tsv'), 'r'))
71
- for i, row in enumerate(reader):
72
- if i < 20:
73
- k, v = row
74
- sorted_ent_type_freq_map_eu_us[k] = int(v)
75
- del sorted_ent_type_freq_map_eu_us['Entity']
76
-
77
- read_top_ent_types()
78
-
79
- top_type_filtered_eu = ['DBpedia:Country', 'DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:Person', 'DBpedia:Disease', 'DBpedia:ChemicalSubstance', 'DBpedia:Drug', 'DBpedia:GovernmentAgency', 'DBpedia:City', 'DBpedia:MonoclonalAntibody']
80
- top_type_filtered_us = ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:Disease', 'DBpedia:ChemicalSubstance', 'DBpedia:Person', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Region', 'DBpedia:MonoclonalAntibody', 'DBpedia:City', 'DBpedia:Biomolecule']
81
- top_type_filtered_eu_us = ['DBpedia:Organisation', 'DBpedia:Company', 'DBpedia:ChemicalSubstance', 'DBpedia:Drug', 'DBpedia:Country', 'DBpedia:Person', 'DBpedia:Disease', 'DBpedia:MonoclonalAntibody', 'DBpedia:GovernmentAgency', 'DBpedia:Biomolecule', 'DBpedia:Gene']
82
-
83
- dna_healthtech_articles_eu_time_indexed_resampled=pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_eu_time_indexed_resampled.tsv'),sep='\t',header=0)
84
- dna_healthtech_articles_us_time_indexed_resampled=pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_us_time_indexed_resampled.tsv'),sep='\t',header=0)
85
- dna_healthtech_articles_eu_us_time_indexed_resampled=pd.read_csv(os.path.join(dna_folder, 'dna_healthtech_articles_eu_us_time_indexed_resampled.tsv'),sep='\t',header=0)
86
-
87
- def read_top_ent_maps():
88
- reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_freq_map_eu.tsv'), 'r'), delimiter='\t')
89
- for row in reader:
90
- k,v = row
91
- lista = ast.literal_eval(v)
92
- dizionario = dict()
93
- for pair in lista:
94
- dizionario[pair[0]]=pair[1]
95
- dizionario = sorted(dizionario.items(), key=lambda x: x[1], reverse=True)
96
- ent_freq_maps_eu[k]=dizionario
97
-
98
- reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_freq_map_us.tsv'), 'r'), delimiter='\t')
99
- for row in reader:
100
- k, v = row
101
- lista = ast.literal_eval(v)
102
- dizionario = dict()
103
- for pair in lista:
104
- dizionario[pair[0]] = pair[1]
105
- dizionario = sorted(dizionario.items(), key=lambda x: x[1], reverse=True)
106
- ent_freq_maps_us[k] = dizionario
107
-
108
- reader = csv.reader(open(os.path.join(dna_folder, 'sorted_ent_freq_map_eu_us.tsv'), 'r'), delimiter='\t')
109
- for row in reader:
110
- k, v = row
111
- lista = ast.literal_eval(v)
112
- dizionario = dict()
113
- for pair in lista:
114
- dizionario[pair[0]] = pair[1]
115
- dizionario = sorted(dizionario.items(), key=lambda x: x[1], reverse=True)
116
- ent_freq_maps_eu_us[k] = dizionario
117
-
118
- ent_freq_maps_eu = dict()
119
- ent_freq_maps_us = dict()
120
- ent_freq_maps_eu_us = dict()
121
-
122
- read_top_ent_maps()
123
-
124
-
125
- def read_type_filtered_triples():
126
- for t in top_type_filtered_eu:
127
- df = pd.read_csv(dna_folder+'/filtered_rows/eu/'+t.replace(':','_')+'.tsv', sep=" ", header=0)
128
- df.drop(columns=['Unnamed: 0'], inplace=True)
129
- top_type_filtered_triples_eu[t]=df
130
- for t in top_type_filtered_us:
131
- df = pd.read_csv(dna_folder+'/filtered_rows/us/'+t.replace(':','_')+'.tsv', sep=" ")
132
- df.drop(columns=['Unnamed: 0'], inplace=True)
133
- top_type_filtered_triples_us[t]=df
134
- for t in top_type_filtered_eu_us:
135
- df = pd.read_csv(dna_folder+'/filtered_rows/eu_us/'+t.replace(':','_')+'.tsv', sep=" ")
136
- df.drop(columns=['Unnamed: 0'], inplace=True)
137
- top_type_filtered_triples_eu_us[t]=df
138
-
139
-
140
-
141
- top_type_filtered_triples_eu = dict()
142
- top_type_filtered_triples_us = dict()
143
- top_type_filtered_triples_eu_us = dict()
144
-
145
- read_type_filtered_triples()
146
-
147
- grouping_filtered = pd.read_csv(os.path.join(dna_folder, 'dna_relations.tsv'), sep=" ")
148
  ################################# CREATE CHARTS ############################
149
- def create_curve_chart():
150
- # Create the 3 line plots
151
- curve_eu = hv.Curve((dh_ration_df_eu.index, dh_ration_df_eu.ids/dna_articles_unfiltered_eu_time_indexed_resampled.ids), 'Time', 'Digital Health News Ratio',label='EU')
152
- curve_us = hv.Curve((dh_ration_df_us.index, dh_ration_df_us.ids/dna_articles_unfiltered_us_time_indexed_resampled.ids),'Time', 'Digital Health News Ratio', label='US')
153
- curve_eu_us = hv.Curve((dh_ration_df_eu_us.index, dh_ration_df_eu_us.ids/dna_articles_unfiltered_eu_us_time_indexed_resampled.ids),'Time', 'Digital Health News Ratio', label='EU-US')
154
- #Overlay the line plots
155
- overlay = curve_eu * curve_us * curve_eu_us
156
- overlay.opts(show_legend = True, legend_position='top_left', width=1200, height=600)
157
- return overlay
158
-
159
-
160
- def create_bar_charts(region, **kwargs):
161
- if region=='eu':
162
- sliced = sorted_ent_type_freq_map_eu
163
- return hv.Bars(sliced, hv.Dimension('Entity Types'), 'Frequency').opts( framewise=True, xrotation=45,width=1200, height=600)
164
- elif region=='us':
165
- sliced = sorted_ent_type_freq_map_us
166
- return hv.Bars(sliced, hv.Dimension('Entity Types'), 'Frequency').opts(framewise=True, xrotation=45,width=1200, height=600)
167
- elif region=='eu_us':
168
- sliced = sorted_ent_type_freq_map_eu_us
169
- return hv.Bars(sliced, hv.Dimension('Entity Types'), 'Frequency').opts(framewise=True, xrotation=45,width=1200, height=600)
170
-
171
-
172
-
173
- # Define a function to generate Curve based on selected values
174
- def generate_entity_curves(region_value, type_value, **kwargs):
175
- if region_value=='eu':
176
- top20Ents = ent_freq_maps_eu[type_value]
177
- curveList = []
178
- for ent in top20Ents:
179
- entityTriples = top_type_filtered_triples_eu[type_value][(top_type_filtered_triples_eu[type_value]['subjEntityLinks']==ent[0]) | (top_type_filtered_triples_eu[type_value]['objEntityLinks']==ent[0])]
180
- entityTriples_time_indexed = entityTriples.set_index(pd.DatetimeIndex(entityTriples['timestamp']), inplace=False)
181
- del entityTriples_time_indexed['timestamp']
182
- entityTriples_time_indexed_resampled = entityTriples_time_indexed.resample("Y").count()
183
- #print(entityTriples_time_indexed_resampled)
184
- entityTriples_time_indexed_resampled = entityTriples_time_indexed_resampled.reindex(dna_healthtech_articles_eu_time_indexed_resampled.index, fill_value=0)
185
- curve = hv.Curve((entityTriples_time_indexed_resampled.index, (entityTriples_time_indexed_resampled['doc_id']/dna_healthtech_articles_eu_time_indexed_resampled['ids'])), 'Time', 'Key Entity Occurrence', label=ent[0])
186
- curve.opts(autorange='y')
187
- #curve.opts(logy=True)
188
- curveList.append(curve)
189
- overlay = hv.Overlay(curveList)
190
- overlay.opts(legend_muted=False, legend_cols=4, show_legend = True, legend_position='top_left', fontsize={'legend':13},width=1200, height=800)
191
- return overlay
192
-
193
- elif region_value=='us':
194
- top20Ents = ent_freq_maps_us[type_value]
195
- curveList = []
196
- for ent in top20Ents:
197
- entityTriples = top_type_filtered_triples_us[type_value][(top_type_filtered_triples_us[type_value]['subjEntityLinks']==ent[0]) | (top_type_filtered_triples_us[type_value]['objEntityLinks']==ent[0])]
198
- entityTriples_time_indexed = entityTriples.set_index(pd.DatetimeIndex(entityTriples['timestamp']), inplace=False)
199
- del entityTriples_time_indexed['timestamp']
200
- entityTriples_time_indexed_resampled = entityTriples_time_indexed_resampled.reindex(dna_healthtech_articles_us_time_indexed_resampled.index, fill_value=0)
201
- curve = hv.Curve((entityTriples_time_indexed_resampled.index, (entityTriples_time_indexed_resampled['doc_id']/dna_healthtech_articles_us_time_indexed_resampled['ids'])), 'Time', 'Key Entity Occurrence', label=ent[0])
202
- curve.opts(autorange='y')
203
- curveList.append(curve)
204
- overlay = hv.Overlay(curveList)
205
- overlay.opts(legend_muted=False, legend_cols=4, show_legend = True, legend_position='top_left', fontsize={'legend':13},width=1200, height=800)
206
- return overlay
207
-
208
- elif region_value=='eu_us':
209
- top20Ents = ent_freq_maps_eu_us[type_value]
210
- curveList = []
211
- for ent in top20Ents:
212
- entityTriples = top_type_filtered_triples_eu_us[type_value][(top_type_filtered_triples_eu_us[type_value]['subjEntityLinks']==ent[0]) | (top_type_filtered_triples_eu_us[type_value]['objEntityLinks']==ent[0])]
213
- entityTriples_time_indexed = entityTriples.set_index(pd.DatetimeIndex(entityTriples['timestamp']), inplace=False)
214
- del entityTriples_time_indexed['timestamp']
215
- entityTriples_time_indexed_resampled = entityTriples_time_indexed_resampled.reindex(dna_healthtech_articles_eu_us_time_indexed_resampled.index, fill_value=0)
216
- curve = hv.Curve((entityTriples_time_indexed_resampled.index, (entityTriples_time_indexed_resampled['doc_id']/dna_healthtech_articles_eu_us_time_indexed_resampled['ids'])), 'Time', 'Key Entity Occurrence', label=ent[0])
217
- curve.opts(autorange='y')
218
- curveList.append(curve)
219
- overlay = hv.Overlay(curveList)
220
- overlay.opts(legend_muted=False, legend_cols=4, show_legend = True, legend_position='top_left', fontsize={'legend':13},width=1200, height=800)
221
- return overlay
222
 
223
 
224
  ############################# WIDGETS & CALLBACK ###########################################
@@ -228,7 +80,7 @@ def filter_data0(df, min_value):
228
  return filtered_df
229
 
230
 
231
- def plot_chord0_new(df,min_value):
232
  filtered_df = filter_data0(df, min_value)
233
  # Create a Holoviews Dataset for nodes
234
  nodes = hv.Dataset(filtered_df, 'index')
@@ -237,89 +89,44 @@ def plot_chord0_new(df,min_value):
237
  return chord.opts(opts.Chord(cmap='Category20', edge_cmap='Category20', label_text_color="white", node_color = hv.dim('index').str(), edge_color = hv.dim('source').str(), labels = 'index', tools=['hover'], width=800, height=800))
238
 
239
 
240
- def retrieveRegionTypes(region):
241
- if region == 'eu':
242
- return top_type_filtered_eu
243
- elif region == 'us':
244
- return top_type_filtered_us
245
- elif region == 'eu_us':
246
- return top_type_filtered_eu_us
247
-
248
 
249
- def filter_region(region):
250
- if region == 'eu':
251
- region_grouping = grouping_filtered[grouping_filtered['region'] == 'eu']
252
- elif region == 'us':
253
- region_grouping = grouping_filtered[grouping_filtered['region'] == 'us']
254
- elif region == 'eu_us':
255
- region_grouping = grouping_filtered[grouping_filtered['region'] == 'eu_us']
256
-
257
- #print(len(region_grouping))
258
  # Define range for minimum value slider
259
- min_value_range = region_grouping['value'].unique()
260
  min_value_range.sort()
261
 
262
  # Define HoloMap with minimum value and attribute as key dimensions
263
- holomap = hv.HoloMap({min_value: plot_chord0_new(region_grouping, min_value)
264
  for min_value in min_value_range},
265
  kdims=['Show triples with support greater than']
266
  )
267
  return holomap
268
 
269
 
270
- # Define a function to generate Entity List RadioButtonGroup based on Region selection
271
- def generate_radio_buttons(value):
272
- if value == 'eu':
273
- return pn.widgets.RadioButtonGroup(options=retrieveRegionTypes(value), value='DBpedia:Company', name='eu', orientation='vertical')
274
- elif value == 'us':
275
- return pn.widgets.RadioButtonGroup(options=retrieveRegionTypes(value), value='DBpedia:Disease', name='us', orientation='vertical')
276
- elif value == 'eu_us':
277
- return pn.widgets.RadioButtonGroup(options=retrieveRegionTypes(value), value='DBpedia:Person', name='eu_us', orientation='vertical')
278
-
279
-
280
-
281
  # https://tabler-icons.io/
282
  button1 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
283
- button2 = pn.widgets.Button(name="Health Tech News Ratio", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
284
- button3 = pn.widgets.Button(name="Top Entity Types", button_type="warning", icon="chart-bar", styles={"width": "100%"})
285
- button4 = pn.widgets.Button(name="Top Key Entities", button_type="warning", icon="chart-dots-filled", styles={"width": "100%"})
286
- button5 = pn.widgets.Button(name="Entity Chord Diagrams", button_type="warning", icon="chart-dots-filled", styles={"width": "100%"})
287
 
288
 
289
- region1 = pn.widgets.RadioButtonGroup(name='### Select News Region', options=regions)
290
 
 
291
 
292
- # Initial RadioButtonGroup
293
- radio_buttons_regions = pn.widgets.RadioButtonGroup(options=regions,value='eu',name='Select region')
294
- # Generate initial dynamic RadioButtonGroup
295
- radio_buttons_types = generate_radio_buttons(radio_buttons_regions.value)
296
-
297
-
298
-
299
- # Define a callback function to update the panel dynamically
300
- def update_radio_group(event):
301
- #print(event.new)
302
- #print(retrieveRegionTypes(event.new))
303
- radio_buttons_types.options = retrieveRegionTypes(event.new)
304
-
305
-
306
- # bind the function to the widget(s)
307
- dmap2 = hv.DynamicMap(pn.bind(generate_entity_curves, radio_buttons_regions,radio_buttons_types))
308
- # Bind the selected value of the first RadioButtonGroup to update the second RadioButtonGroup
309
- radio_buttons_regions.param.watch(update_radio_group, 'value')
310
 
311
  # Define the callback function to update the HoloMap
312
  def update_holomap(event):
313
- initial_holomap.object = filter_region(event.new)
314
 
315
- region_radio_button = pn.widgets.RadioButtonGroup(options=regions, value='eu', name='Select Region')
316
 
317
  # Create the initial HoloMap
318
- initial_holomap = filter_region(region_radio_button.value)
319
 
320
  # Bind the callback function to the value change event of the RadioButton widget
321
- region_radio_button.param.watch(update_holomap, 'value')
322
-
323
 
324
 
325
  def show_page(page_key):
@@ -330,7 +137,6 @@ button1.on_click(lambda event: show_page("Page1"))
330
  button2.on_click(lambda event: show_page("Page2"))
331
  button3.on_click(lambda event: show_page("Page3"))
332
  button4.on_click(lambda event: show_page("Page4"))
333
- button5.on_click(lambda event: show_page("Page5"))
334
 
335
 
336
  ### CREATE PAGE LAYOUTS
@@ -338,55 +144,39 @@ button5.on_click(lambda event: show_page("Page5"))
338
  def CreatePage1():
339
  return pn.Column(pn.pane.Markdown("""
340
 
341
- This is a dashboard for a News Analysis project regarding Digital Health technology. The source data consists of around 7.8 million English-language news articles gathered from the **Dow Jones Data, News, and Analytics (DNA)**
342
- platform (https://www.dowjones.com/professional/developer-platform/) covering a timeframe from September 1987 through December 2023. The news items text content is copyrighted and cannot be shared within this project.
343
-
344
- Some of the data analytics visualizations show here come from a Knowledge Graph automatically extracted from DNA news sources. A Virtuoso SPARQL endpoint to this graph (named 'DHNEWS KG') is set up at the
345
- URL: https://api-vast.jrc.service.ec.europa.eu/sparql/
346
 
347
 
348
  ---------------------------
349
 
350
- ## 1. Health Tech News Ratio
351
- In the Health Tech News Ratio panel we present the month-sampled time series depicting the proportion of 97k news articles con-
352
- cerning Digital Health, as identified by a text classifier, out of the total number of English language DNA news articles pertaining to Europe and the US
353
-
354
-
355
- ### 2. Top Entity Types
356
- The Top Entity Types bar plots in the dashboard show the predominant DBpedia-inherited entity types within the graph for triples tagged with Europe, US, and EU-US region codes via their article support.
357
-
358
- ## 3. Top Key Entities
359
  The Top Key Entities plots track the occurrence of several key entities per year, where occurrence means the entity is either the Subject or Object of an extracted triple in the KG.
360
 
361
- ## 4. Entity Chord Diagrams
362
  Entity Chord Diagrams represent the most frequently connected entity pairs within the KG through chord illustrations, serving as both Subjects and Objects of predicative triples.
363
  The size of the chords corresponds to the support of the depicted relations.
364
  """, width=800), align="center")
365
 
366
  def CreatePage2():
367
  return pn.Column(
368
- pn.pane.Markdown("## Health Tech News Ratio "),
369
- create_curve_chart(),
 
370
  align="center",
371
  )
372
 
373
  def CreatePage3():
374
  return pn.Column(
375
- region1,
376
- pn.bind(create_bar_charts, region1),
377
- align="center",
378
- )
379
-
380
- def CreatePage4():
381
- return pn.Column(
382
- pn.pane.Markdown("## Top Key Entities "),
383
- pn.Row(pn.Column(radio_buttons_regions, radio_buttons_types), dmap2),
384
  align="center", )
385
 
386
- def CreatePage5():
387
  return pn.Column(
388
- pn.pane.Markdown("## Entity Chord Diagrams "),
389
- pn.Row(region_radio_button, pn.bind(filter_region, region_radio_button)),
390
  align="center", )
391
 
392
  mapping = {
@@ -394,14 +184,12 @@ mapping = {
394
  "Page2": CreatePage2(),
395
  "Page3": CreatePage3(),
396
  "Page4": CreatePage4(),
397
- "Page5": CreatePage5(),
398
  }
399
 
400
  #################### SIDEBAR LAYOUT ##########################
401
  sidebar = pn.Column(pn.pane.Markdown("## Pages"), button1,button2,button3,
402
  button4,
403
- button5,
404
- styles={"width": "100%", "padding": "15px"})
405
 
406
  #################### MAIN AREA LAYOUT ##########################
407
  main_area = pn.Column(mapping["Page1"], styles={"width":"100%"})
 
33
 
34
  ## LOAD DATASETS
35
 
36
# Directory (relative to the working directory) that holds every input table read below.
data = './data'
37
+
38
+
39
def read_freq_map(filename, folder=None):
    """Load a frequency table and return it as a ``{label: count}`` dict.

    Keys are taken from the first column of the file, whatever its header is;
    values are taken from the column named ``count``.

    Args:
        filename: Name of the file, relative to ``folder``.
        folder: Directory that contains the file. Defaults to the module-level
            ``data`` directory, so existing one-argument calls behave as before.

    Returns:
        dict mapping each first-column value to its ``count`` value. When a
        label appears on several rows, the last row wins.

    Raises:
        KeyError: if the file has no ``count`` column.
    """
    if folder is None:
        folder = data
    # NOTE(review): the callers pass '.tsv' names, yet the file is parsed with
    # read_csv's default comma separator -- confirm the files are comma-delimited.
    df = pd.read_csv(os.path.join(folder, filename))
    label_column = df.columns[0]
    return dict(zip(df[label_column], df['count']))
44
+
45
+
46
+
47
# Frequency tables (label -> count) that feed the bar-chart pages.
entityTypesFreqMap = read_freq_map('entityTypes.tsv')
relationTypesFreqMap = read_freq_map('relationTypes.tsv')
topDrugEntities = read_freq_map('topDrugs.tsv')
topConditionEntities = read_freq_map('topConditions.tsv')


# Relation table behind the chord diagrams. It is parsed as space-separated
# (sep=" ") even though the file name ends in .tsv; filter_triples reads its
# 'causal_relation' and 'value' columns.
grouping_filtered = pd.read_csv(os.path.join(data, 'drugReviewsCausal_relations.tsv'), sep=" ")
54
+
55
+
56
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  ################################# CREATE CHARTS ############################
58
def create_type_bar_charts(entRelsButton, **kwargs):
    """Build the type-frequency bar chart for the selected category.

    Args:
        entRelsButton: ``'Entity'`` to chart ``entityTypesFreqMap`` or
            ``'Relation'`` to chart ``relationTypesFreqMap``.
        **kwargs: Accepted and ignored.

    Returns:
        An ``hv.Bars`` element with the chosen map on the x axis and
        'Frequency' on the y axis, or ``None`` for any other selection.
    """
    if entRelsButton == 'Entity':
        freq_map, axis_label = entityTypesFreqMap, 'Entity Types'
    elif entRelsButton == 'Relation':
        freq_map, axis_label = relationTypesFreqMap, 'Relation Types'
    else:
        return None

    bars = hv.Bars(freq_map, hv.Dimension(axis_label), 'Frequency')
    return bars.opts(framewise=True, xrotation=45, width=1200, height=600)
65
+
66
+
67
def create_ent_bar_charts(ents, **kwargs):
    """Build the top-entity bar chart for the selected entity kind.

    Args:
        ents: ``'Drug'`` to chart ``topDrugEntities`` or ``'Condition'`` to
            chart ``topConditionEntities``.
        **kwargs: Accepted and ignored.

    Returns:
        An ``hv.Bars`` element with the chosen map on the x axis and
        'Frequency' on the y axis, or ``None`` for any other selection.
    """
    if ents == 'Drug':
        freq_map, axis_label = topDrugEntities, 'Drug Entities'
    # The original compared the module-level ``entRelsButton`` widget here
    # instead of the ``ents`` argument, so this branch could never be taken.
    elif ents == 'Condition':
        freq_map, axis_label = topConditionEntities, 'Condition Entities'
    else:
        return None

    bars = hv.Bars(freq_map, hv.Dimension(axis_label), 'Frequency')
    return bars.opts(framewise=True, xrotation=45, width=1200, height=600)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
 
76
  ############################# WIDGETS & CALLBACK ###########################################
 
80
  return filtered_df
81
 
82
 
83
+ def plot_chord(df,min_value):
84
  filtered_df = filter_data0(df, min_value)
85
  # Create a Holoviews Dataset for nodes
86
  nodes = hv.Dataset(filtered_df, 'index')
 
89
  return chord.opts(opts.Chord(cmap='Category20', edge_cmap='Category20', label_text_color="white", node_color = hv.dim('index').str(), edge_color = hv.dim('source').str(), labels = 'index', tools=['hover'], width=800, height=800))
90
 
91
 
92
def filter_triples(rel):
    """Build a HoloMap of chord diagrams for one causal relation.

    Rows of ``grouping_filtered`` whose 'causal_relation' equals ``rel`` are
    selected, and one chord diagram is produced by ``plot_chord`` for every
    distinct 'value' found in those rows, keyed by that value in ascending order.

    Args:
        rel: Causal relation label to select.

    Returns:
        An ``hv.HoloMap`` with the single key dimension
        'Show triples with support greater than'.
    """
    rel_rows = grouping_filtered[grouping_filtered['causal_relation'] == rel]

    # Distinct support values become the slider positions; sorted in place.
    thresholds = rel_rows['value'].unique()
    thresholds.sort()

    charts = {}
    for threshold in thresholds:
        charts[threshold] = plot_chord(rel_rows, threshold)

    return hv.HoloMap(charts, kdims=['Show triples with support greater than'])
105
 
106
 
 
 
 
 
 
 
 
 
 
 
 
107
  # https://tabler-icons.io/
108
  button1 = pn.widgets.Button(name="Introduction", button_type="warning", icon="file-info", styles={"width": "100%"})
109
# Sidebar navigation buttons for pages 2-4.
button2 = pn.widgets.Button(name="Entity/Relation Types", button_type="warning", icon="chart-histogram", styles={"width": "100%"})
button3 = pn.widgets.Button(name="Top Entities", button_type="warning", icon="chart-bar", styles={"width": "100%"})
button4 = pn.widgets.Button(name="Causal Relation Chord Diagrams", button_type="warning", icon="chart-dots-filled", styles={"width": "100%"})


# Selector for create_type_bar_charts: entity types vs. relation types.
entRelsButton = pn.widgets.RadioButtonGroup(name='### Select', options=['Entity', 'Relation'], value='Entity')

# Selector for create_ent_bar_charts, which only knows 'Drug' and 'Condition'.
# The options previously came from entityTypesFreqMap with 'Disease' as the
# default, a value the chart function does not handle, so nothing was drawn.
entTypeButton = pn.widgets.RadioButtonGroup(name='### Select Entity Type', options=['Drug', 'Condition'], value='Drug')

# Selector for the chord-diagram page. The keys are materialised as a list
# because widget options are expected to be a list or dict, not a keys view.
# NOTE(review): the default assumes relationTypes.tsv has a 'Cause' row -- confirm.
relationTypeButton = pn.widgets.RadioButtonGroup(options=list(relationTypesFreqMap), value='Cause', name='Select Causal Relation')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  # Define the callback function to update the HoloMap
121
def update_holomap(event):
    """Rebuild the chord HoloMap for the value carried in ``event.new``."""
    # NOTE(review): initial_holomap is whatever filter_triples returns (an
    # hv.HoloMap), not a Panel pane -- confirm that assigning ``.object`` on it
    # refreshes anything. CreatePage4 already re-renders the diagram through
    # pn.bind(filter_triples, relationTypeButton).
    initial_holomap.object = filter_triples(event.new)


# Create the initial HoloMap
initial_holomap = filter_triples(relationTypeButton.value)

# Bind the callback function to the value change event of the RadioButton widget
relationTypeButton.param.watch(update_holomap, 'value')
 
130
 
131
 
132
  def show_page(page_key):
 
137
# Each sidebar button calls show_page with the key of the page it stands for;
# the click event itself is ignored.
button2.on_click(lambda event: show_page("Page2"))
button3.on_click(lambda event: show_page("Page3"))
button4.on_click(lambda event: show_page("Page4"))
 
140
 
141
 
142
  ### CREATE PAGE LAYOUTS
 
144
def CreatePage1():
    """Build the introduction page: one centred, 800px-wide Markdown pane.

    The text describes the data source and then lists the dashboard's three
    chart pages. The section list previously still described the per-year
    "Top Key Entities" plots of an earlier dashboard, which this version
    does not have.

    Returns:
        A ``pn.Column`` holding the Markdown pane, aligned to the centre.
    """
    # The text sits at column 0 so it renders as paragraphs whether or not the
    # Markdown pane dedents its input.
    return pn.Column(pn.pane.Markdown("""

This is a dashboard for exploring a causal relation knowledge graph automatically extracted from a collection of drug reviews. The source data consists of around 19200 reviews from the **Drug Reviews (Druglib.com)** dataset (https://archive.ics.uci.edu/dataset/461/drug+review+dataset+druglib+com) containing patient reviews on specific drugs along with related conditions, crawled from online pharmaceutical review sites.
The causal relations represented in the KG are defined by the **MIMICause** schema (https://huggingface.co/datasets/pensieves/mimicause). The underlying CausalDrugsKG graph is available in Turtle and RDF serialization format in the European Data portal: https://data.jrc.ec.europa.eu/dataset/acebeb4e-9789-4b5c-97ec-292ce14e75d0.


---------------------------

## 1. Entity/Relation Types
The Entity/Relation Types bar plots show how frequently each entity type and each relation type occurs in the KG.

## 2. Top Entities
The Top Entities bar plots show the frequency of the top Drug and Condition entities in the KG.

## 3. Causal Relation Chord Diagrams
Causal Relation Chord Diagrams represent the most frequently connected entity pairs within the KG through chord illustrations, serving as both Subjects and Objects of triples for the selected causal relation.
The size of the chords corresponds to the support of the depicted relations.
""", width=800), align="center")
160
 
161
def CreatePage2():
    """Build the Entity/Relation Types page.

    Returns:
        A centred ``pn.Column`` with a heading, the ``entRelsButton`` selector
        and the bar chart bound to that selector's value.
    """
    heading = pn.pane.Markdown("## Entity/Relation Types ")
    chart = pn.bind(create_type_bar_charts, entRelsButton)
    return pn.Column(heading, entRelsButton, chart, align="center")
168
 
169
def CreatePage3():
    """Build the Top Entities page.

    Returns:
        A centred ``pn.Column`` with a heading, the ``entTypeButton`` selector
        and the bar chart bound to that selector's value.
    """
    heading = pn.pane.Markdown("## Top Entities ")
    chart = pn.bind(create_ent_bar_charts, entTypeButton)
    return pn.Column(heading, entTypeButton, chart, align="center")
175
 
176
def CreatePage4():
    """Build the Causal Relation Chord Diagrams page.

    Returns:
        A centred ``pn.Column`` with a heading above a row that places the
        ``relationTypeButton`` selector next to the chord HoloMap bound to it.
    """
    heading = pn.pane.Markdown("## Causal Relation Chord Diagrams")
    chord_view = pn.bind(filter_triples, relationTypeButton)
    body = pn.Row(relationTypeButton, chord_view)
    return pn.Column(heading, body, align="center")
181
 
182
  mapping = {
 
184
  "Page2": CreatePage2(),
185
  "Page3": CreatePage3(),
186
  "Page4": CreatePage4(),
 
187
  }
188
 
189
  #################### SIDEBAR LAYOUT ##########################
190
# Navigation column: a "Pages" heading followed by the four page buttons.
sidebar = pn.Column(pn.pane.Markdown("## Pages"), button1,button2,button3,
                    button4,
                    styles={"width": "100%", "padding": "15px"})

#################### MAIN AREA LAYOUT ##########################
# The main area initially holds the introduction page (mapping["Page1"]).
main_area = pn.Column(mapping["Page1"], styles={"width":"100%"})