bestroi committed on
Commit
c854388
·
verified ·
1 Parent(s): ed42436

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +305 -55
app.py CHANGED
@@ -5,6 +5,10 @@ from io import StringIO
5
  import folium
6
  from streamlit_folium import st_folium
7
  import unicodedata
 
 
 
 
8
 
9
  # -------------------------------
10
  # Authority Lists as XML Strings
@@ -201,6 +205,8 @@ def parse_inscriptions(xml_content):
201
  origin = places_dict.get(origin_id, {}).get('Name', origin_id)
202
  origin_geonames_link = places_dict.get(origin_id, {}).get('GeoNames Link', "#")
203
  origin_pleiades_link = places_dict.get(origin_id, {}).get('Pleiades Link', "#")
 
 
204
 
205
  # Handle Material with or without 'ref' attribute
206
  material_elem = inscription.find('Material')
@@ -225,7 +231,25 @@ def parse_inscriptions(xml_content):
225
 
226
  language = inscription.find('Language').text if inscription.find('Language') is not None else "N/A"
227
 
228
- text = "".join(inscription.find('Text').itertext()).strip() if inscription.find('Text') is not None else "N/A"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  dating = inscription.find('Dating').text if inscription.find('Dating') is not None else "N/A"
231
  images = inscription.find('Images').text if inscription.find('Images') is not None else "N/A"
@@ -240,9 +264,13 @@ def parse_inscriptions(xml_content):
240
  'Origin': origin,
241
  'GeoNames Link': origin_geonames_link,
242
  'Pleiades Link': origin_pleiades_link,
 
 
243
  'Material_ID': material_id,
244
  'Material': material,
245
  'Language': language,
 
 
246
  'Text': text,
247
  'Dating': dating,
248
  'Images': images,
@@ -709,11 +737,14 @@ with tabs[4]:
709
  place_desc = places_dict.get(row['Origin_ID'], {}).get('Description', "No description available.")
710
  st.markdown(f"**Place Description**: {place_desc}")
711
 
 
 
 
712
  with tabs[5]:
713
  st.subheader("Authority Connections")
714
 
715
  # Define Authority Types
716
- authority_types = ["Material", "Place"] # Extend this list if you include Titles in the future
717
 
718
  # Select Authority Type
719
  selected_authority_type = st.selectbox("Select Authority Type", authority_types)
@@ -742,10 +773,9 @@ with tabs[5]:
742
  # Display inscriptions in a table
743
  st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Origin', 'Language', 'Dating', 'Encoder']])
744
 
745
- # Optional: Visualization - Number of Inscriptions per Year (Dating)
746
  st.markdown("#### Inscriptions Over Time")
747
- # Assuming 'Dating' is in a format that can be processed (e.g., single year or range)
748
- # For simplicity, we'll extract the starting year
749
  def extract_start_year(dating):
750
  if isinstance(dating, str):
751
  parts = dating.split('to')
@@ -757,38 +787,87 @@ with tabs[5]:
757
 
758
  connected_inscriptions['Start_Year'] = connected_inscriptions['Dating'].apply(extract_start_year)
759
  year_counts = connected_inscriptions['Start_Year'].dropna().astype(int).value_counts().sort_index()
 
 
760
 
761
- fig, ax = plt.subplots()
762
- ax.bar(year_counts.index, year_counts.values, color='skyblue')
763
- ax.set_xlabel('Year')
764
- ax.set_ylabel('Number of Inscriptions')
765
- ax.set_title(f'Number of Inscriptions Using {selected_material} Over Time')
766
- st.pyplot(fig)
 
 
 
767
 
768
- # Optional: Network Graph
769
  st.markdown("#### Network Graph of Inscriptions and Materials")
 
 
770
  G = nx.Graph()
771
-
772
  # Add nodes
773
  G.add_node(selected_material, type='Material')
774
  for _, row in connected_inscriptions.iterrows():
775
  inscription_node = f"Inscription {row['Number']}"
776
  G.add_node(inscription_node, type='Inscription')
777
  G.add_edge(selected_material, inscription_node)
778
-
779
- # Define node colors based on type
780
- color_map = []
781
- for node in G:
782
- if G.nodes[node]['type'] == 'Material':
783
- color_map.append('lightblue')
784
- else:
785
- color_map.append('lightgreen')
786
-
787
- # Draw the graph
788
- plt.figure(figsize=(8, 6))
789
- nx.draw(G, with_labels=True, node_color=color_map, node_size=1500, font_size=10, font_weight='bold')
790
- st.pyplot(plt)
791
- plt.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
792
 
793
  else:
794
  st.info("No inscriptions found for the selected material.")
@@ -816,53 +895,224 @@ with tabs[5]:
816
  # Display inscriptions in a table
817
  st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Material', 'Language', 'Dating', 'Encoder']])
818
 
819
- # Optional: Visualization - Inscriptions Geographical Distribution
820
  st.markdown("#### Geographical Distribution of Inscriptions")
821
  map_df = connected_inscriptions[['Latitude', 'Longitude', 'Number']]
822
  map_df = map_df.dropna(subset=['Latitude', 'Longitude'])
823
 
824
  if not map_df.empty:
825
- # Create a Folium map centered on the selected place
826
- selected_place_coords = [places_dict[place_id]['Latitude'], places_dict[place_id]['Longitude']]
827
- folium_map = folium.Map(location=selected_place_coords, zoom_start=8)
828
-
829
- for _, row in map_df.iterrows():
830
- folium.Marker(
831
- location=[row['Latitude'], row['Longitude']],
832
- popup=f"Inscription {row['Number']}"
833
- ).add_to(folium_map)
834
-
835
- st_folium(folium_map, width=700, height=500)
 
 
 
 
 
 
 
836
  else:
837
  st.info("No geographical data available for these inscriptions.")
838
 
839
- # Optional: Network Graph
840
  st.markdown("#### Network Graph of Inscriptions and Places")
841
  G = nx.Graph()
842
-
843
  # Add nodes
844
  G.add_node(selected_place, type='Place')
845
  for _, row in connected_inscriptions.iterrows():
846
  inscription_node = f"Inscription {row['Number']}"
847
  G.add_node(inscription_node, type='Inscription')
848
  G.add_edge(selected_place, inscription_node)
849
-
850
- # Define node colors based on type
851
- color_map = []
852
- for node in G:
853
- if G.nodes[node]['type'] == 'Place':
854
- color_map.append('salmon')
855
- else:
856
- color_map.append('lightgreen')
857
-
858
- # Draw the graph
859
- plt.figure(figsize=(8, 6))
860
- nx.draw(G, with_labels=True, node_color=color_map, node_size=1500, font_size=10, font_weight='bold')
861
- st.pyplot(plt)
862
- plt.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
863
 
864
  else:
865
  st.info("No inscriptions found for the selected place.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
866
 
867
 
868
  # -------------------------------
 
5
  import folium
6
  from streamlit_folium import st_folium
7
  import unicodedata
8
+ import networkx as nx
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
+
12
 
13
  # -------------------------------
14
  # Authority Lists as XML Strings
 
205
  origin = places_dict.get(origin_id, {}).get('Name', origin_id)
206
  origin_geonames_link = places_dict.get(origin_id, {}).get('GeoNames Link', "#")
207
  origin_pleiades_link = places_dict.get(origin_id, {}).get('Pleiades Link', "#")
208
+ latitude = places_dict.get(origin_id, {}).get('Latitude', None)
209
+ longitude = places_dict.get(origin_id, {}).get('Longitude', None)
210
 
211
  # Handle Material with or without 'ref' attribute
212
  material_elem = inscription.find('Material')
 
231
 
232
  language = inscription.find('Language').text if inscription.find('Language') is not None else "N/A"
233
 
234
+ # Extract Titles from the Text element
235
+ text_elem = inscription.find('Text')
236
+ titles_used = []
237
+ titles_descriptions = []
238
+ if text_elem is not None:
239
+ for title in text_elem.findall('.//title'):
240
+ title_ref = title.get('ref')
241
+ if title_ref and title_ref in titles_dict:
242
+ title_info = titles_dict[title_ref]
243
+ title_name = title_info['Name']
244
+ title_description = title_info['Description']
245
+ titles_used.append(title_name)
246
+ titles_descriptions.append(title_description)
247
+ elif title.text:
248
+ title_text = title.text.strip()
249
+ titles_used.append(title_text)
250
+ titles_descriptions.append("No description available.")
251
+
252
+ text = "".join(text_elem.itertext()).strip() if text_elem is not None else "N/A"
253
 
254
  dating = inscription.find('Dating').text if inscription.find('Dating') is not None else "N/A"
255
  images = inscription.find('Images').text if inscription.find('Images') is not None else "N/A"
 
264
  'Origin': origin,
265
  'GeoNames Link': origin_geonames_link,
266
  'Pleiades Link': origin_pleiades_link,
267
+ 'Latitude': latitude,
268
+ 'Longitude': longitude,
269
  'Material_ID': material_id,
270
  'Material': material,
271
  'Language': language,
272
+ 'Titles': ", ".join(titles_used) if titles_used else "N/A",
273
+ 'Title_Descriptions': "; ".join(titles_descriptions) if titles_descriptions else "N/A",
274
  'Text': text,
275
  'Dating': dating,
276
  'Images': images,
 
737
  place_desc = places_dict.get(row['Origin_ID'], {}).get('Description', "No description available.")
738
  st.markdown(f"**Place Description**: {place_desc}")
739
 
740
+ # -------------------------------
741
+ # Authority Connections Tab
742
+ # -------------------------------
743
  with tabs[5]:
744
  st.subheader("Authority Connections")
745
 
746
  # Define Authority Types
747
+ authority_types = ["Material", "Place", "Title"] # Added "Title"
748
 
749
  # Select Authority Type
750
  selected_authority_type = st.selectbox("Select Authority Type", authority_types)
 
773
  # Display inscriptions in a table
774
  st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Origin', 'Language', 'Dating', 'Encoder']])
775
 
776
+ # **Plotly Visualization: Inscriptions Over Time**
777
  st.markdown("#### Inscriptions Over Time")
778
+ # Assuming 'Dating' is in a format that can be processed (e.g., "155 to 155")
 
779
  def extract_start_year(dating):
780
  if isinstance(dating, str):
781
  parts = dating.split('to')
 
787
 
788
  connected_inscriptions['Start_Year'] = connected_inscriptions['Dating'].apply(extract_start_year)
789
  year_counts = connected_inscriptions['Start_Year'].dropna().astype(int).value_counts().sort_index()
790
+ year_counts = year_counts.reset_index()
791
+ year_counts.columns = ['Year', 'Count']
792
 
793
+ fig_bar = px.bar(
794
+ year_counts,
795
+ x='Year',
796
+ y='Count',
797
+ labels={'Count': 'Number of Inscriptions'},
798
+ title=f'Number of Inscriptions Using {selected_material} Over Time',
799
+ template='plotly_white'
800
+ )
801
+ st.plotly_chart(fig_bar, use_container_width=True)
802
 
803
+ # **Plotly Visualization: Network Graph of Inscriptions and Materials**
804
  st.markdown("#### Network Graph of Inscriptions and Materials")
805
+
806
+ # Create a network graph using Plotly
807
  G = nx.Graph()
808
+
809
  # Add nodes
810
  G.add_node(selected_material, type='Material')
811
  for _, row in connected_inscriptions.iterrows():
812
  inscription_node = f"Inscription {row['Number']}"
813
  G.add_node(inscription_node, type='Inscription')
814
  G.add_edge(selected_material, inscription_node)
815
+
816
+ # Generate positions for the nodes
817
+ pos = nx.spring_layout(G, k=0.5, iterations=50)
818
+
819
+ edge_x = []
820
+ edge_y = []
821
+ for edge in G.edges():
822
+ x0, y0 = pos[edge[0]]
823
+ x1, y1 = pos[edge[1]]
824
+ edge_x.extend([x0, x1, None])
825
+ edge_y.extend([y0, y1, None])
826
+
827
+ edge_trace = go.Scatter(
828
+ x=edge_x, y=edge_y,
829
+ line=dict(width=1, color='#888'),
830
+ hoverinfo='none',
831
+ mode='lines'
832
+ )
833
+
834
+ node_x = []
835
+ node_y = []
836
+ for node in G.nodes():
837
+ x, y = pos[node]
838
+ node_x.append(x)
839
+ node_y.append(y)
840
+
841
+ node_trace = go.Scatter(
842
+ x=node_x, y=node_y,
843
+ mode='markers+text',
844
+ text=[node for node in G.nodes()],
845
+ textposition="bottom center",
846
+ hoverinfo='text',
847
+ marker=dict(
848
+ showscale=False,
849
+ color=['lightblue' if G.nodes[node]['type'] == 'Material' else 'lightgreen' for node in G.nodes()],
850
+ size=20,
851
+ line_width=2
852
+ )
853
+ )
854
+
855
+ fig_network = go.Figure(data=[edge_trace, node_trace],
856
+ layout=go.Layout(
857
+ title=f"Network Graph: {selected_material} and Connected Inscriptions",
858
+ titlefont_size=16,
859
+ showlegend=False,
860
+ hovermode='closest',
861
+ margin=dict(b=20,l=5,r=5,t=40),
862
+ annotations=[ dict(
863
+ text="",
864
+ showarrow=False,
865
+ xref="paper", yref="paper") ],
866
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
867
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
868
+ )
869
+
870
+ st.plotly_chart(fig_network, use_container_width=True)
871
 
872
  else:
873
  st.info("No inscriptions found for the selected material.")
 
895
  # Display inscriptions in a table
896
  st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Material', 'Language', 'Dating', 'Encoder']])
897
 
898
+ # **Plotly Visualization: Geographical Distribution of Inscriptions**
899
  st.markdown("#### Geographical Distribution of Inscriptions")
900
  map_df = connected_inscriptions[['Latitude', 'Longitude', 'Number']]
901
  map_df = map_df.dropna(subset=['Latitude', 'Longitude'])
902
 
903
  if not map_df.empty:
904
+ fig_map = px.scatter_geo(
905
+ map_df,
906
+ lat='Latitude',
907
+ lon='Longitude',
908
+ hover_name='Number',
909
+ title=f'Geographical Distribution of Inscriptions from {selected_place}',
910
+ template='plotly_white'
911
+ )
912
+ fig_map.update_layout(
913
+ geo=dict(
914
+ scope='world',
915
+ projection_type='natural earth',
916
+ showland=True,
917
+ landcolor='lightgray',
918
+ showcountries=True,
919
+ )
920
+ )
921
+ st.plotly_chart(fig_map, use_container_width=True)
922
  else:
923
  st.info("No geographical data available for these inscriptions.")
924
 
925
+ # **Plotly Visualization: Network Graph of Inscriptions and Places**
926
  st.markdown("#### Network Graph of Inscriptions and Places")
927
  G = nx.Graph()
928
+
929
  # Add nodes
930
  G.add_node(selected_place, type='Place')
931
  for _, row in connected_inscriptions.iterrows():
932
  inscription_node = f"Inscription {row['Number']}"
933
  G.add_node(inscription_node, type='Inscription')
934
  G.add_edge(selected_place, inscription_node)
935
+
936
+ # Generate positions for the nodes
937
+ pos = nx.spring_layout(G, k=0.5, iterations=50)
938
+
939
+ edge_x = []
940
+ edge_y = []
941
+ for edge in G.edges():
942
+ x0, y0 = pos[edge[0]]
943
+ x1, y1 = pos[edge[1]]
944
+ edge_x.extend([x0, x1, None])
945
+ edge_y.extend([y0, y1, None])
946
+
947
+ edge_trace = go.Scatter(
948
+ x=edge_x, y=edge_y,
949
+ line=dict(width=1, color='#888'),
950
+ hoverinfo='none',
951
+ mode='lines'
952
+ )
953
+
954
+ node_x = []
955
+ node_y = []
956
+ for node in G.nodes():
957
+ x, y = pos[node]
958
+ node_x.append(x)
959
+ node_y.append(y)
960
+
961
+ node_trace = go.Scatter(
962
+ x=node_x, y=node_y,
963
+ mode='markers+text',
964
+ text=[node for node in G.nodes()],
965
+ textposition="bottom center",
966
+ hoverinfo='text',
967
+ marker=dict(
968
+ showscale=False,
969
+ color=['salmon' if G.nodes[node]['type'] == 'Place' else 'lightgreen' for node in G.nodes()],
970
+ size=20,
971
+ line_width=2
972
+ )
973
+ )
974
+
975
+ fig_network = go.Figure(data=[edge_trace, node_trace],
976
+ layout=go.Layout(
977
+ title=f"Network Graph: {selected_place} and Connected Inscriptions",
978
+ titlefont_size=16,
979
+ showlegend=False,
980
+ hovermode='closest',
981
+ margin=dict(b=20,l=5,r=5,t=40),
982
+ annotations=[ dict(
983
+ text="",
984
+ showarrow=False,
985
+ xref="paper", yref="paper") ],
986
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
987
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
988
+ )
989
+
990
+ st.plotly_chart(fig_network, use_container_width=True)
991
 
992
  else:
993
  st.info("No inscriptions found for the selected place.")
994
+
995
+ elif selected_authority_type == "Title":
996
+ # List all titles from titles_dict
997
+ title_names = [title['Name'] for title in titles_dict.values()]
998
+ selected_title = st.selectbox("Select Title", sorted(title_names))
999
+
1000
+ # Find the title ID based on the selected name
1001
+ title_id = None
1002
+ for id_, title in titles_dict.items():
1003
+ if title['Name'] == selected_title:
1004
+ title_id = id_
1005
+ break
1006
+
1007
+ if title_id:
1008
+ # Filter inscriptions that reference this title
1009
+ # Assuming 'Titles' column contains comma-separated titles
1010
+ connected_inscriptions = df[df['Titles'].str.contains(selected_title, case=False, na=False)]
1011
+
1012
+ st.markdown(f"### Inscriptions referencing **{selected_title}**")
1013
+ st.write(f"**Total Inscriptions:** {len(connected_inscriptions)}")
1014
+
1015
+ if not connected_inscriptions.empty:
1016
+ # Display inscriptions in a table
1017
+ st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Origin', 'Material', 'Language', 'Dating', 'Encoder']])
1018
+
1019
+ # **Plotly Visualization: Inscriptions Referencing the Title Over Time**
1020
+ st.markdown("#### Inscriptions Referencing the Title Over Time")
1021
+ def extract_start_year(dating):
1022
+ if isinstance(dating, str):
1023
+ parts = dating.split('to')
1024
+ try:
1025
+ return int(parts[0].strip())
1026
+ except:
1027
+ return None
1028
+ return None
1029
+
1030
+ connected_inscriptions['Start_Year'] = connected_inscriptions['Dating'].apply(extract_start_year)
1031
+ year_counts = connected_inscriptions['Start_Year'].dropna().astype(int).value_counts().sort_index()
1032
+ year_counts = year_counts.reset_index()
1033
+ year_counts.columns = ['Year', 'Count']
1034
+
1035
+ fig_bar = px.bar(
1036
+ year_counts,
1037
+ x='Year',
1038
+ y='Count',
1039
+ labels={'Count': 'Number of Inscriptions'},
1040
+ title=f'Number of Inscriptions Referencing "{selected_title}" Over Time',
1041
+ template='plotly_white'
1042
+ )
1043
+ st.plotly_chart(fig_bar, use_container_width=True)
1044
+
1045
+ # **Plotly Visualization: Network Graph of Inscriptions and Titles**
1046
+ st.markdown("#### Network Graph of Inscriptions and Titles")
1047
+
1048
+ # Create a network graph using Plotly
1049
+ G = nx.Graph()
1050
+
1051
+ # Add nodes
1052
+ G.add_node(selected_title, type='Title')
1053
+ for _, row in connected_inscriptions.iterrows():
1054
+ inscription_node = f"Inscription {row['Number']}"
1055
+ G.add_node(inscription_node, type='Inscription')
1056
+ G.add_edge(selected_title, inscription_node)
1057
+
1058
+ # Generate positions for the nodes
1059
+ pos = nx.spring_layout(G, k=0.5, iterations=50)
1060
+
1061
+ edge_x = []
1062
+ edge_y = []
1063
+ for edge in G.edges():
1064
+ x0, y0 = pos[edge[0]]
1065
+ x1, y1 = pos[edge[1]]
1066
+ edge_x.extend([x0, x1, None])
1067
+ edge_y.extend([y0, y1, None])
1068
+
1069
+ edge_trace = go.Scatter(
1070
+ x=edge_x, y=edge_y,
1071
+ line=dict(width=1, color='#888'),
1072
+ hoverinfo='none',
1073
+ mode='lines'
1074
+ )
1075
+
1076
+ node_x = []
1077
+ node_y = []
1078
+ for node in G.nodes():
1079
+ x, y = pos[node]
1080
+ node_x.append(x)
1081
+ node_y.append(y)
1082
+
1083
+ node_trace = go.Scatter(
1084
+ x=node_x, y=node_y,
1085
+ mode='markers+text',
1086
+ text=[node for node in G.nodes()],
1087
+ textposition="bottom center",
1088
+ hoverinfo='text',
1089
+ marker=dict(
1090
+ showscale=False,
1091
+ color=['orange' if G.nodes[node]['type'] == 'Title' else 'lightgreen' for node in G.nodes()],
1092
+ size=20,
1093
+ line_width=2
1094
+ )
1095
+ )
1096
+
1097
+ fig_network = go.Figure(data=[edge_trace, node_trace],
1098
+ layout=go.Layout(
1099
+ title=f"Network Graph: {selected_title} and Connected Inscriptions",
1100
+ titlefont_size=16,
1101
+ showlegend=False,
1102
+ hovermode='closest',
1103
+ margin=dict(b=20,l=5,r=5,t=40),
1104
+ annotations=[ dict(
1105
+ text="",
1106
+ showarrow=False,
1107
+ xref="paper", yref="paper") ],
1108
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
1109
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
1110
+ )
1111
+
1112
+ st.plotly_chart(fig_network, use_container_width=True)
1113
+
1114
+ else:
1115
+ st.info("No inscriptions found referencing the selected title.")
1116
 
1117
 
1118
  # -------------------------------