import xml.etree.ElementTree as ET
from io import StringIO

import folium
import pandas as pd
import streamlit as st
from streamlit_folium import st_folium

# -------------------------------
# Authority Lists as XML Strings
# -------------------------------
# NOTE(review): this copy of the file had lost the element markup of the
# authority lists (only the text content survived). The tags below were
# restored from what parse_materials / parse_places / parse_titles read.
# The root tag names and every `id` value are placeholders derived from the
# entry names -- confirm them against the original authority lists, because
# uploaded inscriptions reference entries through these ids (`ref="..."`).
materials_xml = """
<materials>
    <material id="lapis">
        <name>Lapis</name>
        <name_en>Stone</name_en>
        <description>Stone used as a durable medium for inscriptions and engravings.</description>
    </material>
    <material id="argentum">
        <name>Argentum</name>
        <name_en>Silver</name_en>
        <description>Silver used in inscriptions, often for its lustrous appearance and value.</description>
    </material>
    <material id="plumbum">
        <name>Plumbum</name>
        <name_en>Lead</name_en>
        <description>Lead utilized in inscriptions, valued for its malleability and ease of engraving.</description>
    </material>
    <material id="opus_figlinae">
        <name>Opus Figlinae</name>
        <name_en>Pottery</name_en>
        <description>Pottery used as a medium for inscriptions, typically in the form of ceramic artifacts.</description>
    </material>
</materials>
"""

places_xml = """
<places>
    <place id="vize">
        <name>Vize</name>
        <geonamesLink>https://www.geonames.org/738154/vize.html</geonamesLink>
        <pleiadesLink>https://pleiades.stoa.org/places/511190</pleiadesLink>
        <latitude>40.6545</latitude>
        <longitude>28.4078</longitude>
        <description>Ancient city located in modern-day Turkey.</description>
    </place>
    <place id="philippi">
        <name>Philippi</name>
        <geonamesLink>https://www.geonames.org/734652/filippoi-philippi.html</geonamesLink>
        <pleiadesLink>https://pleiades.stoa.org/places/501482</pleiadesLink>
        <latitude>40.5044</latitude>
        <longitude>24.9722</longitude>
        <description>Ancient city in Macedonia, founded by Philip II of Macedon.</description>
    </place>
    <place id="augusta_traiana">
        <name>Augusta Traiana</name>
        <geonamesLink>https://www.geonames.org/maps/google_42.4333_25.65.html</geonamesLink>
        <pleiadesLink>https://pleiades.stoa.org/places/216731</pleiadesLink>
        <latitude>42.4259</latitude>
        <longitude>25.6272</longitude>
        <description>Ancient Roman city, present-day Stara Zagora in Bulgaria.</description>
    </place>
    <place id="dyrrachium">
        <name>Dyrrachium</name>
        <geonamesLink>https://www.geonames.org/3185728/durres.html</geonamesLink>
        <pleiadesLink>https://pleiades.stoa.org/places/481818</pleiadesLink>
        <latitude>41.3231</latitude>
        <longitude>19.4417</longitude>
        <description>Ancient city on the Adriatic coast, present-day Durrës in Albania.</description>
    </place>
    <place id="antisara">
        <name>Antisara</name>
        <geonamesLink>https://www.geonames.org/736079/akra-kalamitsa.html</geonamesLink>
        <pleiadesLink>https://pleiades.stoa.org/places/501351</pleiadesLink>
        <latitude>39.5000</latitude>
        <longitude>20.0000</longitude>
        <description>Ancient settlement, exact modern location TBD.</description>
    </place>
    <place id="macedonia">
        <name>Macedonia</name>
        <geonamesLink>-</geonamesLink>
        <pleiadesLink>-</pleiadesLink>
        <latitude>40.0000</latitude>
        <longitude>22.0000</longitude>
        <description>Historical region in Southeast Europe, encompassing parts of modern Greece, North Macedonia, and Bulgaria.</description>
    </place>
</places>
"""

titles_xml = """
<titles>
    <title id="imperator">
        <name>Imperator</name>
        <name_gr>Αυτοκράτορας</name_gr>
        <abbreviation>Imp.</abbreviation>
        <description>A title granted to a victorious general, later adopted as a formal title by Roman emperors.</description>
    </title>
    <title id="caesar">
        <name>Caesar</name>
        <name_gr>Καῖσαρ</name_gr>
        <abbreviation>Caes.</abbreviation>
        <description>A title used by Roman emperors, originally the family name of Julius Caesar.</description>
    </title>
    <title id="augustus">
        <name>Augustus</name>
        <name_gr>-</name_gr>
        <abbreviation>Aug.</abbreviation>
        <description>The first Roman emperor's title, signifying revered or majestic status.</description>
    </title>
</titles>
"""


# -------------------------------
# Parse Authority Lists
# -------------------------------
def _authority_text(element, tag):
    """Return the stripped text of the first *tag* child, or "" if it is missing or empty."""
    return (element.findtext(tag) or "").strip()


def _authority_float(element, tag):
    """Return the first *tag* child parsed as a float, or None if it is missing or not numeric."""
    try:
        return float(_authority_text(element, tag))
    except ValueError:
        # A blank or malformed coordinate must not abort loading the whole list.
        return None


def parse_materials(xml_string):
    """
    Parse the materials authority list.

    Returns a dict keyed by each <material> element's `id` attribute; every
    value holds the keys 'Name', 'Name_EN' and 'Description'. Missing child
    elements yield "" instead of raising.
    Raises xml.etree.ElementTree.ParseError if *xml_string* is not well-formed.
    """
    root = ET.fromstring(xml_string)
    return {
        material.get('id'): {
            'Name': _authority_text(material, 'name'),
            'Name_EN': _authority_text(material, 'name_en'),
            'Description': _authority_text(material, 'description'),
        }
        for material in root.findall('material')
    }


def parse_places(xml_string):
    """
    Parse the places authority list.

    Returns a dict keyed by each <place> element's `id` attribute; every value
    holds 'Name', 'GeoNames Link', 'Pleiades Link', 'Latitude', 'Longitude'
    and 'Description'. Coordinates are floats, or None when the element is
    missing or not numeric; missing text fields yield "".
    Raises xml.etree.ElementTree.ParseError if *xml_string* is not well-formed.
    """
    root = ET.fromstring(xml_string)
    return {
        place.get('id'): {
            'Name': _authority_text(place, 'name'),
            'GeoNames Link': _authority_text(place, 'geonamesLink'),
            'Pleiades Link': _authority_text(place, 'pleiadesLink'),
            'Latitude': _authority_float(place, 'latitude'),
            'Longitude': _authority_float(place, 'longitude'),
            'Description': _authority_text(place, 'description'),
        }
        for place in root.findall('place')
    }


def parse_titles(xml_string):
    """
    Parse the emperor-titles authority list.

    Returns a dict keyed by each <title> element's `id` attribute; every value
    holds 'Name', 'Name_GR', 'Abbreviation' and 'Description'. Missing child
    elements yield "" instead of raising.
    Raises xml.etree.ElementTree.ParseError if *xml_string* is not well-formed.
    """
    root = ET.fromstring(xml_string)
    return {
        title.get('id'): {
            'Name': _authority_text(title, 'name'),
            'Name_GR': _authority_text(title, 'name_gr'),
            'Abbreviation': _authority_text(title, 'abbreviation'),
            'Description': _authority_text(title, 'description'),
        }
        for title in root.findall('title')
    }


# Load authority data
materials_dict = parse_materials(materials_xml)
places_dict = parse_places(places_xml)
titles_dict = parse_titles(titles_xml)

# -------------------------------
# Function to Find Place ID by Name (Case-Insensitive)
# -------------------------------
def find_place_id_by_name(name):
    """
    Finds the place ID by matching the place name (case-insensitive).
    Returns the place ID if found, else returns the original name.
    """
    wanted = name.strip().lower()
    if not wanted:
        # An empty name must never match a place whose own name is blank.
        return name
    for id_, place in places_dict.items():
        if (place['Name'] or "").strip().lower() == wanted:
            return id_
    return name  # Return the original name if no match is found


# -------------------------------
# Function to Parse Inscriptions
# -------------------------------
# Column order of the DataFrame returned by parse_inscriptions. Passing it
# explicitly keeps the columns present even when no inscription was found.
_INSCRIPTION_COLUMNS = [
    'Number', 'Publisher', 'Origin_ID', 'Origin', 'GeoNames Link',
    'Pleiades Link', 'Material_ID', 'Material', 'Language', 'Text',
    'Dating', 'Images', 'Encoder', 'Categories',
]


def _field_text(parent, tag, default="N/A"):
    """Return the stripped text of the first *tag* child, or *default* if it is missing or empty."""
    return (parent.findtext(tag) or "").strip() or default


def _resolve_origin_id(inscription):
    """Return the place ID for <Origin>: its `ref` attribute, else a lookup of its text by name."""
    origin_elem = inscription.find('Origin')
    if origin_elem is None:
        return "N/A"
    origin_ref = origin_elem.get('ref')
    if origin_ref:
        return origin_ref
    return find_place_id_by_name((origin_elem.text or "").strip())


def _resolve_material_id(inscription):
    """Return the material ID for <Material>: its `ref` attribute, else a match on the English name."""
    material_elem = inscription.find('Material')
    if material_elem is None:
        return "N/A"
    material_ref = material_elem.get('ref')
    if material_ref:
        return material_ref
    material_text = (material_elem.text or "").strip()
    wanted = material_text.lower()
    if wanted:
        for id_, material in materials_dict.items():
            if id_ and (material['Name_EN'] or "").strip().lower() == wanted:
                return id_
    return material_text  # Use the text if no match found


def parse_inscriptions(xml_content):
    """
    Parse an inscriptions XML document into a DataFrame.

    Every <inscription> child of the root becomes one row with the columns
    listed in _INSCRIPTION_COLUMNS. Origin and material are resolved against
    places_dict / materials_dict; missing or empty fields become "N/A".
    A document without <inscription> elements yields an empty DataFrame that
    still has all columns.
    Raises xml.etree.ElementTree.ParseError if *xml_content* is not well-formed.
    """
    root = ET.fromstring(xml_content)
    inscriptions = []

    for inscription in root.findall('inscription'):
        origin_id = _resolve_origin_id(inscription)
        place = places_dict.get(origin_id, {})
        material_id = _resolve_material_id(inscription)

        text_elem = inscription.find('Text')
        text = "".join(text_elem.itertext()).strip() if text_elem is not None else "N/A"

        # Empty <term/> elements have text None, which would break the join.
        category_terms = [
            term.text.strip()
            for term in inscription.findall('Category/term')
            if term.text and term.text.strip()
        ]

        inscriptions.append({
            'Number': inscription.get('n'),
            'Publisher': _field_text(inscription, 'Publisher'),
            'Origin_ID': origin_id,
            'Origin': place.get('Name', origin_id),
            'GeoNames Link': place.get('GeoNames Link', "#"),
            'Pleiades Link': place.get('Pleiades Link', "#"),
            'Material_ID': material_id,
            'Material': materials_dict.get(material_id, {}).get('Name_EN', material_id),
            'Language': _field_text(inscription, 'Language'),
            'Text': text,
            'Dating': _field_text(inscription, 'Dating'),
            'Images': _field_text(inscription, 'Images'),
            'Encoder': _field_text(inscription, 'Encoder'),
            'Categories': ", ".join(category_terms),
        })

    return pd.DataFrame(inscriptions, columns=_INSCRIPTION_COLUMNS)


# -------------------------------
# Functions to Render Editions
# -------------------------------
# Elements rendered by an edition-specific handler. Their own text and their
# children are never rendered generically, so nothing is emitted twice.
_HANDLED_TAGS = ("supplied", "expan", "g", "title")


def _expansion_parts(expan_elem):
    """Return the (abbr, ex) texts of an <expan> element, "" for a missing part."""
    return expan_elem.findtext("abbr") or "", expan_elem.findtext("ex") or ""


def _render_lines(text_element, render_handled, transform_text):
    """
    Walk *text_element* in document order and return the rendered lines.

    <lb> starts a new line (prefixed with its `n` attribute when present).
    Elements in _HANDLED_TAGS are rendered by *render_handled(elem)* and not
    descended into. All other text -- including the tail text that follows a
    child element, which is where the words after <lb/> live -- is passed
    through *transform_text*. Whitespace runs are collapsed so indentation in
    the XML cannot leak into the output; empty lines are dropped.
    """
    lines = []
    pieces = []

    def flush_line():
        line = " ".join("".join(pieces).split())
        if line:
            lines.append(line)
        pieces.clear()

    def add_text(text):
        if text:
            pieces.append(transform_text(text))

    def walk(elem):
        if elem.tag == "lb":
            flush_line()
            line_number = elem.get("n", "")
            if line_number:
                pieces.append(f"{line_number} ")
        elif elem.tag in _HANDLED_TAGS:
            pieces.append(render_handled(elem))
        else:
            add_text(elem.text)
            for child in elem:
                walk(child)
                add_text(child.tail)

    walk(text_element)
    flush_line()  # Append the last line
    return lines


def render_diplomatic(text_element):
    """
    Render the diplomatic edition of a <Text> element as a newline-joined string.

    Plain text is upper-cased; <expan> shows only its upper-cased abbreviation;
    <supplied> shows the abbreviations of its nested <expan> elements in square
    brackets; <g type="leaf"> becomes " LEAF "; <title type="emperor"> becomes
    the abbreviation looked up in titles_dict via its `ref` attribute.
    """
    def render_handled(elem):
        if elem.tag == "supplied":
            abbreviations = "".join(
                _expansion_parts(expan)[0].upper() for expan in elem.findall(".//expan")
            )
            return f"[{abbreviations}]"
        if elem.tag == "expan":
            return _expansion_parts(elem)[0].upper()
        if elem.tag == "g":
            return " LEAF " if elem.get("type") == "leaf" else ""
        # Remaining handled tag: <title>
        if elem.get("type") == "emperor":
            return titles_dict.get(elem.get('ref'), {}).get('Abbreviation', '') or ""
        return ""

    return "\n".join(_render_lines(text_element, render_handled, str.upper))


def render_editor(text_element):
    """
    Render the editor edition of a <Text> element as a newline-joined string.

    Plain text is kept as written; <expan> is shown as "abbr(ex)"; <supplied>
    shows its nested <expan> elements in that form, space-separated;
    <g type="leaf"> becomes " ((leaf)) "; <title type="emperor"> becomes the
    abbreviation and Greek name looked up in titles_dict via its `ref` attribute.
    """
    def render_expansion(expan):
        abbr, ex = _expansion_parts(expan)
        return f"{abbr}({ex})"

    def render_handled(elem):
        if elem.tag == "supplied":
            return " ".join(render_expansion(expan) for expan in elem.findall(".//expan"))
        if elem.tag == "expan":
            return render_expansion(elem)
        if elem.tag == "g":
            return " ((leaf)) " if elem.get("type") == "leaf" else ""
        # Remaining handled tag: <title>
        if elem.get("type") == "emperor":
            title_info = titles_dict.get(elem.get('ref'), {})
            abbreviation = title_info.get('Abbreviation', '') or ""
            name_gr = title_info.get('Name_GR', '') or ""
            return f"{abbreviation} {name_gr}"
        return ""

    return "\n".join(_render_lines(text_element, render_handled, lambda text: text))


# -------------------------------
# Helpers for the App Layout
# -------------------------------
def _find_text_element(root, number):
    """
    Return the <Text> child of the first <inscription> under *root* whose `n`
    attribute equals *number*, or None.

    The attribute is compared in Python instead of being interpolated into an
    XPath expression, so numbers containing quotes cannot break the lookup.
    """
    for inscription in root.findall('inscription'):
        if inscription.get('n') == number:
            return inscription.find('Text')
    return None


def _build_popup_html(row):
    """
    Build the HTML shown in a map marker popup for one inscription row.

    All values come from the uploaded XML, so they are HTML-escaped.
    NOTE(review): the <b>/<br>/<a> markup was missing from this copy of the
    file and has been reconstructed -- confirm against the intended layout.
    """
    import html  # local import: keeps this block independent of the file header

    fields = [
        ("Inscription Number", row['Number']),
        ("Publisher", row['Publisher']),
        ("Material", row['Material']),
        ("Language", row['Language']),
        ("Dating", row['Dating']),
        ("Encoder", row['Encoder']),
        ("Categories", row['Categories']),
        ("Text", row['Text']),
    ]
    parts = [f"<b>{label}:</b> {html.escape(str(value))}<br>" for label, value in fields]
    if row['Images'] and row['Images'] != "N/A":
        image_url = html.escape(str(row['Images']), quote=True)
        parts.append(f'<a href="{image_url}" target="_blank">View Images</a><br>')
    return "\n".join(parts)


def get_coordinates(origin_id):
    """Return the (latitude, longitude) of a place ID, (None, None) if unknown."""
    place = places_dict.get(origin_id, {})
    return place.get('Latitude'), place.get('Longitude')


# -------------------------------
# Streamlit App Layout
# -------------------------------
st.set_page_config(page_title="Epigraphic XML Viewer", layout="wide")
st.title("Epigraphic XML Viewer: Diplomatic and Editor Editions")

# -------------------------------
# Sidebar - Project Information
# -------------------------------
with st.sidebar:
    st.header("Project Information")
    st.markdown("""
**Epigraphic Database Viewer** is a tool designed to visualize and analyze ancient inscriptions.

**Features**:
- Upload and view XML inscriptions data.
- Explore inscriptions in various formats.
- Visualize geographical origins on an interactive map.

**Authority Lists**:
- **Materials**: Details about materials used in inscriptions.
- **Places**: Geographical data and descriptions.
- **Emperor Titles**: Titles and abbreviations used in inscriptions.

**Developed by**: Your Name or Team
""")

# -------------------------------
# File uploader for Inscriptions XML
# -------------------------------
uploaded_file = st.file_uploader("Upload Inscriptions XML File", type=["xml"])

if uploaded_file:
    # Read uploaded XML content
    try:
        inscriptions_content = uploaded_file.getvalue().decode("utf-8")
    except UnicodeDecodeError as e:
        st.error(f"Uploaded file is not valid UTF-8: {e}")
        st.stop()
    st.success("File uploaded successfully!")
else:
    st.info("No file uploaded. Using default sample XML data.")
    # Default XML data (as provided by the user)
    # NOTE(review): the element markup of this sample was lost in this copy of
    # the file (only the text, one <lb/> and one <ex> survive), so it is not
    # well-formed and the parse step below reports an error. Its structure
    # (inscription number, <Origin> layout, category terms, inline markup of
    # <Text>) cannot be recovered from the code -- restore it from the original.
    inscriptions_content = """ EDCS Vize https://www.geonames.org/738154/vize.html https://pleiades.stoa.org/places/511190 Augusti/Augustae ordo senatorius tituli sacri tria nomina viri lapis Greek ἀγαθῇ τύχῃ ὑπὲρ τῆς τοῦ Αὐτοκράτορος Tίτου Αἰλίου Ἁδριανοῦ Ἀντωνείνου Καί <lb n="4"/>σαροςΣεβαστοῦ Εὐσεβοῦς καὶ Οὐήρου Καίσαρ ος νείκης τε καὶ αἰωνίου διαμονῆς καὶ τοῦ σύμπαντος αὐτῶν οἴκου ἱερᾶς τε συνκλήτου καὶ δήμου Ῥωμαίων ἡγεμονεύοντος ἐπαρχείας Θρᾴκης Γαΐου Ἰουλίου Κομμόδου πρεσβ<ex>ευτοῦ</ex> Σεβαστοῦ ἀντιστρατήγου ἡ πόλις Βιζυηνῶν κατεσκεύασεν τοὺς πυργοὺς διὰ ἐπιμελητῶν Φίρμου Αυλουπορε ος καὶ Αυλουκενθου Δυτουκενθου καὶ Ραζδου Ὑακίνθου εὐτυχεῖτε 155 to 155 https://db.edcs.eu/epigr/ae/ae1951/ae1951-74.pdf Admin """

# -------------------------------
# Parse Inscriptions
# -------------------------------
try:
    # Parsed once here and reused by both edition tabs.
    inscriptions_root = ET.fromstring(inscriptions_content)
    df = parse_inscriptions(inscriptions_content)
except ET.ParseError as e:
    st.error(f"Error parsing XML: {e}")
    st.stop()

if df.empty:
    # Every tab below assumes at least one inscription row.
    st.warning("No inscriptions were found in the XML data.")
    st.stop()

inscription_numbers = df['Number'].tolist()

# -------------------------------
# Tabs for Different Views
# -------------------------------
tabs = st.tabs(["Raw XML", "DataFrame", "Diplomatic Edition", "Editor Edition", "Visualization"])

# -------------------------------
# Raw XML Tab
# -------------------------------
with tabs[0]:
    st.subheader("Raw XML Content")
    st.code(inscriptions_content, language="xml")

# -------------------------------
# DataFrame Tab
# -------------------------------
with tabs[1]:
    st.subheader("Inscriptions Data")
    st.dataframe(df)

# -------------------------------
# Diplomatic Edition Tab
# -------------------------------
with tabs[2]:
    st.subheader("Diplomatic Edition")

    # Select Inscription
    selected_inscription_num = st.selectbox("Select Inscription Number", inscription_numbers)
    text_element = _find_text_element(inscriptions_root, selected_inscription_num)

    if text_element is not None:
        st.code(render_diplomatic(text_element), language="plaintext")
    else:
        st.warning("No text found for the selected inscription.")

# -------------------------------
# Editor Edition Tab
# -------------------------------
with tabs[3]:
    st.subheader("Editor Edition")

    # Select Inscription
    selected_inscription_num = st.selectbox(
        "Select Inscription Number", inscription_numbers, key='editor_select'
    )
    text_element = _find_text_element(inscriptions_root, selected_inscription_num)

    if text_element is not None:
        st.code(render_editor(text_element), language="plaintext")
    else:
        st.warning("No text found for the selected inscription.")

# -------------------------------
# Visualization Tab
# -------------------------------
with tabs[4]:
    st.subheader("Visualization")

    # Extract categories (rows without categories contribute nothing)
    all_categories = {
        cat
        for categories in df['Categories']
        for cat in categories.split(", ")
        if cat
    }

    # Category filtering
    selected_categories = st.multiselect("Filter by Category", sorted(all_categories))

    if selected_categories:
        wanted_categories = set(selected_categories)
        matches = df['Categories'].apply(
            lambda cats: not wanted_categories.isdisjoint(cats.split(", "))
        )
        # Copy so the coordinate columns are added to an independent frame.
        filtered_df = df[matches].copy()
    else:
        filtered_df = df.copy()

    # Merge with places to get coordinates (works for an empty frame too)
    coordinates = [get_coordinates(origin_id) for origin_id in filtered_df['Origin_ID']]
    filtered_df['Latitude'] = [latitude for latitude, _ in coordinates]
    filtered_df['Longitude'] = [longitude for _, longitude in coordinates]

    # Drop entries without coordinates
    map_df = filtered_df.dropna(subset=['Latitude', 'Longitude'])

    if not map_df.empty:
        # Create a Folium map centered around the average coordinates
        avg_lat = map_df['Latitude'].mean()
        avg_lon = map_df['Longitude'].mean()
        folium_map = folium.Map(location=[avg_lat, avg_lon], zoom_start=6)

        # Add markers to the map
        for _, row in map_df.iterrows():
            folium.Marker(
                location=[row['Latitude'], row['Longitude']],
                popup=folium.Popup(_build_popup_html(row), max_width=300),
                tooltip=f"Inscription {row['Number']}"
            ).add_to(folium_map)

        # Display the Folium map using streamlit_folium
        st_folium(folium_map, width=700, height=500)
    else:
        st.write("No inscriptions to display on the map based on the selected filters.")

    st.dataframe(filtered_df)

    # Detailed View
    for _, row in filtered_df.iterrows():
        with st.expander(f"Inscription {row['Number']}"):
            st.markdown(f"**Publisher**: {row['Publisher']}")
            st.markdown(f"**Origin**: {row['Origin']} ([GeoNames Link]({row['GeoNames Link']}), [Pleiades Link]({row['Pleiades Link']}))")
            st.markdown(f"**Material**: {row['Material']} - {materials_dict.get(row['Material_ID'], {}).get('Description', '')}")
            st.markdown(f"**Language**: {row['Language']}")
            st.markdown(f"**Dating**: {row['Dating']}")
            st.markdown(f"**Encoder**: {row['Encoder']}")
            st.markdown(f"**Categories**: {row['Categories']}")
            st.markdown(f"**Text**:\n\n{row['Text']}")
            if row['Images'] and row['Images'] != "N/A":
                st.markdown(f"[View Images]({row['Images']})")

            # Display material description
            material_desc = materials_dict.get(row['Material_ID'], {}).get('Description', "No description available.")
            st.markdown(f"**Material Description**: {material_desc}")

            # Display place description
            place_desc = places_dict.get(row['Origin_ID'], {}).get('Description', "No description available.")
            st.markdown(f"**Place Description**: {place_desc}")

# -------------------------------
# Footer
# -------------------------------
st.markdown("""
---
**© 2024 InscriptaNET**
""")