import streamlit as st import xml.etree.ElementTree as ET import pandas as pd from io import StringIO import folium from streamlit_folium import st_folium # ------------------------------- # Authority Lists as XML Strings # ------------------------------- materials_xml = """<?xml version="1.0" encoding="UTF-8"?> <materials> <material id="LAPIS"> <name>Lapis</name> <name_en>Stone</name_en> <description>Stone used as a durable medium for inscriptions and engravings.</description> </material> <material id="ARGENTUM"> <name>Argentum</name> <name_en>Silver</name_en> <description>Silver used in inscriptions, often for its lustrous appearance and value.</description> </material> <material id="PLUMBUM"> <name>Plumbum</name> <name_en>Lead</name_en> <description>Lead utilized in inscriptions, valued for its malleability and ease of engraving.</description> </material> <material id="OPUS_FIGLINAE"> <name>Opus Figlinae</name> <name_en>Pottery</name_en> <description>Pottery used as a medium for inscriptions, typically in the form of ceramic artifacts.</description> </material> </materials> """ places_xml = """<?xml version="1.0" encoding="UTF-8"?> <places> <place id="VIZE"> <name>Vize</name> <geonamesLink>https://www.geonames.org/738154/vize.html</geonamesLink> <pleiadesLink>https://pleiades.stoa.org/places/511190</pleiadesLink> <latitude>40.6545</latitude> <longitude>28.4078</longitude> <description>Ancient city located in modern-day Turkey.</description> </place> <place id="PHILIPPI"> <name>Philippi</name> <geonamesLink>https://www.geonames.org/734652/filippoi-philippi.html</geonamesLink> <pleiadesLink>https://pleiades.stoa.org/places/501482</pleiadesLink> <latitude>40.5044</latitude> <longitude>24.9722</longitude> <description>Ancient city in Macedonia, founded by Philip II of Macedon.</description> </place> <place id="AUGUSTA_TRAIANA"> <name>Augusta Traiana</name> <geonamesLink>https://www.geonames.org/maps/google_42.4333_25.65.html</geonamesLink> 
<pleiadesLink>https://pleiades.stoa.org/places/216731</pleiadesLink> <latitude>42.4259</latitude> <longitude>25.6272</longitude> <description>Ancient Roman city, present-day Stara Zagora in Bulgaria.</description> </place> <place id="DYRRACHIUM"> <name>Dyrrachium</name> <geonamesLink>https://www.geonames.org/3185728/durres.html</geonamesLink> <pleiadesLink>https://pleiades.stoa.org/places/481818</pleiadesLink> <latitude>41.3231</latitude> <longitude>19.4417</longitude> <description>Ancient city on the Adriatic coast, present-day Durrës in Albania.</description> </place> <place id="ANTISARA"> <name>Antisara</name> <geonamesLink>https://www.geonames.org/736079/akra-kalamitsa.html</geonamesLink> <pleiadesLink>https://pleiades.stoa.org/places/501351</pleiadesLink> <latitude>39.5000</latitude> <longitude>20.0000</longitude> <description>Ancient settlement, exact modern location TBD.</description> </place> <place id="MACEDONIA"> <name>Macedonia</name> <geonamesLink>-</geonamesLink> <pleiadesLink>-</pleiadesLink> <latitude>40.0000</latitude> <longitude>22.0000</longitude> <description>Historical region in Southeast Europe, encompassing parts of modern Greece, North Macedonia, and Bulgaria.</description> </place> </places> """ titles_xml = """<?xml version="1.0" encoding="UTF-8"?> <emperorTitles> <title id="IMPERATOR"> <name>Imperator</name> <name_gr>Αυτοκράτορας</name_gr> <abbreviation>Imp.</abbreviation> <description>A title granted to a victorious general, later adopted as a formal title by Roman emperors.</description> </title> <title id="CAESAR"> <name>Caesar</name> <name_gr>Καῖσαρ</name_gr> <abbreviation>Caes.</abbreviation> <description>A title used by Roman emperors, originally the family name of Julius Caesar.</description> </title> <title id="AUGUSTUS"> <name>Augustus</name> <name_gr>-</name_gr> <abbreviation>Aug.</abbreviation> <description>The first Roman emperor's title, signifying revered or majestic status.</description> </title> </emperorTitles> """ # 
------------------------------- # Parse Authority Lists # ------------------------------- def parse_materials(xml_string): materials = {} root = ET.fromstring(xml_string) for material in root.findall('material'): material_id = material.get('id') materials[material_id] = { 'Name': material.find('name').text, 'Name_EN': material.find('name_en').text, 'Description': material.find('description').text } return materials def parse_places(xml_string): places = {} root = ET.fromstring(xml_string) for place in root.findall('place'): place_id = place.get('id') places[place_id] = { 'Name': place.find('name').text, 'GeoNames Link': place.find('geonamesLink').text, 'Pleiades Link': place.find('pleiadesLink').text, 'Latitude': float(place.find('latitude').text), 'Longitude': float(place.find('longitude').text), 'Description': place.find('description').text } return places def parse_titles(xml_string): titles = {} root = ET.fromstring(xml_string) for title in root.findall('title'): title_id = title.get('id') titles[title_id] = { 'Name': title.find('name').text, 'Name_GR': title.find('name_gr').text, 'Abbreviation': title.find('abbreviation').text, 'Description': title.find('description').text } return titles # Load authority data materials_dict = parse_materials(materials_xml) places_dict = parse_places(places_xml) titles_dict = parse_titles(titles_xml) # ------------------------------- # Function to Find Place ID by Name (Case-Insensitive) # ------------------------------- def find_place_id_by_name(name): """ Finds the place ID by matching the place name (case-insensitive). Returns the place ID if found, else returns the original name. 
""" for id_, place in places_dict.items(): if place['Name'].strip().lower() == name.strip().lower(): return id_ return name # Return the original name if no match is found # ------------------------------- # Function to Parse Inscriptions # ------------------------------- def parse_inscriptions(xml_content): tree = ET.ElementTree(ET.fromstring(xml_content)) root = tree.getroot() inscriptions = [] for inscription in root.findall('inscription'): n = inscription.get('n') publisher = inscription.find('Publisher').text if inscription.find('Publisher') is not None else "N/A" # Handle Origin with or without 'ref' attribute origin_elem = inscription.find('Origin') if origin_elem is not None: origin_ref = origin_elem.get('ref') if origin_ref: origin_id = origin_ref else: origin_text = origin_elem.text.strip() if origin_elem.text else "" origin_id = find_place_id_by_name(origin_text) else: origin_id = "N/A" origin = places_dict.get(origin_id, {}).get('Name', origin_id) origin_geonames_link = places_dict.get(origin_id, {}).get('GeoNames Link', "#") origin_pleiades_link = places_dict.get(origin_id, {}).get('Pleiades Link', "#") # Handle Material with or without 'ref' attribute material_elem = inscription.find('Material') if material_elem is not None: material_ref = material_elem.get('ref') if material_ref: material_id = material_ref else: material_text = material_elem.text.strip() if material_elem.text else "" # Attempt to find material ID by matching the name_en material_id = None for id_, material in materials_dict.items(): if material['Name_EN'].strip().lower() == material_text.strip().lower(): material_id = id_ break if not material_id: material_id = material_text # Use the text if no match found else: material_id = "N/A" material = materials_dict.get(material_id, {}).get('Name_EN', material_id) language = inscription.find('Language').text if inscription.find('Language') is not None else "N/A" text = "".join(inscription.find('Text').itertext()).strip() if 
inscription.find('Text') is not None else "N/A" dating = inscription.find('Dating').text if inscription.find('Dating') is not None else "N/A" images = inscription.find('Images').text if inscription.find('Images') is not None else "N/A" encoder = inscription.find('Encoder').text if inscription.find('Encoder') is not None else "N/A" category_terms = [term.text for term in inscription.findall('Category/term')] inscriptions.append({ 'Number': n, 'Publisher': publisher, 'Origin_ID': origin_id, 'Origin': origin, 'GeoNames Link': origin_geonames_link, 'Pleiades Link': origin_pleiades_link, 'Material_ID': material_id, 'Material': material, 'Language': language, 'Text': text, 'Dating': dating, 'Images': images, 'Encoder': encoder, 'Categories': ", ".join(category_terms) }) return pd.DataFrame(inscriptions) # ------------------------------- # Functions to Render Editions # ------------------------------- def render_diplomatic(text_element): lines = [] current_line = "" for elem in text_element.iter(): if elem.tag == "lb": if current_line: lines.append(current_line.strip()) current_line = "" # Start a new line line_number = elem.get("n", "") current_line += f"{line_number} " if line_number else "" elif elem.tag == "supplied": # Process nested <expan> elements and concatenate abbreviations supplied_content = "" for sub_elem in elem.findall(".//expan"): # Nested <expan> elements abbr_elem = sub_elem.find("abbr") if abbr_elem is not None and abbr_elem.text: supplied_content += abbr_elem.text.upper() current_line += f"[{supplied_content}]" elif elem.tag == "expan": # Use only the abbreviation part abbr_elem = elem.find("abbr") if abbr_elem is not None and abbr_elem.text: current_line += abbr_elem.text.upper() elif elem.tag == "g" and elem.get("type") == "leaf": current_line += " LEAF " elif elem.tag == "title" and elem.get("type") == "emperor": # Include title abbreviations title_ref = elem.get('ref') title_info = titles_dict.get(title_ref, {}) abbreviation = 
title_info.get('Abbreviation', '') current_line += abbreviation elif elem.text and elem.tag not in ["supplied", "expan", "g", "title"]: current_line += elem.text.upper() if current_line: lines.append(current_line.strip()) # Append the last line return "\n".join(lines) def render_editor(text_element): lines = [] current_line = "" for elem in text_element.iter(): if elem.tag == "lb": if current_line: lines.append(current_line.strip()) current_line = "" # Start a new line line_number = elem.get("n", "") current_line += f"{line_number} " if line_number else "" elif elem.tag == "supplied": # Process nested <expan> elements with abbreviation and expansion supplied_content = [] for sub_elem in elem.findall(".//expan"): # Nested <expan> elements abbr_elem = sub_elem.find("abbr") ex_elem = sub_elem.find("ex") abbr = abbr_elem.text if abbr_elem is not None and abbr_elem.text else "" ex = ex_elem.text if ex_elem is not None and ex_elem.text else "" supplied_content.append(f"{abbr}({ex})") current_line += " ".join(supplied_content) elif elem.tag == "expan": # Render abbreviation and expansion abbr_elem = elem.find("abbr") ex_elem = elem.find("ex") abbr = abbr_elem.text if abbr_elem is not None and abbr_elem.text else "" ex = ex_elem.text if ex_elem is not None and ex_elem.text else "" current_line += f"{abbr}({ex})" elif elem.tag == "g" and elem.get("type") == "leaf": current_line += " ((leaf)) " elif elem.tag == "title" and elem.get("type") == "emperor": # Render title abbreviation and name title_ref = elem.get('ref') title_info = titles_dict.get(title_ref, {}) abbreviation = title_info.get('Abbreviation', '') name_gr = title_info.get('Name_GR', '') current_line += f"{abbreviation} {name_gr}" elif elem.text and elem.tag not in ["supplied", "expan", "g", "title"]: current_line += elem.text if current_line: lines.append(current_line.strip()) # Append the last line return "\n".join(lines) # ------------------------------- # Streamlit App Layout # ------------------------------- 
import html  # stdlib; escapes inscription data before it is embedded in popup HTML

st.set_page_config(page_title="Epigraphic XML Viewer", layout="wide")
st.title("Epigraphic XML Viewer: Diplomatic and Editor Editions")

# -------------------------------
# Sidebar - Project Information
# -------------------------------
with st.sidebar:
    st.header("Project Information")
    st.markdown("""
    **Epigraphic Database Viewer** is a tool designed to visualize and analyze ancient inscriptions.

    **Features**:
    - Upload and view XML inscriptions data.
    - Explore inscriptions in various formats.
    - Visualize geographical origins on an interactive map.

    **Authority Lists**:
    - **Materials**: Details about materials used in inscriptions.
    - **Places**: Geographical data and descriptions.
    - **Emperor Titles**: Titles and abbreviations used in inscriptions.

    **Developed by**: Your Name or Team
    """)

# -------------------------------
# File uploader for Inscriptions XML
# -------------------------------
uploaded_file = st.file_uploader("Upload Inscriptions XML File", type=["xml"])

if uploaded_file:
    try:
        # "utf-8-sig" also accepts plain UTF-8 and drops a leading BOM, which
        # would otherwise sit in front of the XML declaration.
        inscriptions_content = uploaded_file.getvalue().decode("utf-8-sig")
    except UnicodeDecodeError as e:
        st.error(f"Uploaded file is not valid UTF-8: {e}")
        st.stop()
    st.success("File uploaded successfully!")
else:
    st.info("No file uploaded. Using default sample XML data.")
    # Default XML data (as provided by the user)
    inscriptions_content = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE epiData SYSTEM "epiData.dtd">
<!--<!DOCTYPE epiData SYSTEM "https://raw.githubusercontent.com/Bestroi150/EpiDataBase/refs/heads/main/epiData.dtd">-->
<epiData>
    <inscription n="1">
        <Publisher>EDCS</Publisher>
        <Origin ref="VIZE">Vize</Origin>
        <Origin-Geonames-Link>https://www.geonames.org/738154/vize.html</Origin-Geonames-Link>
        <Origin-Pleiades-Link>https://pleiades.stoa.org/places/511190</Origin-Pleiades-Link>
        <Institution ID="AE 1951, 00257"></Institution>
        <Category>
            <term>Augusti/Augustae</term>
            <term>ordo senatorius</term>
            <term>tituli sacri</term>
            <term>tria nomina</term>
            <term>viri</term>
        </Category>
        <Material ref="LAPIS">lapis</Material>
        <Language>Greek</Language>
        <Text>
            <lb n="1"/>ἀγαθῇ τύχῃ
            <lb n="2"/>ὑπὲρ τῆς τοῦ <title type="emperor" ref="IMPERATOR">Αὐτοκράτορος</title>
            <lb n="3" break="no"/><expan><abbr>T</abbr><ex>ίτου</ex></expan> <expan>Αἰλ<ex>ίου</ex></expan> <persName type="emperor">Ἁδριανοῦ Ἀντωνείνου</persName> <title type="emperor">Καί
            <lb n="4"/>σαρος</title><expan>Σεβ<ex>αστοῦ</ex></expan> Εὐσεβοῦς καὶ Οὐήρου Καίσαρ
            <lb n="5"/>ος νείκης τε καὶ αἰωνίου διαμονῆς καὶ τοῦ
            <lb n="6"/>σύμπαντος αὐτῶν οἴκου ἱερᾶς τε
            <lb n="7"/>συνκλήτου καὶ δήμου Ῥωμαίων
            <lb n="8" break="no"/>ἡγεμονεύοντος <place type="province">ἐπαρχείας Θρᾴκης</place>
            <lb n="9"/><persName type="official"> <expan>Γ<ex>αΐου</ex></expan> Ἰουλίου <expan>Κομ<ex>μ</ex></expan>όδου</persName> <title type="official">πρεσβ<ex>ευτοῦ</ex></title> <expan>Σεβ<ex>αστοῦ</ex></expan>
            <lb n="10"/>ἀντιστρατήγου ἡ <place type="city">πόλις Βιζυηνῶν</place>
            <lb n="11"/>κατεσκεύασεν τοὺς πυργοὺς διὰ
            <lb n="12" break="no"/>ἐπιμελητῶν Φίρμου Αυλουπορε
            <lb n="13"/>ος καὶ Αυλουκενθου Δυτουκενθου
            <lb n="14"/>καὶ Ραζδου Ὑακίνθου εὐτυχεῖτε
        </Text>
        <Dating>155 to 155</Dating>
        <Images>https://db.edcs.eu/epigr/ae/ae1951/ae1951-74.pdf</Images>
        <Encoder>Admin</Encoder>
    </inscription>
    <!-- Additional inscriptions can be added here -->
</epiData>
"""

# -------------------------------
# Parse Inscriptions
# -------------------------------
try:
    df = parse_inscriptions(inscriptions_content)
    # Parsed once here and shared by both edition tabs.
    inscriptions_root = ET.fromstring(inscriptions_content)
except ET.ParseError as e:
    st.error(f"Error parsing XML: {e}")
    st.stop()

if df.empty:
    # Every view below indexes columns that only exist when rows were parsed.
    st.warning("No inscriptions found in the XML data.")
    st.stop()

# <Text> element of each inscription keyed by its "n" attribute; the first
# occurrence wins when a number is repeated.
text_elements = {}
for _inscription_elem in inscriptions_root.iter('inscription'):
    text_elements.setdefault(_inscription_elem.get('n'), _inscription_elem.find('Text'))

inscription_numbers = df['Number'].tolist()


def _is_http_url(value):
    """Return True when *value* is a string starting with http:// or https://."""
    return isinstance(value, str) and value.startswith(("http://", "https://"))


def _show_edition(title, renderer, select_key=None):
    """Draw one edition tab: an inscription selector plus the rendered text."""
    st.subheader(title)
    selected_number = st.selectbox(
        "Select Inscription Number", inscription_numbers, key=select_key)
    text_element = text_elements.get(selected_number)
    if text_element is not None:
        st.text_area(f"{title} Text", renderer(text_element), height=600)
    else:
        st.warning("No text found for the selected inscription.")


def _build_popup_html(row):
    """Build the HTML of a map popup; every value is escaped before embedding."""
    fields = [
        ("Inscription Number", row['Number']),
        ("Publisher", row['Publisher']),
        ("Material", row['Material']),
        ("Language", row['Language']),
        ("Dating", row['Dating']),
        ("Encoder", row['Encoder']),
        ("Categories", row['Categories']),
        ("Text", row['Text']),
    ]
    parts = [
        f"<b>{label}:</b> {html.escape(str(value))}<br>" for label, value in fields
    ]
    images_url = row['Images']
    # Only http(s) targets are linked, so a script URL in the data stays inert.
    if _is_http_url(images_url):
        safe_url = html.escape(images_url, quote=True)
        parts.append(
            f'<a href="{safe_url}" target="_blank" rel="noopener">View Images</a><br>')
    return "\n".join(parts)


# -------------------------------
# Tabs for Different Views
# -------------------------------
tabs = st.tabs(["Raw XML", "DataFrame", "Diplomatic Edition", "Editor Edition", "Visualization"])

# -------------------------------
# Raw XML Tab
# -------------------------------
with tabs[0]:
    st.subheader("Raw XML Content")
    st.text_area("XML Content", inscriptions_content, height=600)

# -------------------------------
# DataFrame Tab
# -------------------------------
with tabs[1]:
    st.subheader("Inscriptions Data")
    st.dataframe(df)

# -------------------------------
# Diplomatic Edition Tab
# -------------------------------
with tabs[2]:
    _show_edition("Diplomatic Edition", render_diplomatic)

# -------------------------------
# Editor Edition Tab
# -------------------------------
with tabs[3]:
    # The explicit key keeps this selector distinct from the diplomatic one,
    # which has the same label and options.
    _show_edition("Editor Edition", render_editor, select_key='editor_select')

# -------------------------------
# Visualization Tab
# -------------------------------
with tabs[4]:
    st.subheader("Visualization")

    # Extract categories; inscriptions without categories carry an empty string.
    all_categories = {
        cat
        for categories in df['Categories']
        for cat in categories.split(", ")
        if cat
    }

    # Category filtering
    selected_categories = st.multiselect("Filter by Category", sorted(all_categories))

    if selected_categories:
        wanted = set(selected_categories)
        mask = df['Categories'].apply(
            lambda cats: not wanted.isdisjoint(cats.split(", ")))
        # Copy so the coordinate columns are added to an independent frame.
        filtered_df = df[mask].copy()
    else:
        filtered_df = df.copy()

    # Merge with places to get coordinates; unknown origins become NaN.
    latitude_by_id = {pid: place['Latitude'] for pid, place in places_dict.items()}
    longitude_by_id = {pid: place['Longitude'] for pid, place in places_dict.items()}
    filtered_df['Latitude'] = filtered_df['Origin_ID'].map(latitude_by_id)
    filtered_df['Longitude'] = filtered_df['Origin_ID'].map(longitude_by_id)

    # Drop entries without coordinates
    map_df = filtered_df.dropna(subset=['Latitude', 'Longitude'])

    if not map_df.empty:
        # Create a Folium map centered around the average coordinates
        avg_lat = map_df['Latitude'].mean()
        avg_lon = map_df['Longitude'].mean()
        folium_map = folium.Map(location=[avg_lat, avg_lon], zoom_start=6)

        # Add markers to the map
        for _, row in map_df.iterrows():
            folium.Marker(
                location=[row['Latitude'], row['Longitude']],
                popup=folium.Popup(_build_popup_html(row), max_width=300),
                tooltip=f"Inscription {row['Number']}"
            ).add_to(folium_map)

        # Display the Folium map using streamlit_folium
        st_folium(folium_map, width=700, height=500)
    else:
        st.write("No inscriptions to display on the map based on the selected filters.")

    st.dataframe(filtered_df)

    # Detailed View
    for _, row in filtered_df.iterrows():
        with st.expander(f"Inscription {row['Number']}"):
            st.markdown(f"**Publisher**: {row['Publisher']}")

            # Placeholder values such as "-" or "#" are not rendered as links.
            origin_links = [
                f"[{label}]({url})"
                for label, url in (
                    ("GeoNames Link", row['GeoNames Link']),
                    ("Pleiades Link", row['Pleiades Link']),
                )
                if _is_http_url(url)
            ]
            origin_line = f"**Origin**: {row['Origin']}"
            if origin_links:
                origin_line += f" ({', '.join(origin_links)})"
            st.markdown(origin_line)

            material_info = materials_dict.get(row['Material_ID'], {})
            st.markdown(f"**Material**: {row['Material']} - {material_info.get('Description', '')}")
            st.markdown(f"**Language**: {row['Language']}")
            st.markdown(f"**Dating**: {row['Dating']}")
            st.markdown(f"**Encoder**: {row['Encoder']}")
            st.markdown(f"**Categories**: {row['Categories']}")
            st.markdown(f"**Text**:\n\n{row['Text']}")
            if _is_http_url(row['Images']):
                st.markdown(f"[View Images]({row['Images']})")

            # Display material description
            material_desc = material_info.get('Description', "No description available.")
            st.markdown(f"**Material Description**: {material_desc}")

            # Display place description
            place_desc = places_dict.get(row['Origin_ID'], {}).get('Description', "No description available.")
            st.markdown(f"**Place Description**: {place_desc}")

# -------------------------------
# Footer
# -------------------------------
st.markdown("""
---
**© 2024 InscriptaNETr**
""")