Spaces:
Sleeping
Sleeping
import streamlit as st | |
import xml.etree.ElementTree as ET | |
import pandas as pd | |
from io import StringIO | |
import folium | |
from streamlit_folium import st_folium | |
import unicodedata | |
import networkx as nx | |
import plotly.express as px | |
import plotly.graph_objects as go | |
# ------------------------------- | |
# Authority Lists as XML Strings | |
# ------------------------------- | |
materials_xml = """<?xml version="1.0" encoding="UTF-8"?> | |
<materials> | |
<material id="LAPIS"> | |
<name>Lapis</name> | |
<name_en>Stone</name_en> | |
<description>Stone used as a durable medium for inscriptions and engravings.</description> | |
</material> | |
<material id="ARGENTUM"> | |
<name>Argentum</name> | |
<name_en>Silver</name_en> | |
<description>Silver used in inscriptions, often for its lustrous appearance and value.</description> | |
</material> | |
<material id="PLUMBUM"> | |
<name>Plumbum</name> | |
<name_en>Lead</name_en> | |
<description>Lead utilized in inscriptions, valued for its malleability and ease of engraving.</description> | |
</material> | |
<material id="OPUS_FIGLINAE"> | |
<name>Opus Figlinae</name> | |
<name_en>Pottery</name_en> | |
<description>Pottery used as a medium for inscriptions, typically in the form of ceramic artifacts.</description> | |
</material> | |
</materials> | |
""" | |
places_xml = """<?xml version="1.0" encoding="UTF-8"?> | |
<places> | |
<place id="VIZE"> | |
<name>Vize</name> | |
<geonamesLink>https://www.geonames.org/738154/vize.html</geonamesLink> | |
<pleiadesLink>https://pleiades.stoa.org/places/511190</pleiadesLink> | |
<latitude>40.6545</latitude> | |
<longitude>28.4078</longitude> | |
<description>Ancient city located in modern-day Turkey.</description> | |
</place> | |
<place id="PHILIPPI"> | |
<name>Philippi</name> | |
<geonamesLink>https://www.geonames.org/734652/filippoi-philippi.html</geonamesLink> | |
<pleiadesLink>https://pleiades.stoa.org/places/501482</pleiadesLink> | |
<latitude>40.5044</latitude> | |
<longitude>24.9722</longitude> | |
<description>Ancient city in Macedonia, founded by Philip II of Macedon.</description> | |
</place> | |
<place id="AUGUSTA_TRAIANA"> | |
<name>Augusta Traiana</name> | |
<geonamesLink>https://www.geonames.org/maps/google_42.4333_25.65.html</geonamesLink> | |
<pleiadesLink>https://pleiades.stoa.org/places/216731</pleiadesLink> | |
<latitude>42.4259</latitude> | |
<longitude>25.6272</longitude> | |
<description>Ancient Roman city, present-day Stara Zagora in Bulgaria.</description> | |
</place> | |
<place id="DYRRACHIUM"> | |
<name>Dyrrachium</name> | |
<geonamesLink>https://www.geonames.org/3185728/durres.html</geonamesLink> | |
<pleiadesLink>https://pleiades.stoa.org/places/481818</pleiadesLink> | |
<latitude>41.3231</latitude> | |
<longitude>19.4417</longitude> | |
<description>Ancient city on the Adriatic coast, present-day Durrës in Albania.</description> | |
</place> | |
<place id="ANTISARA"> | |
<name>Antisara</name> | |
<geonamesLink>https://www.geonames.org/736079/akra-kalamitsa.html</geonamesLink> | |
<pleiadesLink>https://pleiades.stoa.org/places/501351</pleiadesLink> | |
<latitude>39.5000</latitude> | |
<longitude>20.0000</longitude> | |
<description>Ancient settlement, exact modern location TBD.</description> | |
</place> | |
<place id="MACEDONIA"> | |
<name>Macedonia</name> | |
<geonamesLink>-</geonamesLink> | |
<pleiadesLink>-</pleiadesLink> | |
<latitude>40.0000</latitude> | |
<longitude>22.0000</longitude> | |
<description>Historical region in Southeast Europe, encompassing parts of modern Greece, North Macedonia, and Bulgaria.</description> | |
</place> | |
</places> | |
""" | |
titles_xml = """<?xml version="1.0" encoding="UTF-8"?> | |
<emperorTitles> | |
<title id="IMPERATOR"> | |
<name>Imperator</name> | |
<name_gr>Αυτοκράτορας</name_gr> | |
<abbreviation>Imp.</abbreviation> | |
<description>A title granted to a victorious general, later adopted as a formal title by Roman emperors.</description> | |
</title> | |
<title id="CAESAR"> | |
<name>Caesar</name> | |
<name_gr>Καῖσαρ</name_gr> | |
<abbreviation>Caes.</abbreviation> | |
<description>A title used by Roman emperors, originally the family name of Julius Caesar.</description> | |
</title> | |
<title id="AUGUSTUS"> | |
<name>Augustus</name> | |
<name_gr>-</name_gr> | |
<abbreviation>Aug.</abbreviation> | |
<description>The first Roman emperor's title, signifying revered or majestic status.</description> | |
</title> | |
</emperorTitles> | |
""" | |
# ------------------------------- | |
# Parse Authority Lists | |
# ------------------------------- | |
def parse_materials(xml_string):
    """Build the material authority table from its XML source.

    Args:
        xml_string: XML text whose root has ``<material id="...">`` children,
            each with ``<name>``, ``<name_en>`` and ``<description>``.

    Returns:
        dict mapping each material id to a dict with the keys ``'Name'``,
        ``'Name_EN'`` and ``'Description'``.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is malformed.
        AttributeError: if a material lacks one of the three child elements.
    """
    # (output key, child tag) pairs, in the order the fields are read.
    field_tags = (
        ('Name', 'name'),
        ('Name_EN', 'name_en'),
        ('Description', 'description'),
    )
    return {
        node.get('id'): {key: node.find(tag).text for key, tag in field_tags}
        for node in ET.fromstring(xml_string).findall('material')
    }
def parse_places(xml_string):
    """Build the place authority table from its XML source.

    Args:
        xml_string: XML text whose root has ``<place id="...">`` children.

    Returns:
        dict mapping each place id to a dict with the keys ``'Name'``,
        ``'GeoNames Link'``, ``'Pleiades Link'``, ``'Latitude'``,
        ``'Longitude'`` and ``'Description'``; the two coordinates are floats.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is malformed.
        AttributeError: if a place lacks one of the expected child elements.
        ValueError: if a coordinate is not a valid float literal.
    """
    def text_of(node, tag):
        # Direct child lookup; a missing child surfaces as AttributeError.
        return node.find(tag).text

    places = {}
    for node in ET.fromstring(xml_string).findall('place'):
        record = {}
        record['Name'] = text_of(node, 'name')
        record['GeoNames Link'] = text_of(node, 'geonamesLink')
        record['Pleiades Link'] = text_of(node, 'pleiadesLink')
        record['Latitude'] = float(text_of(node, 'latitude'))
        record['Longitude'] = float(text_of(node, 'longitude'))
        record['Description'] = text_of(node, 'description')
        places[node.get('id')] = record
    return places
def parse_titles(xml_string):
    """Build the emperor-title authority table from its XML source.

    Args:
        xml_string: XML text whose root has ``<title id="...">`` children,
            each with ``<name>``, ``<name_gr>``, ``<abbreviation>`` and
            ``<description>``.

    Returns:
        dict mapping each title id to a dict with the keys ``'Name'``,
        ``'Name_GR'``, ``'Abbreviation'`` and ``'Description'``.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is malformed.
        AttributeError: if a title lacks one of the four child elements.
    """
    keys = ('Name', 'Name_GR', 'Abbreviation', 'Description')
    tags = ('name', 'name_gr', 'abbreviation', 'description')

    titles = {}
    for node in ET.fromstring(xml_string).findall('title'):
        titles[node.get('id')] = {
            key: node.find(tag).text for key, tag in zip(keys, tags)
        }
    return titles
# Load authority data | |
# Parse the three embedded authority lists once, at import time, into the
# module-level lookup tables used by the rest of the script.
materials_dict, places_dict, titles_dict = (
    parse_materials(materials_xml),
    parse_places(places_xml),
    parse_titles(titles_xml),
)
# ------------------------------- | |
# Function to Find Place ID by Name (Case-Insensitive) | |
# ------------------------------- | |
def find_place_id_by_name(name):
    """Look up a place id in ``places_dict`` by its display name.

    Both sides of the comparison are stripped and lower-cased, so the match
    ignores case and surrounding whitespace.

    Args:
        name: place name to look for.

    Returns:
        The id of the first place whose ``'Name'`` matches; otherwise
        ``name`` itself, unchanged.
    """
    matching_ids = (
        place_id
        for place_id, record in places_dict.items()
        if record['Name'].strip().lower() == name.strip().lower()
    )
    # Fall back to the caller's own text when no authority entry matches.
    return next(matching_ids, name)
# ------------------------------- | |
# Function to Parse Inscriptions | |
# ------------------------------- | |
# Column order of the DataFrame returned by parse_inscriptions.
_INSCRIPTION_COLUMNS = [
    'Number', 'Publisher', 'Origin_ID', 'Origin', 'GeoNames Link',
    'Pleiades Link', 'Latitude', 'Longitude', 'Material_ID', 'Material',
    'Language', 'Titles', 'Title_Descriptions', 'Text', 'Dating', 'Images',
    'Encoder', 'Categories',
]


def _child_text(parent, tag, default="N/A"):
    """Return the text of the first ``tag`` child, or ``default`` if absent."""
    child = parent.find(tag)
    return child.text if child is not None else default


def _resolve_origin_id(inscription):
    """Return the place id for an inscription's <Origin> ('N/A' if absent)."""
    origin_elem = inscription.find('Origin')
    if origin_elem is None:
        return "N/A"
    origin_ref = origin_elem.get('ref')
    if origin_ref:
        return origin_ref
    # No explicit reference: try to match the element text against place names.
    return find_place_id_by_name((origin_elem.text or "").strip())


def _resolve_material_id(inscription):
    """Return the material id for an inscription's <Material> ('N/A' if absent)."""
    material_elem = inscription.find('Material')
    if material_elem is None:
        return "N/A"
    material_ref = material_elem.get('ref')
    if material_ref:
        return material_ref
    # No explicit reference: match the text against the English material names.
    material_text = (material_elem.text or "").strip()
    wanted = material_text.lower()
    for id_, material in materials_dict.items():
        if material['Name_EN'].strip().lower() == wanted:
            # A falsy id is treated as "no match", so the raw text is used.
            return id_ or material_text
    return material_text


def _collect_titles(text_elem):
    """Return (names, descriptions) for every <title> found under text_elem."""
    names = []
    descriptions = []
    if text_elem is None:
        return names, descriptions
    for title in text_elem.findall('.//title'):
        title_ref = title.get('ref')
        if title_ref and title_ref in titles_dict:
            title_info = titles_dict[title_ref]
            names.append(title_info['Name'])
            descriptions.append(title_info['Description'])
        elif title.text:
            names.append(title.text.strip())
            descriptions.append("No description available.")
    return names, descriptions


def _inscription_to_row(inscription):
    """Flatten one <inscription> element into a dict keyed by column name."""
    origin_id = _resolve_origin_id(inscription)
    place = places_dict.get(origin_id, {})
    material_id = _resolve_material_id(inscription)

    text_elem = inscription.find('Text')
    titles_used, titles_descriptions = _collect_titles(text_elem)
    text = "".join(text_elem.itertext()).strip() if text_elem is not None else "N/A"

    # Empty <term/> elements have text None, which str.join cannot accept.
    category_terms = [
        term.text.strip()
        for term in inscription.findall('Category/term')
        if term.text and term.text.strip()
    ]

    return {
        'Number': inscription.get('n'),
        'Publisher': _child_text(inscription, 'Publisher'),
        'Origin_ID': origin_id,
        'Origin': place.get('Name', origin_id),
        'GeoNames Link': place.get('GeoNames Link', "#"),
        'Pleiades Link': place.get('Pleiades Link', "#"),
        'Latitude': place.get('Latitude', None),
        'Longitude': place.get('Longitude', None),
        'Material_ID': material_id,
        'Material': materials_dict.get(material_id, {}).get('Name_EN', material_id),
        'Language': _child_text(inscription, 'Language'),
        'Titles': ", ".join(titles_used) if titles_used else "N/A",
        'Title_Descriptions': "; ".join(titles_descriptions) if titles_descriptions else "N/A",
        'Text': text,
        'Dating': _child_text(inscription, 'Dating'),
        'Images': _child_text(inscription, 'Images'),
        'Encoder': _child_text(inscription, 'Encoder'),
        'Categories': ", ".join(category_terms),
    }


def parse_inscriptions(xml_content):
    """Parse an inscriptions XML document into a DataFrame, one row each.

    Only ``<inscription>`` elements that are direct children of the root are
    read. Origin and material are resolved against ``places_dict`` and
    ``materials_dict``, by ``ref`` attribute when present and by name
    otherwise; titles inside ``<Text>`` are resolved against ``titles_dict``.

    Args:
        xml_content: the XML document as text.

    Returns:
        pandas.DataFrame with the columns listed in ``_INSCRIPTION_COLUMNS``.
        The columns are present even when the document has no inscriptions,
        so callers can index them without a KeyError.

    Raises:
        xml.etree.ElementTree.ParseError: if the XML is malformed.
    """
    root = ET.fromstring(xml_content)
    rows = [_inscription_to_row(node) for node in root.findall('inscription')]
    return pd.DataFrame(rows, columns=_INSCRIPTION_COLUMNS)
# ------------------------------- | |
# Functions to Render Editions | |
# ------------------------------- | |
def render_diplomatic(text_element):
    """Render a <Text> element as upper-case lines (early version).

    NOTE: this definition is replaced by a second ``render_diplomatic``
    defined further down in this file, before any call is made.

    Elements are visited in document order via ``iter()``. Only each
    element's ``text`` is read, never its ``tail``. The ``<abbr>``/``<ex>``
    children of an ``<expan>`` are visited again as ordinary elements after
    the ``<expan>`` itself has been handled.

    Args:
        text_element: the ``<Text>`` element to render.

    Returns:
        str with one line per ``<lb>``, joined by newlines.
    """
    text_skipped_tags = ("supplied", "expan", "g", "title")
    finished_lines = []
    buffer = ""

    def upper_abbr(expan_node):
        # Upper-cased <abbr> text of an <expan>, or "" when there is none.
        abbr_node = expan_node.find("abbr")
        if abbr_node is None or not abbr_node.text:
            return ""
        return abbr_node.text.upper()

    for node in text_element.iter():
        tag = node.tag
        if tag == "lb":
            # Close the running line and start the next with its number.
            if buffer:
                finished_lines.append(buffer.strip())
            number = node.get("n", "")
            buffer = f"{number} " if number else ""
            continue
        if tag == "supplied":
            # Bracket the abbreviations of every nested <expan>.
            inner = "".join(upper_abbr(sub) for sub in node.findall(".//expan"))
            buffer += f"[{inner}]"
            continue
        if tag == "expan":
            buffer += upper_abbr(node)
            continue
        if tag == "g" and node.get("type") == "leaf":
            buffer += " LEAF "
            continue
        if tag == "title" and node.get("type") == "emperor":
            # Emperor titles contribute their authority-list abbreviation.
            buffer += titles_dict.get(node.get('ref'), {}).get('Abbreviation', '')
            continue
        if node.text and tag not in text_skipped_tags:
            buffer += node.text.upper()

    if buffer:
        finished_lines.append(buffer.strip())
    return "\n".join(finished_lines)
def render_editor(text_element):
    """Render a <Text> element with abbreviations expanded (early version).

    NOTE: this definition is replaced by a second ``render_editor`` defined
    further down in this file, before any call is made.

    Elements are visited in document order via ``iter()``. Only each
    element's ``text`` is read, never its ``tail``. The ``<abbr>``/``<ex>``
    children of an ``<expan>`` are visited again as ordinary elements after
    the ``<expan>`` itself has been handled.

    Args:
        text_element: the ``<Text>`` element to render.

    Returns:
        str with one line per ``<lb>``, joined by newlines.
    """
    text_skipped_tags = ("supplied", "expan", "g", "title")
    finished_lines = []
    buffer = ""

    def abbr_with_expansion(expan_node):
        # "abbr(ex)" for one <expan>; a missing part becomes "".
        abbr_node = expan_node.find("abbr")
        ex_node = expan_node.find("ex")
        abbr = abbr_node.text if abbr_node is not None and abbr_node.text else ""
        ex = ex_node.text if ex_node is not None and ex_node.text else ""
        return f"{abbr}({ex})"

    for node in text_element.iter():
        tag = node.tag
        if tag == "lb":
            # Close the running line and start the next with its number.
            if buffer:
                finished_lines.append(buffer.strip())
            number = node.get("n", "")
            buffer = f"{number} " if number else ""
            continue
        if tag == "supplied":
            buffer += " ".join(
                abbr_with_expansion(sub) for sub in node.findall(".//expan")
            )
            continue
        if tag == "expan":
            buffer += abbr_with_expansion(node)
            continue
        if tag == "g" and node.get("type") == "leaf":
            buffer += " ((leaf)) "
            continue
        if tag == "title" and node.get("type") == "emperor":
            # Emperor titles render as "<abbreviation> <Greek name>".
            info = titles_dict.get(node.get('ref'), {})
            abbreviation = info.get('Abbreviation', '')
            name_gr = info.get('Name_GR', '')
            buffer += f"{abbreviation} {name_gr}"
            continue
        if node.text and tag not in text_skipped_tags:
            buffer += node.text

    if buffer:
        finished_lines.append(buffer.strip())
    return "\n".join(finished_lines)
# ------------------------------- | |
# Streamlit App Layout | |
# ------------------------------- | |
# Page-level configuration and the main heading of the app.
# NOTE(review): Streamlit has historically required set_page_config to be the
# first Streamlit command executed — confirm before moving this line.
st.set_page_config(page_title="Epigraphic XML Viewer", layout="wide")
st.title("Epigraphic XML Viewer: Diplomatic and Editor Editions")
# -------------------------------
# Sidebar - Project Information
# -------------------------------
# Everything inside this block is drawn in the sidebar: a logo, a header and
# a static Markdown description of the project.
with st.sidebar:
    # The logo path is relative, so it is resolved against the process's
    # working directory.
    st.image("imgs/logo_inscripta.jpg", use_container_width=True, caption="Latin and Ancient Greek Inscriptions")
    st.header("Project Information")
    st.markdown("""
**Epigraphic Database Viewer** is a tool designed to visualize and analyze ancient inscriptions.
**Features**:
- Upload and view XML inscriptions data.
- Explore inscriptions in various formats.
- Visualize geographical origins on an interactive map.
**Authority Lists**:
- **Materials**: Details about materials used in inscriptions.
- **Places**: Geographical data and descriptions.
- **Emperor Titles**: Titles and abbreviations used in inscriptions.
**Developed by**: Kristiyan Simeonov, Sofia University
""")
# ------------------------------- | |
# File uploader for Inscriptions XML | |
# ------------------------------- | |
# Let the user supply an inscriptions file; fall back to a built-in sample.
uploaded_file = st.file_uploader("Upload Inscriptions XML File", type=["xml"])
if uploaded_file:
    try:
        # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
        # mark, so the BOM does not end up inside the document text.
        inscriptions_content = uploaded_file.getvalue().decode("utf-8-sig")
    except UnicodeDecodeError as e:
        # A file in another encoding would otherwise abort the script with an
        # unhandled exception; report it and halt this run cleanly instead.
        st.error(f"Could not decode the uploaded file as UTF-8: {e}")
        st.stop()
    st.success("File uploaded successfully!")
else:
    st.info("No file uploaded. Using default sample XML data.")
    # Default XML data (as provided by the user)
    inscriptions_content = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE epiData SYSTEM "epiData.dtd"> <!--<!DOCTYPE epiData SYSTEM "https://raw.githubusercontent.com/Bestroi150/EpiDataBase/refs/heads/main/epiData.dtd">-->
<epiData>
<inscription n="1">
<Publisher>EDCS</Publisher>
<Origin ref="VIZE">Vize</Origin>
<Origin-Geonames-Link>https://www.geonames.org/738154/vize.html</Origin-Geonames-Link>
<Origin-Pleiades-Link>https://pleiades.stoa.org/places/511190</Origin-Pleiades-Link>
<Institution ID="AE 1951, 00257"></Institution>
<Category>
<term>Augusti/Augustae</term>
<term>ordo senatorius</term>
<term>tituli sacri</term>
<term>tria nomina</term>
<term>viri</term>
</Category>
<Material ref="LAPIS">lapis</Material>
<Language>Greek</Language>
<Text>
<lb n="1"/>ἀγαθῇ τύχῃ
<lb n="2"/>ὑπὲρ τῆς τοῦ <title type="emperor" ref="IMPERATOR">Αὐτοκράτορος</title>
<lb n="3" break="no"/><expan><abbr>T</abbr><ex>ίτου</ex></expan> <expan>Αἰλ<ex>ίου</ex></expan> <persName type="emperor">Ἁδριανοῦ Ἀντωνείνου</persName> <title type="emperor">Καί
<lb n="4"/>σαρος</title><expan>Σεβ<ex>αστοῦ</ex></expan> Εὐσεβοῦς καὶ Οὐήρου Καίσαρ
<lb n="5"/>ος νείκης τε καὶ αἰωνίου διαμονῆς καὶ τοῦ
<lb n="6"/>σύμπαντος αὐτῶν οἴκου ἱερᾶς τε
<lb n="7"/>συνκλήτου καὶ δήμου Ῥωμαίων
<lb n="8" break="no"/>ἡγεμονεύοντος <place type="province">ἐπαρχείας Θρᾴκης</place>
<lb n="9"/><persName type="official"> <expan>Γ<ex>αΐου</ex></expan> Ἰουλίου <expan>Κομ<ex>μ</ex></expan>όδου</persName> <title type="official">πρεσβ<ex>ευτοῦ</ex></title> <expan>Σεβ<ex>αστοῦ</ex></expan>
<lb n="10"/>ἀντιστρατήγου ἡ <place type="city">πόλις Βιζυηνῶν</place>
<lb n="11"/>κατεσκεύασεν τοὺς πυργοὺς διὰ
<lb n="12" break="no"/>ἐπιμελητῶν Φίρμου Αυλουπορε
<lb n="13"/>ος καὶ Αυλουκενθου Δυτουκενθου
<lb n="14"/>καὶ Ραζδου Ὑακίνθου εὐτυχεῖτε
</Text>
<Dating>155 to 155</Dating>
<Images>https://db.edcs.eu/epigr/ae/ae1951/ae1951-74.pdf</Images>
<Encoder>Admin</Encoder>
</inscription>
</epiData>
"""
# ------------------------------- | |
# Parse Inscriptions | |
# ------------------------------- | |
# Turn the XML text into the DataFrame every tab works from. A malformed
# document is reported to the user and halts this script run.
try:
    df = parse_inscriptions(inscriptions_content)
except ET.ParseError as parse_error:
    st.error(f"Error parsing XML: {parse_error}")
    st.stop()

# -------------------------------
# Tabs for Different Views
# -------------------------------
# Later sections address the tabs by position, so the label order matters.
tab_labels = [
    "Raw XML",
    "DataFrame",
    "Diplomatic Edition",
    "Editor Edition",
    "Visualization",
    "Authority Connections",
]
tabs = st.tabs(tab_labels)

# -------------------------------
# Raw XML Tab
# -------------------------------
# Shows the document exactly as it was uploaded (or the built-in sample).
with tabs[0]:
    st.subheader("Raw XML Content")
    st.code(inscriptions_content, language="xml")

# -------------------------------
# DataFrame Tab
# -------------------------------
# Shows the parsed, flattened table.
with tabs[1]:
    st.subheader("Inscriptions Data")
    st.dataframe(df)
# ------------------------------- | |
# Diplomatic Edition Tab | |
# ------------------------------- | |
import streamlit as st | |
import xml.etree.ElementTree as ET | |
import unicodedata | |
# Function to remove diacritics from text | |
def remove_diacritics(text):
    """Return ``text`` with all non-spacing combining marks removed.

    The text is decomposed to NFD and every character of Unicode category
    ``Mn`` is dropped. The result is left in decomposed form.
    """
    normalized_text = unicodedata.normalize('NFD', text)
    return ''.join(
        char for char in normalized_text
        if unicodedata.category(char) != 'Mn'
    )


def render_diplomatic(text_elem):
    """Render a <Text> element as a diplomatic transcription.

    Each ``<lb>`` starts a new output line. Every line is stripped of
    diacritics and of all whitespace, then upper-cased; lines that end up
    empty are omitted.

    Editorial expansions (``<ex>`` elements) are left out wherever they
    occur. Abbreviated letters are kept both when wrapped in ``<abbr>`` and
    when written directly inside ``<expan>``, e.g.
    ``<expan>Σεβ<ex>αστοῦ</ex></expan>`` yields ``ΣΕΒ``.

    Args:
        text_elem: the ``<Text>`` element to render.

    Returns:
        str with the rendered lines joined by newlines.
    """
    lines = []
    current_line = []

    def finalize_current_line():
        # Close the running line: normalize it and keep it if non-empty.
        raw_text = ''.join(current_line)
        current_line.clear()
        # split()/join removes every kind of whitespace, including the line
        # breaks and indentation of the XML source itself.
        compact = ''.join(remove_diacritics(raw_text).split())
        if compact:
            lines.append(compact.upper())

    def process_element(elem):
        if elem.tag == 'lb':
            finalize_current_line()
        elif elem.tag != 'ex':
            # <ex> content is an editorial expansion, not text on the stone,
            # so its text and children are skipped.
            if elem.text:
                current_line.append(elem.text)
            for child in elem:
                process_element(child)
        # The tail follows the element in the document, whatever its tag.
        if elem.tail:
            current_line.append(elem.tail)

    process_element(text_elem)
    finalize_current_line()
    return '\n'.join(lines)
# Streamlit Application | |
# Ensure that 'tabs' and 'df' are properly defined in your Streamlit app context | |
with tabs[2]:
    st.subheader("Diplomatic Edition")
    # Select Inscription. The column is absent when nothing was parsed, so
    # fall back to an empty option list rather than raising KeyError.
    inscription_numbers = df['Number'].tolist() if 'Number' in df else []
    selected_inscription_num = st.selectbox("Select Inscription Number", inscription_numbers)
    # Row for the selection; None when no row matches (e.g. no inscriptions).
    if inscription_numbers:
        matching_rows = df[df['Number'] == selected_inscription_num]
    else:
        matching_rows = df
    selected_inscription = matching_rows.iloc[0] if not matching_rows.empty else None

    # Locate the selected inscription's <Text> element in the source XML.
    text_element = None
    try:
        root = ET.fromstring(inscriptions_content)
    except ET.ParseError:
        st.error("Failed to parse the XML content. Please check the XML structure.")
    else:
        if selected_inscription_num is not None:
            # Compare the attribute in Python instead of interpolating it
            # into an XPath predicate, which breaks when n contains a quote.
            inscription_elem = next(
                (
                    node for node in root.iter('inscription')
                    if node.get('n') == selected_inscription_num
                ),
                None,
            )
            if inscription_elem is not None:
                text_element = inscription_elem.find("Text")

    if text_element is not None:
        diplomatic_text = render_diplomatic(text_element)
        st.code(diplomatic_text, language="plaintext")
    else:
        st.warning("No text found for the selected inscription.")
# ------------------------------- | |
# Editor Edition Tab | |
# ------------------------------- | |
def render_editor(text_element):
    """Render a <Text> element as plain text with expansions in parentheses.

    Each ``<lb>`` starts a new output line. ``<expan>`` elements holding both
    ``<abbr>`` and ``<ex>`` render as ``abbr(ex)``; a standalone ``<ex>``
    renders as ``(ex)``; every other element contributes its text and that of
    its descendants.

    Whitespace inside a line is collapsed to single spaces, so the line
    breaks and indentation of the XML source do not show up as extra blank
    lines in the output.

    Args:
        text_element: the ``<Text>`` element to render.

    Returns:
        str with the rendered lines joined by newlines, stripped at both ends.
    """
    # One list of text fragments per output line; <lb> opens a new list.
    lines = [[]]

    def emit(fragment):
        if fragment:
            lines[-1].append(fragment)

    def process_element(elem):
        emit(elem.text)
        for child in elem:
            if child.tag == 'lb':
                lines.append([])
            elif child.tag == 'expan':
                abbr = child.find('abbr')
                ex = child.find('ex')
                if abbr is not None and ex is not None:
                    # `or ''` keeps an empty element from printing "None".
                    emit(f"{abbr.text or ''}({ex.text or ''})")
                else:
                    # Letters written directly inside <expan>: walk it.
                    process_element(child)
            elif child.tag == 'abbr':
                emit(child.text)
            elif child.tag == 'ex':
                if child.text:
                    emit(f"({child.text})")
            else:
                # persName, place, title and any other wrapper.
                process_element(child)
            emit(child.tail)

    process_element(text_element)
    rendered = [' '.join(''.join(fragments).split()) for fragments in lines]
    return '\n'.join(rendered).strip()
with tabs[3]:
    st.subheader("Editor Edition")
    # Select Inscription. The column is absent when nothing was parsed, so
    # fall back to an empty option list rather than raising KeyError.
    inscription_numbers = df['Number'].tolist() if 'Number' in df else []
    # A distinct key keeps this widget separate from the other select box
    # with the same label.
    selected_inscription_num = st.selectbox("Select Inscription Number", inscription_numbers, key='editor_select')
    try:
        root = ET.fromstring(inscriptions_content)
        # iter() also visits the root, which covers a document whose root is
        # itself the <inscription>. Comparing the attribute in Python avoids
        # building an XPath predicate that breaks when n contains a quote.
        inscription_elem = None
        if selected_inscription_num is not None:
            inscription_elem = next(
                (
                    node for node in root.iter('inscription')
                    if node.get('n') == str(selected_inscription_num)
                ),
                None,
            )
        text_element = inscription_elem.find("Text") if inscription_elem is not None else None
        if text_element is not None:
            editor_text = render_editor(text_element)
            st.code(editor_text, language="plaintext")
        else:
            st.warning("No text found for the selected inscription.")
    except ET.ParseError as e:
        st.error(f"Error parsing XML: {e}")
    except Exception as e:
        # Tab-level boundary: show the failure instead of aborting the app.
        st.error(f"An unexpected error occurred: {e}")
# ------------------------------- | |
# Visualization Tab | |
# ------------------------------- | |
with tabs[4]:
    # Standard library; escapes uploaded text before it goes into popup HTML.
    import html

    st.subheader("Visualization")
    if df.empty:
        # Nothing was parsed, so there is nothing to filter or map.
        st.info("No inscriptions available to visualize.")
    else:
        # Distinct category terms. A row without categories holds "", which
        # must not become an empty filter option.
        all_categories = {
            cat
            for categories in df['Categories']
            for cat in categories.split(", ")
            if cat
        }
        # Category filtering
        selected_categories = st.multiselect("Filter by Category", sorted(all_categories))
        if selected_categories:
            wanted = set(selected_categories)
            keep = df['Categories'].apply(
                lambda cats: not wanted.isdisjoint(cats.split(", "))
            )
            # Copy so the column assignments below do not write to a slice
            # of df.
            filtered_df = df[keep].copy()
        else:
            filtered_df = df.copy()

        # Look up coordinates in the place authority list.
        def get_coordinates(origin_id):
            place = places_dict.get(origin_id, {})
            return place.get('Latitude'), place.get('Longitude')

        # Built as two lists so an empty selection cannot fail to unpack.
        coordinates = [get_coordinates(origin_id) for origin_id in filtered_df['Origin_ID']]
        filtered_df['Latitude'] = [lat for lat, _ in coordinates]
        filtered_df['Longitude'] = [lon for _, lon in coordinates]

        # Drop entries without coordinates
        map_df = filtered_df.dropna(subset=['Latitude', 'Longitude'])
        if not map_df.empty:
            # Centre the map on the average coordinates.
            avg_lat = map_df['Latitude'].mean()
            avg_lon = map_df['Longitude'].mean()
            folium_map = folium.Map(location=[avg_lat, avg_lon], zoom_start=6)
            # One marker per inscription.
            popup_fields = [
                ("Inscription Number", 'Number'),
                ("Publisher", 'Publisher'),
                ("Material", 'Material'),
                ("Language", 'Language'),
                ("Dating", 'Dating'),
                ("Encoder", 'Encoder'),
                ("Categories", 'Categories'),
                ("Text", 'Text'),
            ]
            for _, row in map_df.iterrows():
                # Values come from the uploaded file, so they are escaped
                # before being placed in the popup's HTML.
                popup_content = "".join(
                    f"<b>{label}:</b> {html.escape(str(row[column]))}<br>"
                    for label, column in popup_fields
                )
                image_url = row['Images']
                if image_url and image_url != "N/A":
                    popup_content += (
                        f'<a href="{html.escape(str(image_url))}" '
                        'target="_blank">View Images</a><br>'
                    )
                folium.Marker(
                    location=[row['Latitude'], row['Longitude']],
                    popup=folium.Popup(popup_content, max_width=300),
                    tooltip=f"Inscription {row['Number']}"
                ).add_to(folium_map)
            # Display the Folium map using streamlit_folium
            st_folium(folium_map, width=700, height=500)
        else:
            st.write("No inscriptions to display on the map based on the selected filters.")

        st.dataframe(filtered_df)

        # Detailed View: one expander per inscription.
        for _, row in filtered_df.iterrows():
            with st.expander(f"Inscription {row['Number']}"):
                st.markdown(f"**Publisher**: {row['Publisher']}")
                st.markdown(f"**Origin**: {row['Origin']} ([GeoNames Link]({row['GeoNames Link']}), [Pleiades Link]({row['Pleiades Link']}))")
                st.markdown(f"**Material**: {row['Material']} - {materials_dict.get(row['Material_ID'], {}).get('Description', '')}")
                st.markdown(f"**Language**: {row['Language']}")
                st.markdown(f"**Dating**: {row['Dating']}")
                st.markdown(f"**Encoder**: {row['Encoder']}")
                st.markdown(f"**Categories**: {row['Categories']}")
                st.markdown(f"**Text**:\n\n{row['Text']}")
                if row['Images'] and row['Images'] != "N/A":
                    st.markdown(f"[View Images]({row['Images']})")
                # Display material description
                material_desc = materials_dict.get(row['Material_ID'], {}).get('Description', "No description available.")
                st.markdown(f"**Material Description**: {material_desc}")
                # Display place description
                place_desc = places_dict.get(row['Origin_ID'], {}).get('Description', "No description available.")
                st.markdown(f"**Place Description**: {place_desc}")
# ------------------------------- | |
# Authority Connections Tab | |
# ------------------------------- | |
with tabs[5]: | |
st.subheader("Authority Connections") | |
# Define Authority Types | |
authority_types = ["Material", "Place", "Title"] # Added "Title" | |
# Select Authority Type | |
selected_authority_type = st.selectbox("Select Authority Type", authority_types) | |
# Based on selection, provide the corresponding options | |
if selected_authority_type == "Material": | |
# List all materials from materials_dict | |
material_names = [material['Name_EN'] for material in materials_dict.values()] | |
selected_material = st.selectbox("Select Material", sorted(material_names)) | |
# Find the material ID based on the selected name | |
material_id = None | |
for id_, material in materials_dict.items(): | |
if material['Name_EN'] == selected_material: | |
material_id = id_ | |
break | |
if material_id: | |
# Filter inscriptions that reference this material | |
connected_inscriptions = df[df['Material_ID'] == material_id] | |
st.markdown(f"### Inscriptions using **{selected_material}**") | |
st.write(f"**Total Inscriptions:** {len(connected_inscriptions)}") | |
if not connected_inscriptions.empty: | |
# Display inscriptions in a table | |
st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Origin', 'Language', 'Dating', 'Encoder']]) | |
# **Plotly Visualization: Inscriptions Over Time** | |
st.markdown("#### Inscriptions Over Time") | |
# Assuming 'Dating' is in a format that can be processed (e.g., "155 to 155") | |
def extract_start_year(dating): | |
if isinstance(dating, str): | |
parts = dating.split('to') | |
try: | |
return int(parts[0].strip()) | |
except: | |
return None | |
return None | |
connected_inscriptions['Start_Year'] = connected_inscriptions['Dating'].apply(extract_start_year) | |
year_counts = connected_inscriptions['Start_Year'].dropna().astype(int).value_counts().sort_index() | |
year_counts = year_counts.reset_index() | |
year_counts.columns = ['Year', 'Count'] | |
fig_bar = px.bar( | |
year_counts, | |
x='Year', | |
y='Count', | |
labels={'Count': 'Number of Inscriptions'}, | |
title=f'Number of Inscriptions Using {selected_material} Over Time', | |
template='plotly_white' | |
) | |
st.plotly_chart(fig_bar, use_container_width=True) | |
# **Plotly Visualization: Network Graph of Inscriptions and Materials** | |
st.markdown("#### Network Graph of Inscriptions and Materials") | |
# Create a network graph using Plotly | |
G = nx.Graph() | |
# Add nodes | |
G.add_node(selected_material, type='Material') | |
for _, row in connected_inscriptions.iterrows(): | |
inscription_node = f"Inscription {row['Number']}" | |
G.add_node(inscription_node, type='Inscription') | |
G.add_edge(selected_material, inscription_node) | |
# Generate positions for the nodes | |
pos = nx.spring_layout(G, k=0.5, iterations=50) | |
edge_x = [] | |
edge_y = [] | |
for edge in G.edges(): | |
x0, y0 = pos[edge[0]] | |
x1, y1 = pos[edge[1]] | |
edge_x.extend([x0, x1, None]) | |
edge_y.extend([y0, y1, None]) | |
edge_trace = go.Scatter( | |
x=edge_x, y=edge_y, | |
line=dict(width=1, color='#888'), | |
hoverinfo='none', | |
mode='lines' | |
) | |
node_x = [] | |
node_y = [] | |
for node in G.nodes(): | |
x, y = pos[node] | |
node_x.append(x) | |
node_y.append(y) | |
node_trace = go.Scatter( | |
x=node_x, y=node_y, | |
mode='markers+text', | |
text=[node for node in G.nodes()], | |
textposition="bottom center", | |
hoverinfo='text', | |
marker=dict( | |
showscale=False, | |
color=['lightblue' if G.nodes[node]['type'] == 'Material' else 'lightgreen' for node in G.nodes()], | |
size=20, | |
line_width=2 | |
) | |
) | |
fig_network = go.Figure(data=[edge_trace, node_trace], | |
layout=go.Layout( | |
title=f"Network Graph: {selected_material} and Connected Inscriptions", | |
titlefont_size=16, | |
showlegend=False, | |
hovermode='closest', | |
margin=dict(b=20,l=5,r=5,t=40), | |
annotations=[ dict( | |
text="", | |
showarrow=False, | |
xref="paper", yref="paper") ], | |
xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), | |
yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)) | |
) | |
st.plotly_chart(fig_network, use_container_width=True) | |
else: | |
st.info("No inscriptions found for the selected material.") | |
elif selected_authority_type == "Place": | |
# List all places from places_dict | |
place_names = [place['Name'] for place in places_dict.values()] | |
selected_place = st.selectbox("Select Place", sorted(place_names)) | |
# Find the place ID based on the selected name | |
place_id = None | |
for id_, place in places_dict.items(): | |
if place['Name'] == selected_place: | |
place_id = id_ | |
break | |
if place_id: | |
# Filter inscriptions that originate from this place | |
connected_inscriptions = df[df['Origin_ID'] == place_id] | |
st.markdown(f"### Inscriptions from **{selected_place}**") | |
st.write(f"**Total Inscriptions:** {len(connected_inscriptions)}") | |
if not connected_inscriptions.empty: | |
# Display inscriptions in a table | |
st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Material', 'Language', 'Dating', 'Encoder']]) | |
# **Plotly Visualization: Geographical Distribution of Inscriptions**
st.markdown("#### Geographical Distribution of Inscriptions")
# Rows lacking either coordinate cannot be plotted, so drop them up front.
map_df = connected_inscriptions[['Latitude', 'Longitude', 'Number']].dropna(
    subset=['Latitude', 'Longitude']
)
if map_df.empty:
    st.info("No geographical data available for these inscriptions.")
else:
    # One marker per inscription; hovering shows the inscription number.
    fig_map = px.scatter_geo(
        map_df,
        lat='Latitude',
        lon='Longitude',
        hover_name='Number',
        title=f'Geographical Distribution of Inscriptions from {selected_place}',
        template='plotly_white'
    )
    # World map styling: natural-earth projection, grey land, country borders.
    geo_settings = {
        'scope': 'world',
        'projection_type': 'natural earth',
        'showland': True,
        'landcolor': 'lightgray',
        'showcountries': True,
    }
    fig_map.update_layout(geo=geo_settings)
    st.plotly_chart(fig_map, use_container_width=True)
# **Plotly Visualization: Network Graph of Inscriptions and Places**
st.markdown("#### Network Graph of Inscriptions and Places")
# Star-shaped graph: the selected place is the hub, every inscription a leaf.
G = nx.Graph()
G.add_node(selected_place, type='Place')
for number in connected_inscriptions['Number']:
    inscription_node = f"Inscription {number}"
    G.add_node(inscription_node, type='Inscription')
    G.add_edge(selected_place, inscription_node)
# spring_layout starts from random positions unless seeded; a fixed seed keeps
# the picture stable when Streamlit reruns the script.
pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    # The trailing None breaks the line so each edge is its own segment.
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1, color='#888'),
    hoverinfo='none',
    mode='lines'
)
# Materialise the node order once so coordinates, labels and colours line up.
nodes = list(G.nodes())
node_x = [pos[node][0] for node in nodes]
node_y = [pos[node][1] for node in nodes]
node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    text=nodes,
    textposition="bottom center",
    hoverinfo='text',
    marker=dict(
        showscale=False,
        # The place hub is salmon, inscription leaves are light green.
        color=['salmon' if G.nodes[node]['type'] == 'Place' else 'lightgreen' for node in nodes],
        size=20,
        line_width=2
    )
)
fig_network = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # The flat `titlefont_size` alias is deprecated and no longer accepted
        # by recent Plotly releases; title.font works on old and new versions.
        title=dict(
            text=f"Network Graph: {selected_place} and Connected Inscriptions",
            font=dict(size=16),
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        # Layout coordinates are arbitrary, so hide grid, zero line and ticks.
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)
st.plotly_chart(fig_network, use_container_width=True)
else: | |
st.info("No inscriptions found for the selected place.") | |
elif selected_authority_type == "Title": | |
# List all titles from titles_dict | |
title_names = [title['Name'] for title in titles_dict.values()] | |
selected_title = st.selectbox("Select Title", sorted(title_names)) | |
# Find the title ID based on the selected name | |
title_id = None | |
for id_, title in titles_dict.items(): | |
if title['Name'] == selected_title: | |
title_id = id_ | |
break | |
if title_id: | |
# Filter inscriptions that reference this title | |
# Assuming 'Titles' column contains comma-separated titles | |
connected_inscriptions = df[df['Titles'].str.contains(selected_title, case=False, na=False)] | |
st.markdown(f"### Inscriptions referencing **{selected_title}**") | |
st.write(f"**Total Inscriptions:** {len(connected_inscriptions)}") | |
if not connected_inscriptions.empty: | |
# Display inscriptions in a table | |
st.dataframe(connected_inscriptions[['Number', 'Publisher', 'Origin', 'Material', 'Language', 'Dating', 'Encoder']]) | |
# **Plotly Visualization: Inscriptions Referencing the Title Over Time** | |
st.markdown("#### Inscriptions Referencing the Title Over Time") | |
def extract_start_year(dating):
    """Return the starting year encoded in a dating string.

    The text before the first ``"to"`` (for example ``"100 to 200"``) is
    stripped of surrounding whitespace and parsed as an integer.

    Args:
        dating: Raw dating value; anything that is not a ``str`` is rejected.

    Returns:
        The starting year as an ``int``, or ``None`` when ``dating`` is not a
        string or its leading part is not a valid integer.
    """
    if not isinstance(dating, str):
        return None
    start_text = dating.split('to')[0].strip()
    try:
        return int(start_text)
    except ValueError:
        # Only a failed integer parse is expected here; anything else should
        # surface instead of being silently swallowed by a bare except.
        return None
# Attach the parsed start year. `assign` builds a new frame, so the filtered
# slice of `df` is not written to in place (avoids SettingWithCopyWarning).
connected_inscriptions = connected_inscriptions.assign(
    Start_Year=connected_inscriptions['Dating'].apply(extract_start_year)
)
# Count inscriptions per start year in chronological order; rows whose dating
# could not be parsed are left out.
year_counts = (
    connected_inscriptions['Start_Year']
    .dropna()
    .astype(int)
    .value_counts()
    .sort_index()
    .reset_index()
)
year_counts.columns = ['Year', 'Count']
if year_counts.empty:
    # Nothing could be dated: say so instead of drawing an empty chart.
    st.info("No dating information available for these inscriptions.")
else:
    fig_bar = px.bar(
        year_counts,
        x='Year',
        y='Count',
        labels={'Count': 'Number of Inscriptions'},
        title=f'Number of Inscriptions Referencing "{selected_title}" Over Time',
        template='plotly_white'
    )
    st.plotly_chart(fig_bar, use_container_width=True)
# **Plotly Visualization: Network Graph of Inscriptions and Titles**
st.markdown("#### Network Graph of Inscriptions and Titles")
# Star-shaped graph: the selected title is the hub, every inscription a leaf.
G = nx.Graph()
G.add_node(selected_title, type='Title')
for number in connected_inscriptions['Number']:
    inscription_node = f"Inscription {number}"
    G.add_node(inscription_node, type='Inscription')
    G.add_edge(selected_title, inscription_node)
# spring_layout starts from random positions unless seeded; a fixed seed keeps
# the picture stable when Streamlit reruns the script.
pos = nx.spring_layout(G, k=0.5, iterations=50, seed=42)
edge_x = []
edge_y = []
for source, target in G.edges():
    x0, y0 = pos[source]
    x1, y1 = pos[target]
    # The trailing None breaks the line so each edge is its own segment.
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1, color='#888'),
    hoverinfo='none',
    mode='lines'
)
# Materialise the node order once so coordinates, labels and colours line up.
nodes = list(G.nodes())
node_x = [pos[node][0] for node in nodes]
node_y = [pos[node][1] for node in nodes]
node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers+text',
    text=nodes,
    textposition="bottom center",
    hoverinfo='text',
    marker=dict(
        showscale=False,
        # The title hub is orange, inscription leaves are light green.
        color=['orange' if G.nodes[node]['type'] == 'Title' else 'lightgreen' for node in nodes],
        size=20,
        line_width=2
    )
)
fig_network = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # The flat `titlefont_size` alias is deprecated and no longer accepted
        # by recent Plotly releases; title.font works on old and new versions.
        title=dict(
            text=f"Network Graph: {selected_title} and Connected Inscriptions",
            font=dict(size=16),
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        # Layout coordinates are arbitrary, so hide grid, zero line and ticks.
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    ),
)
st.plotly_chart(fig_network, use_container_width=True)
else: | |
st.info("No inscriptions found referencing the selected title.") | |
# ------------------------------- | |
# Footer | |
# ------------------------------- | |
# Page footer: a horizontal rule followed by the bold copyright line.
st.markdown("\n---\n**© 2024 InscriptaNET**\n")