Spaces:

bestroi
/

InscriptaNet

Sleeping

App Files Files Community

InscriptaNet / app.py

bestroi

Update app.py

876c797 verified 7 months ago

raw

history blame

24.6 kB

	import html
	import xml.etree.ElementTree as ET
	from io import StringIO

	import folium
	import pandas as pd
	import streamlit as st
	from streamlit_folium import st_folium

	# -------------------------------
	# Authority Lists as XML Strings
	# -------------------------------

	# Authority list of inscription materials, embedded as an XML document.
	# Each <material> carries an `id` attribute plus <name>, <name_en> and
	# <description> children; parse_materials() reads exactly those fields.
	materials_xml = """<?xml version="1.0" encoding="UTF-8"?>
	<materials>
	<material id="LAPIS">
	<name>Lapis</name>
	<name_en>Stone</name_en>
	<description>Stone used as a durable medium for inscriptions and engravings.</description>
	</material>
	<material id="ARGENTUM">
	<name>Argentum</name>
	<name_en>Silver</name_en>
	<description>Silver used in inscriptions, often for its lustrous appearance and value.</description>
	</material>
	<material id="PLUMBUM">
	<name>Plumbum</name>
	<name_en>Lead</name_en>
	<description>Lead utilized in inscriptions, valued for its malleability and ease of engraving.</description>
	</material>
	<material id="OPUS_FIGLINAE">
	<name>Opus Figlinae</name>
	<name_en>Pottery</name_en>
	<description>Pottery used as a medium for inscriptions, typically in the form of ceramic artifacts.</description>
	</material>
	</materials>
	"""

	# Authority list of places, embedded as an XML document.
	# Each <place> carries an `id` attribute plus <name>, <geonamesLink>,
	# <pleiadesLink>, <latitude>, <longitude> and <description> children;
	# parse_places() converts latitude/longitude to float.
	# A literal "-" is used where no gazetteer link is given (see MACEDONIA).
	# NOTE(review): the coordinates are hard-coded here — confirm them against
	# the linked GeoNames/Pleiades records before relying on the map output.
	places_xml = """<?xml version="1.0" encoding="UTF-8"?>
	<places>
	<place id="VIZE">
	<name>Vize</name>
	<geonamesLink>https://www.geonames.org/738154/vize.html</geonamesLink>
	<pleiadesLink>https://pleiades.stoa.org/places/511190</pleiadesLink>
	<latitude>40.6545</latitude>
	<longitude>28.4078</longitude>
	<description>Ancient city located in modern-day Turkey.</description>
	</place>
	<place id="PHILIPPI">
	<name>Philippi</name>
	<geonamesLink>https://www.geonames.org/734652/filippoi-philippi.html</geonamesLink>
	<pleiadesLink>https://pleiades.stoa.org/places/501482</pleiadesLink>
	<latitude>40.5044</latitude>
	<longitude>24.9722</longitude>
	<description>Ancient city in Macedonia, founded by Philip II of Macedon.</description>
	</place>
	<place id="AUGUSTA_TRAIANA">
	<name>Augusta Traiana</name>
	<geonamesLink>https://www.geonames.org/maps/google_42.4333_25.65.html</geonamesLink>
	<pleiadesLink>https://pleiades.stoa.org/places/216731</pleiadesLink>
	<latitude>42.4259</latitude>
	<longitude>25.6272</longitude>
	<description>Ancient Roman city, present-day Stara Zagora in Bulgaria.</description>
	</place>
	<place id="DYRRACHIUM">
	<name>Dyrrachium</name>
	<geonamesLink>https://www.geonames.org/3185728/durres.html</geonamesLink>
	<pleiadesLink>https://pleiades.stoa.org/places/481818</pleiadesLink>
	<latitude>41.3231</latitude>
	<longitude>19.4417</longitude>
	<description>Ancient city on the Adriatic coast, present-day Durrës in Albania.</description>
	</place>
	<place id="ANTISARA">
	<name>Antisara</name>
	<geonamesLink>https://www.geonames.org/736079/akra-kalamitsa.html</geonamesLink>
	<pleiadesLink>https://pleiades.stoa.org/places/501351</pleiadesLink>
	<latitude>39.5000</latitude>
	<longitude>20.0000</longitude>
	<description>Ancient settlement, exact modern location TBD.</description>
	</place>
	<place id="MACEDONIA">
	<name>Macedonia</name>
	<geonamesLink>-</geonamesLink>
	<pleiadesLink>-</pleiadesLink>
	<latitude>40.0000</latitude>
	<longitude>22.0000</longitude>
	<description>Historical region in Southeast Europe, encompassing parts of modern Greece, North Macedonia, and Bulgaria.</description>
	</place>
	</places>
	"""

	# Authority list of emperor titles, embedded as an XML document.
	# Each <title> carries an `id` attribute plus <name>, <name_gr>,
	# <abbreviation> and <description> children; parse_titles() reads exactly
	# those fields. A literal "-" stands in where no Greek name is given
	# (see AUGUSTUS).
	titles_xml = """<?xml version="1.0" encoding="UTF-8"?>
	<emperorTitles>
	<title id="IMPERATOR">
	<name>Imperator</name>
	<name_gr>Αυτοκράτορας</name_gr>
	<abbreviation>Imp.</abbreviation>
	<description>A title granted to a victorious general, later adopted as a formal title by Roman emperors.</description>
	</title>
	<title id="CAESAR">
	<name>Caesar</name>
	<name_gr>Καῖσαρ</name_gr>
	<abbreviation>Caes.</abbreviation>
	<description>A title used by Roman emperors, originally the family name of Julius Caesar.</description>
	</title>
	<title id="AUGUSTUS">
	<name>Augustus</name>
	<name_gr>-</name_gr>
	<abbreviation>Aug.</abbreviation>
	<description>The first Roman emperor's title, signifying revered or majestic status.</description>
	</title>
	</emperorTitles>
	"""

	# -------------------------------
	# Parse Authority Lists
	# -------------------------------

	def parse_materials(xml_string):
	    """Build a lookup of materials from the materials authority XML.

	    Returns a dict keyed by each <material> element's ``id`` attribute; every
	    value is a dict with the text of the ``name``, ``name_en`` and
	    ``description`` children under the keys 'Name', 'Name_EN' and
	    'Description'. A missing child raises AttributeError.
	    """
	    # (output key, child tag) pairs, in the order the keys appear in the result.
	    field_tags = (
	        ('Name', 'name'),
	        ('Name_EN', 'name_en'),
	        ('Description', 'description'),
	    )
	    return {
	        node.get('id'): {label: node.find(tag).text for label, tag in field_tags}
	        for node in ET.fromstring(xml_string).findall('material')
	    }

	def parse_places(xml_string):
	    """Build a lookup of places from the places authority XML.

	    Returns a dict keyed by each <place> element's ``id`` attribute. Every
	    value holds the place name, its GeoNames and Pleiades links, the
	    latitude and longitude converted to float, and the description.
	    A missing child raises AttributeError.
	    """
	    def text_of(node, tag):
	        # Raw text of the first <tag> child of node.
	        return node.find(tag).text

	    return {
	        node.get('id'): {
	            'Name': text_of(node, 'name'),
	            'GeoNames Link': text_of(node, 'geonamesLink'),
	            'Pleiades Link': text_of(node, 'pleiadesLink'),
	            'Latitude': float(text_of(node, 'latitude')),
	            'Longitude': float(text_of(node, 'longitude')),
	            'Description': text_of(node, 'description'),
	        }
	        for node in ET.fromstring(xml_string).findall('place')
	    }

	def parse_titles(xml_string):
	    """Build a lookup of emperor titles from the titles authority XML.

	    Returns a dict keyed by each <title> element's ``id`` attribute; every
	    value is a dict with the text of the ``name``, ``name_gr``,
	    ``abbreviation`` and ``description`` children under the keys 'Name',
	    'Name_GR', 'Abbreviation' and 'Description'. A missing child raises
	    AttributeError.
	    """
	    # (output key, child tag) pairs, in the order the keys appear in the result.
	    field_tags = (
	        ('Name', 'name'),
	        ('Name_GR', 'name_gr'),
	        ('Abbreviation', 'abbreviation'),
	        ('Description', 'description'),
	    )
	    return {
	        node.get('id'): {label: node.find(tag).text for label, tag in field_tags}
	        for node in ET.fromstring(xml_string).findall('title')
	    }

	# Load authority data
	# Parse the three embedded authority lists once, at import time.
	materials_dict, places_dict, titles_dict = (
	    parse_materials(materials_xml),
	    parse_places(places_xml),
	    parse_titles(titles_xml),
	)

	# -------------------------------
	# Function to Find Place ID by Name (Case-Insensitive)
	# -------------------------------

	def find_place_id_by_name(name):
	    """
	    Look up a place ID by its name, ignoring case and surrounding whitespace.
	    Returns the ID of the first matching entry in places_dict, or the
	    original name unchanged when nothing matches.
	    """
	    matching_ids = (
	        place_id
	        for place_id, details in places_dict.items()
	        if details['Name'].strip().lower() == name.strip().lower()
	    )
	    # Fall back to the caller's own string when no authority entry matches.
	    return next(matching_ids, name)

	# -------------------------------
	# Function to Parse Inscriptions
	# -------------------------------

	def parse_inscriptions(xml_content):
	    """Parse an inscriptions XML document into a DataFrame, one row per inscription.

	    Only <inscription> elements that are direct children of the root are
	    read. Origin and Material are resolved against the authority lists
	    (places_dict / materials_dict): a ``ref`` attribute is used as the ID
	    when present, otherwise the element text is matched by name and kept
	    verbatim when no entry matches. Absent optional children yield "N/A".

	    The returned frame always carries the full column set, even when the
	    document contains no inscriptions, so callers can index columns such as
	    'Number' without a KeyError.

	    Raises:
	        xml.etree.ElementTree.ParseError: if xml_content is not well-formed.
	    """
	    columns = [
	        'Number', 'Publisher', 'Origin_ID', 'Origin', 'GeoNames Link',
	        'Pleiades Link', 'Material_ID', 'Material', 'Language', 'Text',
	        'Dating', 'Images', 'Encoder', 'Categories',
	    ]

	    def child_text(parent, tag):
	        # Text of the first <tag> child, or "N/A" when the child is absent.
	        child = parent.find(tag)
	        return child.text if child is not None else "N/A"

	    root = ET.fromstring(xml_content)
	    inscriptions = []
	    for inscription in root.findall('inscription'):
	        # Origin: prefer the 'ref' attribute, else match the text by place name.
	        origin_elem = inscription.find('Origin')
	        if origin_elem is None:
	            origin_id = "N/A"
	        else:
	            origin_id = origin_elem.get('ref') or find_place_id_by_name(
	                (origin_elem.text or "").strip()
	            )
	        place = places_dict.get(origin_id, {})

	        # Material: prefer the 'ref' attribute, else match the text against
	        # the English material names, else keep the raw text.
	        material_elem = inscription.find('Material')
	        if material_elem is None:
	            material_id = "N/A"
	        else:
	            material_id = material_elem.get('ref')
	            if not material_id:
	                material_text = (material_elem.text or "").strip()
	                wanted = material_text.lower()
	                material_id = next(
	                    (
	                        id_
	                        for id_, info in materials_dict.items()
	                        # 'Name_EN' is None for an empty <name_en/> element.
	                        if (info['Name_EN'] or "").strip().lower() == wanted
	                    ),
	                    None,
	                ) or material_text

	        text_elem = inscription.find('Text')
	        if text_elem is not None:
	            # Flatten the mixed content (markup stripped) into plain text.
	            text = "".join(text_elem.itertext()).strip()
	        else:
	            text = "N/A"

	        # Empty <term/> elements have text None, which would break the join
	        # below; blank terms carry no information, so skip them.
	        category_terms = [
	            term.text.strip()
	            for term in inscription.findall('Category/term')
	            if term.text and term.text.strip()
	        ]

	        inscriptions.append({
	            'Number': inscription.get('n'),
	            'Publisher': child_text(inscription, 'Publisher'),
	            'Origin_ID': origin_id,
	            'Origin': place.get('Name', origin_id),
	            'GeoNames Link': place.get('GeoNames Link', "#"),
	            'Pleiades Link': place.get('Pleiades Link', "#"),
	            'Material_ID': material_id,
	            'Material': materials_dict.get(material_id, {}).get('Name_EN', material_id),
	            'Language': child_text(inscription, 'Language'),
	            'Text': text,
	            'Dating': child_text(inscription, 'Dating'),
	            'Images': child_text(inscription, 'Images'),
	            'Encoder': child_text(inscription, 'Encoder'),
	            'Categories': ", ".join(category_terms),
	        })
	    # Passing columns keeps the schema stable when no rows were collected.
	    return pd.DataFrame(inscriptions, columns=columns)

	# -------------------------------
	# Functions to Render Editions
	# -------------------------------

	def render_diplomatic(text_element):
	    """Render the diplomatic (upper-case, unexpanded) edition of a <Text> element.

	    The element tree is walked recursively in document order so that both an
	    element's own text and the text following it (its ``tail``) are
	    rendered; most of an inscription's wording sits in the tail of <lb/>
	    and other inline elements.

	    Rendering rules:
	      * <lb>        starts a new line, prefixed with its ``n`` attribute.
	      * <ex>        editorial expansion: omitted.
	      * <supplied>  content wrapped in square brackets.
	      * <g type="leaf">  rendered as " LEAF ".
	      * <title type="emperor">  replaced by the abbreviation found in
	        titles_dict for its ``ref``; when no abbreviation is available the
	        element's own content is rendered instead of being dropped.
	      * anything else (expan, abbr, persName, ...) is transparent.

	    Text is upper-cased; whitespace runs inside a line are collapsed to a
	    single space and empty lines are skipped.

	    Returns the lines joined with newlines.
	    """
	    lines = []
	    fragments = []  # pieces of the line currently being assembled

	    def flush():
	        # Source newlines/indentation are layout, not content: normalise them.
	        line = " ".join("".join(fragments).split())
	        if line:
	            lines.append(line)
	        fragments.clear()

	    def emit_text(text):
	        if text:
	            fragments.append(text.upper())

	    def walk_children(elem):
	        # Own text first, then each child followed by the text after it.
	        emit_text(elem.text)
	        for child in elem:
	            walk(child)
	            emit_text(child.tail)

	    def walk(elem):
	        tag = elem.tag
	        if not isinstance(tag, str):
	            return  # comment / processing instruction: nothing to render
	        if tag == "lb":
	            flush()
	            line_number = elem.get("n", "")
	            if line_number:
	                fragments.append(f"{line_number} ")
	        elif tag == "ex":
	            pass  # expansions are editorial, so they are left out here
	        elif tag == "supplied":
	            fragments.append("[")
	            walk_children(elem)
	            fragments.append("]")
	        elif tag == "g" and elem.get("type") == "leaf":
	            fragments.append(" LEAF ")
	        elif tag == "title" and elem.get("type") == "emperor":
	            abbreviation = titles_dict.get(elem.get("ref"), {}).get("Abbreviation", "")
	            if abbreviation:
	                fragments.append(abbreviation)
	            else:
	                # No usable authority entry: keep the wording as encoded.
	                walk_children(elem)
	        else:
	            walk_children(elem)

	    walk(text_element)
	    flush()  # emit the last line
	    return "\n".join(lines)

	def render_editor(text_element):
	    """Render the editor (expanded) edition of a <Text> element.

	    The element tree is walked recursively in document order so that both an
	    element's own text and the text following it (its ``tail``) are
	    rendered; most of an inscription's wording sits in the tail of <lb/>
	    and other inline elements.

	    Rendering rules:
	      * <lb>        starts a new line, prefixed with its ``n`` attribute.
	      * <ex>        editorial expansion, wrapped in parentheses, so
	        ``<expan><abbr>T</abbr><ex>itus</ex></expan>`` becomes ``T(itus)``.
	      * <g type="leaf">  rendered as " ((leaf)) ".
	      * <title type="emperor">  replaced by "<abbreviation> <Greek name>"
	        from titles_dict for its ``ref``; when the ref is missing or unknown
	        the element's own content is rendered instead of being dropped.
	      * anything else (supplied, expan, abbr, persName, ...) is transparent.

	    Whitespace runs inside a line are collapsed to a single space and empty
	    lines are skipped.

	    Returns the lines joined with newlines.
	    """
	    lines = []
	    fragments = []  # pieces of the line currently being assembled

	    def flush():
	        # Source newlines/indentation are layout, not content: normalise them.
	        line = " ".join("".join(fragments).split())
	        if line:
	            lines.append(line)
	        fragments.clear()

	    def emit_text(text):
	        if text:
	            fragments.append(text)

	    def walk_children(elem):
	        # Own text first, then each child followed by the text after it.
	        emit_text(elem.text)
	        for child in elem:
	            walk(child)
	            emit_text(child.tail)

	    def walk(elem):
	        tag = elem.tag
	        if not isinstance(tag, str):
	            return  # comment / processing instruction: nothing to render
	        if tag == "lb":
	            flush()
	            line_number = elem.get("n", "")
	            if line_number:
	                fragments.append(f"{line_number} ")
	        elif tag == "ex":
	            fragments.append("(")
	            walk_children(elem)
	            fragments.append(")")
	        elif tag == "g" and elem.get("type") == "leaf":
	            fragments.append(" ((leaf)) ")
	        elif tag == "title" and elem.get("type") == "emperor":
	            title_info = titles_dict.get(elem.get("ref"), {})
	            if title_info:
	                abbreviation = title_info.get('Abbreviation') or ''
	                name_gr = title_info.get('Name_GR') or ''
	                fragments.append(f"{abbreviation} {name_gr}")
	            else:
	                # No authority entry: keep the wording as encoded.
	                walk_children(elem)
	        else:
	            walk_children(elem)

	    walk(text_element)
	    flush()  # emit the last line
	    return "\n".join(lines)

	# -------------------------------
	# Streamlit App Layout
	# -------------------------------

	# Page-wide configuration has to come before any other Streamlit output.
	st.set_page_config(page_title="Epigraphic XML Viewer", layout="wide")
	st.title("Epigraphic XML Viewer: Diplomatic and Editor Editions")

	# -------------------------------
	# Sidebar - Project Information
	# -------------------------------
	# Static project blurb, written straight onto the sidebar container.
	st.sidebar.header("Project Information")
	st.sidebar.markdown("""
	Epigraphic Database Viewer is a tool designed to visualize and analyze ancient inscriptions.

	Features:
	- Upload and view XML inscriptions data.
	- Explore inscriptions in various formats.
	- Visualize geographical origins on an interactive map.

	Authority Lists:
	- Materials: Details about materials used in inscriptions.
	- Places: Geographical data and descriptions.
	- Emperor Titles: Titles and abbreviations used in inscriptions.

	Developed by: Your Name or Team
	""")

	# -------------------------------
	# File uploader for Inscriptions XML
	# -------------------------------
	# Source of the inscriptions XML: the uploaded file when there is one,
	# otherwise the built-in sample document below.
	uploaded_file = st.file_uploader("Upload Inscriptions XML File", type=["xml"])

	if uploaded_file:
	    try:
	        # "utf-8-sig" decodes plain UTF-8 and also drops a leading byte-order
	        # mark, so the BOM never ends up in the displayed or parsed content.
	        inscriptions_content = uploaded_file.getvalue().decode("utf-8-sig")
	    except UnicodeDecodeError as e:
	        # Report the problem in the page instead of crashing with a traceback.
	        st.error(f"Could not read the uploaded file as UTF-8 text: {e}")
	        st.stop()
	    st.success("File uploaded successfully!")
	else:
	    st.info("No file uploaded. Using default sample XML data.")
	    # Default XML data (as provided by the user)
	    inscriptions_content = """<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE epiData SYSTEM "epiData.dtd"> <!--<!DOCTYPE epiData SYSTEM "https://raw.githubusercontent.com/Bestroi150/EpiDataBase/refs/heads/main/epiData.dtd">-->

	<epiData>
	<inscription n="1">
	<Publisher>EDCS</Publisher>
	<Origin ref="VIZE">Vize</Origin>
	<Origin-Geonames-Link>https://www.geonames.org/738154/vize.html</Origin-Geonames-Link>
	<Origin-Pleiades-Link>https://pleiades.stoa.org/places/511190</Origin-Pleiades-Link>
	<Institution ID="AE 1951, 00257"></Institution>
	<Category>
	<term>Augusti/Augustae</term>
	<term>ordo senatorius</term>
	<term>tituli sacri</term>
	<term>tria nomina</term>
	<term>viri</term>
	</Category>
	<Material ref="LAPIS">lapis</Material>
	<Language>Greek</Language>
	<Text>
	<lb n="1"/>ἀγαθῇ τύχῃ
	<lb n="2"/>ὑπὲρ τῆς τοῦ <title type="emperor" ref="IMPERATOR">Αὐτοκράτορος</title>
	<lb n="3" break="no"/><expan><abbr>T</abbr><ex>ίτου</ex></expan> <expan>Αἰλ<ex>ίου</ex></expan> <persName type="emperor">Ἁδριανοῦ Ἀντωνείνου</persName> <title type="emperor">Καί
	<lb n="4"/>σαρος</title><expan>Σεβ<ex>αστοῦ</ex></expan> Εὐσεβοῦς καὶ Οὐήρου Καίσαρ
	<lb n="5"/>ος νείκης τε καὶ αἰωνίου διαμονῆς καὶ τοῦ
	<lb n="6"/>σύμπαντος αὐτῶν οἴκου ἱερᾶς τε
	<lb n="7"/>συνκλήτου καὶ δήμου Ῥωμαίων
	<lb n="8" break="no"/>ἡγεμονεύοντος <place type="province">ἐπαρχείας Θρᾴκης</place>
	<lb n="9"/><persName type="official"> <expan>Γ<ex>αΐου</ex></expan> Ἰουλίου <expan>Κομ<ex>μ</ex></expan>όδου</persName> <title type="official">πρεσβ<ex>ευτοῦ</ex></title> <expan>Σεβ<ex>αστοῦ</ex></expan>
	<lb n="10"/>ἀντιστρατήγου ἡ <place type="city">πόλις Βιζυηνῶν</place>
	<lb n="11"/>κατεσκεύασεν τοὺς πυργοὺς διὰ
	<lb n="12" break="no"/>ἐπιμελητῶν Φίρμου Αυλουπορε
	<lb n="13"/>ος καὶ Αυλουκενθου Δυτουκενθου
	<lb n="14"/>καὶ Ραζδου Ὑακίνθου εὐτυχεῖτε
	</Text>
	<Dating>155 to 155</Dating>
	<Images>https://db.edcs.eu/epigr/ae/ae1951/ae1951-74.pdf</Images>
	<Encoder>Admin</Encoder>
	</inscription>

	</epiData>
	"""

	# -------------------------------
	# Parse Inscriptions
	# -------------------------------

	# Turn the XML into a DataFrame; a malformed document is reported in the
	# page and stops the script run.
	try:
	    df = parse_inscriptions(inscriptions_content)
	except ET.ParseError as e:
	    st.error(f"Error parsing XML: {e}")
	    st.stop()

	# The tabs below select and index inscriptions by number; with zero rows
	# those lookups would raise, so stop with an explanation instead.
	if df.empty:
	    st.warning("No `inscription` elements were found directly under the XML root element.")
	    st.stop()

	# -------------------------------
	# Tabs for Different Views
	# -------------------------------
	# One tab per view; later sections address them by position in `tabs`.
	tabs = st.tabs(["Raw XML", "DataFrame", "Diplomatic Edition", "Editor Edition", "Visualization"])
	raw_tab, table_tab = tabs[0], tabs[1]

	# -------------------------------
	# Raw XML Tab
	# -------------------------------
	# Shows the XML source exactly as it was loaded.
	raw_tab.subheader("Raw XML Content")
	raw_tab.code(inscriptions_content, language="xml")

	# -------------------------------
	# DataFrame Tab
	# -------------------------------
	# Shows the parsed inscriptions as a table.
	table_tab.subheader("Inscriptions Data")
	table_tab.dataframe(df)

	# -------------------------------
	# Diplomatic Edition Tab
	# -------------------------------
	with tabs[2]:
	    st.subheader("Diplomatic Edition")
	    # Select Inscription
	    inscription_numbers = df['Number'].tolist()
	    selected_inscription_num = st.selectbox("Select Inscription Number", inscription_numbers)

	    # Re-parse the XML to reach the selected inscription's <Text> element.
	    # The `n` attribute is compared in Python rather than interpolated into
	    # an XPath predicate, so numbers containing quotes (or a missing `n`,
	    # which is None on both sides) still match.
	    root = ET.fromstring(inscriptions_content)
	    inscription_elem = next(
	        (
	            elem
	            for elem in root.iterfind(".//inscription")
	            if elem.get('n') == selected_inscription_num
	        ),
	        None,
	    )
	    text_element = inscription_elem.find("Text") if inscription_elem is not None else None

	    if text_element is not None:
	        diplomatic_text = render_diplomatic(text_element)
	        st.code(diplomatic_text, language="plaintext")
	    else:
	        st.warning("No text found for the selected inscription.")

	# -------------------------------
	# Editor Edition Tab
	# -------------------------------
	with tabs[3]:
	    st.subheader("Editor Edition")
	    # Select Inscription (explicit key: the label is shared with the
	    # Diplomatic tab's selectbox)
	    inscription_numbers = df['Number'].tolist()
	    selected_inscription_num = st.selectbox("Select Inscription Number", inscription_numbers, key='editor_select')

	    # Re-parse the XML to reach the selected inscription's <Text> element.
	    # The `n` attribute is compared in Python rather than interpolated into
	    # an XPath predicate, so numbers containing quotes (or a missing `n`,
	    # which is None on both sides) still match.
	    root = ET.fromstring(inscriptions_content)
	    inscription_elem = next(
	        (
	            elem
	            for elem in root.iterfind(".//inscription")
	            if elem.get('n') == selected_inscription_num
	        ),
	        None,
	    )
	    text_element = inscription_elem.find("Text") if inscription_elem is not None else None

	    if text_element is not None:
	        editor_text = render_editor(text_element)
	        st.code(editor_text, language="plaintext")
	    else:
	        st.warning("No text found for the selected inscription.")

	# -------------------------------
	# Visualization Tab
	# -------------------------------
	with tabs[4]:
	    st.subheader("Visualization")

	    # Distinct category labels across all inscriptions. Rows without any
	    # category hold "", which is not a real label and is left out.
	    all_categories = {
	        cat
	        for categories in df['Categories']
	        for cat in categories.split(", ")
	        if cat
	    }

	    # Category filtering: keep rows that carry at least one selected label.
	    selected_categories = st.multiselect("Filter by Category", sorted(all_categories))

	    if selected_categories:
	        wanted = set(selected_categories)
	        matches = df['Categories'].apply(lambda x: not wanted.isdisjoint(x.split(", ")))
	        # Copy so the coordinate columns added below land on an independent
	        # frame rather than on a slice of df.
	        filtered_df = df[matches].copy()
	    else:
	        filtered_df = df.copy()

	    # Merge with places to get coordinates
	    def get_coordinates(origin_id):
	        """Return (latitude, longitude) for a place ID, or (None, None) if unknown."""
	        place = places_dict.get(origin_id, {})
	        return place.get('Latitude'), place.get('Longitude')

	    # Built as plain lists so an empty selection yields two empty columns
	    # instead of failing to unpack.
	    coordinates = [get_coordinates(origin_id) for origin_id in filtered_df['Origin_ID']]
	    filtered_df['Latitude'] = [lat for lat, _ in coordinates]
	    filtered_df['Longitude'] = [lon for _, lon in coordinates]

	    # Drop entries without coordinates
	    map_df = filtered_df.dropna(subset=['Latitude', 'Longitude'])

	    if not map_df.empty:
	        # Create a Folium map centered around the average coordinates
	        avg_lat = map_df['Latitude'].mean()
	        avg_lon = map_df['Longitude'].mean()
	        folium_map = folium.Map(location=[avg_lat, avg_lon], zoom_start=6)

	        def esc(value):
	            # Popup content is HTML; values come from the (possibly uploaded)
	            # XML and must not be interpreted as markup.
	            return html.escape(str(value))

	        # Add markers to the map
	        for _, row in map_df.iterrows():
	            popup_content = f"""
	<b>Inscription Number:</b> {esc(row['Number'])}<br>
	<b>Publisher:</b> {esc(row['Publisher'])}<br>
	<b>Material:</b> {esc(row['Material'])}<br>
	<b>Language:</b> {esc(row['Language'])}<br>
	<b>Dating:</b> {esc(row['Dating'])}<br>
	<b>Encoder:</b> {esc(row['Encoder'])}<br>
	<b>Categories:</b> {esc(row['Categories'])}<br>
	<b>Text:</b> {esc(row['Text'])}<br>
	"""
	            image_url = row['Images']
	            # Only plain web links are turned into an anchor.
	            if image_url and image_url != "N/A" and image_url.strip().lower().startswith(("http://", "https://")):
	                popup_content += f'<a href="{esc(image_url.strip())}" target="_blank">View Images</a><br>'
	            folium.Marker(
	                location=[row['Latitude'], row['Longitude']],
	                popup=folium.Popup(popup_content, max_width=300),
	                tooltip=f"Inscription {row['Number']}"
	            ).add_to(folium_map)

	        # Display the Folium map using streamlit_folium
	        st_folium(folium_map, width=700, height=500)
	    else:
	        st.write("No inscriptions to display on the map based on the selected filters.")

	    st.dataframe(filtered_df)

	    # Detailed View: one expander per inscription with its metadata,
	    # plain text and the authority-list descriptions.
	    for _, row in filtered_df.iterrows():
	        with st.expander(f"Inscription {row['Number']}"):
	            st.markdown(f"Publisher: {row['Publisher']}")
	            st.markdown(f"Origin: {row['Origin']} ([GeoNames Link]({row['GeoNames Link']}), [Pleiades Link]({row['Pleiades Link']}))")
	            st.markdown(f"Material: {row['Material']} - {materials_dict.get(row['Material_ID'], {}).get('Description', '')}")
	            st.markdown(f"Language: {row['Language']}")
	            st.markdown(f"Dating: {row['Dating']}")
	            st.markdown(f"Encoder: {row['Encoder']}")
	            st.markdown(f"Categories: {row['Categories']}")
	            st.markdown(f"Text:\n\n{row['Text']}")
	            if row['Images'] and row['Images'] != "N/A":
	                st.markdown(f"[View Images]({row['Images']})")
	            # Display material description
	            material_desc = materials_dict.get(row['Material_ID'], {}).get('Description', "No description available.")
	            st.markdown(f"Material Description: {material_desc}")
	            # Display place description
	            place_desc = places_dict.get(row['Origin_ID'], {}).get('Description', "No description available.")
	            st.markdown(f"Place Description: {place_desc}")

	# -------------------------------
	# Footer
	# -------------------------------
	# Horizontal rule followed by the copyright line, rendered as Markdown.
	footer_markdown = """
	---
	© 2024 InscriptaNET
	"""
	st.markdown(footer_markdown)