Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,447 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from lxml import etree
|
3 |
+
from pathlib import Path
|
4 |
+
from io import BytesIO
|
5 |
+
from collections import defaultdict
|
6 |
+
|
7 |
+
# Prefix map handed to every XPath call below: binds the "tei" prefix used in
# the query strings to the TEI namespace URI.
NS = {'tei': 'http://www.tei-c.org/ns/1.0'}
|
8 |
+
|
9 |
+
def parse_xml(file_path):
    """Load a single XML document from disk.

    The document is read with an lxml parser configured with
    ``remove_blank_text=True``. Any exception raised while parsing is
    reported through ``st.error`` rather than propagated.

    Parameters:
        file_path: Path-like object pointing at the XML file; its ``.name``
            attribute is used in the error message.

    Returns:
        The parsed lxml tree, or ``None`` when parsing failed.
    """
    try:
        return etree.parse(
            str(file_path),
            etree.XMLParser(remove_blank_text=True),
        )
    except Exception as exc:
        # Best-effort loading: surface the problem in the UI and let the
        # caller skip this file.
        st.error(f"Error parsing XML file `{file_path.name}`: {exc}")
    return None
|
18 |
+
|
19 |
+
def get_all_authors(parsed_trees):
    """Collect the distinct bibliography author names from several documents.

    Parameters:
        parsed_trees: Iterable of parsed XML trees exposing ``.xpath``.

    Returns:
        Sorted list of the whitespace-trimmed text of every
        ``bibl/author/persName`` element that has text.
    """
    names = {
        pers_name.text.strip()
        for document in parsed_trees
        for pers_name in document.xpath(
            '//tei:bibl/tei:author/tei:persName', namespaces=NS
        )
        if pers_name.text
    }
    return sorted(names)
|
29 |
+
|
30 |
+
def get_all_keywords(parsed_trees):
    """Extract all unique keywords from the list of XML trees.

    Each ``keywords/list/item`` element may hold several comma-separated
    keywords; every fragment is trimmed and collected individually.

    Parameters:
        parsed_trees: Iterable of parsed XML trees exposing ``.xpath``.

    Returns:
        Sorted list of distinct, non-empty keyword strings.
    """
    keywords = set()
    for tree in parsed_trees:
        for item in tree.xpath('//tei:keywords/tei:list/tei:item', namespaces=NS):
            if not item.text:
                continue
            fragments = (raw.strip() for raw in item.text.split(','))
            # A trailing or doubled comma ("a, b,") produces an empty
            # fragment; drop it so no blank entry ends up in the result.
            keywords.update(fragment for fragment in fragments if fragment)
    return sorted(keywords)
|
40 |
+
|
41 |
+
def get_all_place_names(parsed_trees):
    """Extract all unique place names from the list of XML trees.

    Place names are read from four kinds of elements: provenance
    ``placeName``, ``location/name[@type="place"]``, ``name`` elements of
    type "contemporary" inside commentary divisions, and ``name`` elements
    of type "current" anywhere in the document.

    Parameters:
        parsed_trees: Iterable of parsed XML trees exposing ``.xpath``.

    Returns:
        Sorted list of distinct place names. Empty values and the
        placeholder text "none" (any letter case) are excluded.
    """
    place_queries = (
        '//tei:provenance/tei:placeName',
        '//tei:location/tei:name[@type="place"]',
        '//tei:div[@type="commentary"]//tei:name[@type="contemporary"]',
        '//tei:name[@type="current"]',
    )
    places = set()
    for tree in parsed_trees:
        for query in place_queries:
            for element in tree.xpath(query, namespaces=NS):
                # Trim BEFORE testing for the placeholder so that padded
                # values such as " None " or whitespace-only text are
                # rejected too.
                label = (element.text or '').strip()
                if label and label.lower() != 'none':
                    places.add(label)
    return sorted(places)
|
65 |
+
|
66 |
+
def build_author_mappings(parsed_trees, xml_files):
    """Build mappings from authors to their associated places and keywords.

    For every document, each bibliography author found in it is associated
    with all place names and all keywords found in that same document.
    Extraction is delegated to ``get_all_authors``, ``get_all_place_names``
    and ``get_all_keywords`` so the mappings always follow the same rules as
    the global lists produced by those functions.

    Parameters:
        parsed_trees: Iterable of parsed XML trees.
        xml_files: Not used by this function; kept so existing callers that
            pass it keep working.

    Returns:
        author_to_places (defaultdict[str, set]): Maps each author to a set
            of associated places.
        author_to_keywords (defaultdict[str, set]): Maps each author to a
            set of associated keywords.
    """
    author_to_places = defaultdict(set)
    author_to_keywords = defaultdict(set)

    for tree in parsed_trees:
        single_document = [tree]
        authors = get_all_authors(single_document)
        if not authors:
            # Nothing to attribute places/keywords to in this document.
            continue

        places = get_all_place_names(single_document)
        keywords = get_all_keywords(single_document)
        for author in authors:
            author_to_places[author].update(places)
            author_to_keywords[author].update(keywords)

    return author_to_places, author_to_keywords
|
119 |
+
|
120 |
+
def get_commentary(tree):
    """Return the commentary divisions of one document.

    Parameters:
        tree: Parsed XML tree exposing ``.xpath``.

    Returns:
        A list with one dict per ``div[@type="commentary"]``: ``'subtype'``
        holds the division's ``subtype`` attribute ("general" when absent)
        and ``'content'`` holds the division serialized as pretty-printed
        XML text.
    """
    return [
        {
            'subtype': division.get('subtype', 'general'),
            'content': etree.tostring(
                division, pretty_print=True, encoding='unicode'
            ),
        }
        for division in tree.xpath(
            '//tei:div[@type="commentary"]', namespaces=NS
        )
    ]
|
129 |
+
|
130 |
+
def get_editions(tree):
    """Return the edition divisions of one document.

    Parameters:
        tree: Parsed XML tree exposing ``.xpath``.

    Returns:
        A list with one dict per ``div[@type="edition"]``: ``'lang'`` holds
        the division's ``xml:lang`` attribute ("unknown" when absent) and
        ``'content'`` holds the division serialized as pretty-printed XML
        text.
    """
    # Fully qualified attribute name of xml:lang as lxml expects it.
    xml_lang_attr = '{http://www.w3.org/XML/1998/namespace}lang'
    return [
        {
            'lang': division.get(xml_lang_attr, 'unknown'),
            'content': etree.tostring(
                division, pretty_print=True, encoding='unicode'
            ),
        }
        for division in tree.xpath(
            '//tei:div[@type="edition"]', namespaces=NS
        )
    ]
|
140 |
+
|
141 |
+
def search_by_author(tree, author_query):
    """Find bibliography authors whose name contains ``author_query``.

    Only ``bibl/author/persName`` elements are inspected; the comparison is
    a case-insensitive substring match on the element text.

    Parameters:
        tree: Parsed XML tree exposing ``.xpath``.
        author_query (str): Author name, or part of one, to look for.

    Returns:
        list: One ``"Bibliography Author: <name>"`` string per matching
        element, in document order; empty when nothing matches.
    """
    return [
        f"Bibliography Author: {pers_name.text}"
        for pers_name in tree.xpath(
            '//tei:bibl/tei:author/tei:persName', namespaces=NS
        )
        if pers_name.text and author_query.lower() in pers_name.text.lower()
    ]
|
150 |
+
|
151 |
+
def search_by_place(tree, place_query):
    """Search one document for a place name.

    The lookup covers every element kind that ``get_all_place_names``
    collects names from (provenance ``placeName``, ``location/name`` of type
    "place", "contemporary" names inside any commentary division, and
    "current" names), plus ``geo`` elements under ``location``. Covering the
    same sources guarantees that a place offered in the selection list can
    actually be found again by the search.

    Matching is a case-insensitive substring test on the trimmed element
    text; empty text and the placeholder "none" never match.

    Parameters:
        tree: Parsed XML tree exposing ``.xpath``.
        place_query (str): The place name to search for.

    Returns:
        list: Strings of the form ``"<source label>: <text>"`` describing
        where the place was found; empty when there is no match.
    """
    needle = place_query.lower()
    sources = (
        ("Provenance Place", '//tei:provenance/tei:placeName'),
        ("Location Name", '//tei:location/tei:name[@type="place"]'),
        (
            "Contemporary Name",
            '//tei:div[@type="commentary"]//tei:name[@type="contemporary"]',
        ),
        ("Current Name", '//tei:name[@type="current"]'),
        ("Location Geo", '//tei:location//tei:geo'),
    )

    results = []
    for label, query in sources:
        for element in tree.xpath(query, namespaces=NS):
            text = (element.text or '').strip()
            lowered = text.lower()
            if text and lowered != 'none' and needle in lowered:
                results.append(f"{label}: {text}")
    return results
|
184 |
+
|
185 |
+
def search_by_keyword(tree, keyword):
    """Look for ``keyword`` in keyword items and commentary segments.

    The test is a case-insensitive substring match against the text of each
    ``keywords/list/item`` element and each ``seg`` element found inside a
    commentary division.

    Parameters:
        tree: Parsed XML tree exposing ``.xpath``.
        keyword (str): Text to look for.

    Returns:
        list: ``"Keyword: <text>"`` entries followed by
        ``"Commentary Segment: <text>"`` entries for every matching element.
    """
    lookups = (
        ("Keyword", '//tei:keywords/tei:list/tei:item'),
        ("Commentary Segment", '//tei:div[@type="commentary"]//tei:seg'),
    )
    hits = []
    for label, query in lookups:
        hits.extend(
            f"{label}: {node.text}"
            for node in tree.xpath(query, namespaces=NS)
            if node.text and keyword.lower() in node.text.lower()
        )
    return hits
|
197 |
+
|
198 |
+
def display_tei_header(tree):
    """Write title, author, publisher and date from the TEI header to the page.

    Each field is looked up under ``teiHeader/fileDesc``; when at least one
    matching element exists, the text of the first one is written with
    ``st.write``. Fields without a matching element are skipped.

    Parameters:
        tree: Parsed XML tree exposing ``.xpath``.
    """
    file_desc = '//tei:teiHeader/tei:fileDesc'
    header_fields = (
        ("Title", f'{file_desc}/tei:titleStmt/tei:title'),
        ("Author", f'{file_desc}/tei:titleStmt/tei:author/tei:persName'),
        ("Publisher", f'{file_desc}/tei:publicationStmt/tei:publisher'),
        ("Date", f'{file_desc}/tei:publicationStmt/tei:date'),
    )
    # Run every lookup before writing anything, then emit the fields in order.
    lookups = [
        (label, tree.xpath(query, namespaces=NS))
        for label, query in header_fields
    ]
    for label, nodes in lookups:
        if nodes:
            st.write(f"**{label}:** {nodes[0].text}")
|
212 |
+
|
213 |
+
def display_code_wrapped(content):
    """Display code with line wrapping using ``st.markdown`` and inline HTML.

    Long lines wrap instead of forcing horizontal scrolling. The text is
    HTML-escaped before being embedded, so markup inside ``content`` (for
    example XML tags) is shown literally rather than interpreted as HTML.

    Parameters:
        content: The text to display; converted with ``str()`` first.
    """
    # Function-scope import keeps this block self-contained.
    import html

    container_style = (
        "white-space: pre-wrap; word-wrap: break-word; font-size:14px; "
        "background-color: #f5f5f5; padding: 10px; border-radius: 5px; "
        "overflow: hidden;"
    )
    escaped = html.escape(str(content))
    # The markup is assembled without leading indentation or extra newlines
    # so nothing but the escaped content ends up inside the pre-wrap box.
    st.markdown(
        f'<div style="{container_style}"><code>{escaped}</code></div>',
        unsafe_allow_html=True,
    )
|
226 |
+
|
227 |
+
def main():
    """Build the Streamlit page of the TEI XML viewer.

    Steps, in order:
      1. Configure the page and inject CSS that wraps code blocks.
      2. Load every ``*.xml`` file from ``./xmls`` (sorted by path); stop the
         script with a message when the folder is missing, empty, or nothing
         parses.
      3. Offer author / place / keyword select boxes; the place and keyword
         choices are narrowed to the selected author when one is chosen.
      4. On "Search", list the documents matching all chosen criteria, in
         load order, reusing the already-parsed trees.
      5. Show all loaded documents in an expander and fill the sidebar with
         descriptive text.
    """
    no_author = "-- Select Author --"
    no_place = "-- Select Place --"
    no_keyword = "-- Select Keyword --"

    st.set_page_config(page_title="DigitalSEE TEI XML Viewer", layout="wide")

    st.markdown(
        """
        <style>
        /* Enable code wrapping in st.code blocks */
        pre, code {
            white-space: pre-wrap !important; /* Allows wrapping */
            word-wrap: break-word !important; /* Breaks long words */
            overflow-x: hidden !important; /* Hides horizontal scrollbar */
        }
        /* Adjust font size for better fit */
        .streamlit-expanderHeader, pre, code {
            font-size: 14px !important;
        }
        /* Ensure the container doesn't force a minimum width */
        .streamlit-expander, .block-container {
            max-width: 100% !important;
        }
        /* Optional: Style for the code background */
        pre {
            background-color: #f5f5f5 !important;
            padding: 10px !important;
            border-radius: 5px !important;
        }
        </style>
        """,
        unsafe_allow_html=True
    )

    st.title("π DigitalSEE TEI XML Viewer")

    xml_folder = Path("./xmls")

    if not xml_folder.exists() or not xml_folder.is_dir():
        st.error(f"The specified folder `{xml_folder}` does not exist or is not a directory.")
        st.stop()

    # Sorted so files (and therefore results) appear in a stable order;
    # glob() itself makes no ordering promise.
    xml_files = sorted(xml_folder.glob("*.xml"))
    if not xml_files:
        st.info(f"No XML files found in the folder `{xml_folder}`.")
        st.stop()

    st.sidebar.header("π XML Files Overview")
    st.sidebar.write(f"**Total XML Files Loaded:** {len(xml_files)}")

    # parsed_trees[i] belongs to valid_files[i]; files that fail to parse
    # are left out of both lists.
    parsed_trees = []
    valid_files = []
    for file in xml_files:
        tree = parse_xml(file)
        if tree is not None:
            parsed_trees.append(tree)
            valid_files.append(file)

    if not parsed_trees:
        st.error("No valid XML files were parsed successfully.")
        st.stop()

    all_authors = get_all_authors(parsed_trees)
    all_keywords = get_all_keywords(parsed_trees)
    all_place_names = get_all_place_names(parsed_trees)

    author_to_places, author_to_keywords = build_author_mappings(parsed_trees, valid_files)

    st.header("π Search TEI XML Files")

    search_col1, search_col2, search_col3 = st.columns(3)

    with search_col1:
        st.markdown("**Search by Author**")
        selected_author = st.selectbox("Select Author", options=[no_author] + all_authors, key="author_select")

    # Narrow the other two pickers to what the chosen author is linked to.
    if selected_author != no_author:
        filtered_places = sorted(author_to_places[selected_author])
        filtered_keywords = sorted(author_to_keywords[selected_author])
    else:
        filtered_places = all_place_names
        filtered_keywords = all_keywords

    with search_col2:
        st.markdown("**Search by Place Name**")
        selected_place = st.selectbox("Select Place", options=[no_place] + filtered_places, key="place_select")

    with search_col3:
        st.markdown("**Search by Keyword**")
        selected_keyword = st.selectbox("Select Keyword", options=[no_keyword] + filtered_keywords, key="keyword_select")

    if st.button("π Search"):
        st.subheader("π Search Results")

        # Only criteria the user actually picked take part in the filter;
        # a document must satisfy all of them.
        criteria = (
            (selected_author, no_author, search_by_author),
            (selected_place, no_place, search_by_place),
            (selected_keyword, no_keyword, search_by_keyword),
        )
        active_criteria = [
            (value, finder)
            for value, placeholder, finder in criteria
            if value != placeholder
        ]
        # Filter the already-parsed trees in load order: no second parse
        # from disk and a deterministic result order.
        matches = [
            (tree, file)
            for tree, file in zip(parsed_trees, valid_files)
            if all(finder(tree, value) for value, finder in active_criteria)
        ]

        if matches:
            st.write(f"**Total Matches:** {len(matches)}")
            for tree, file in matches:
                with st.expander(f"π {file.name}"):
                    _render_tree_sections(tree)

                    if selected_author != no_author:
                        associated_places = sorted(author_to_places.get(selected_author, set()))
                        associated_keywords = sorted(author_to_keywords.get(selected_author, set()))
                    else:
                        associated_places = get_all_place_names([tree])
                        associated_keywords = get_all_keywords([tree])

                    if associated_places:
                        st.markdown("**Associated Places:**")
                        st.write(", ".join(associated_places))
                    if associated_keywords:
                        st.markdown("**Associated Keywords:**")
                        st.write(", ".join(associated_keywords))

                    # Serialize the tree into memory for the download button.
                    buffer = BytesIO()
                    tree.write(buffer, pretty_print=True, encoding='utf-8', xml_declaration=True)
                    buffer.seek(0)
                    st.download_button(
                        label="π₯ Download XML",
                        data=buffer,
                        file_name=f"matched_{file.name}",
                        mime="application/xml"
                    )
        else:
            st.write("No matching files found for the given search criteria.")

    with st.expander("π View All Loaded XML Files"):
        for tree, file in zip(parsed_trees, valid_files):
            with st.container():
                st.markdown(f"### π {file.name}")
                _render_tree_sections(tree)

    st.sidebar.markdown("---")
    st.sidebar.header("Simple Querying Interface")
    st.sidebar.write(
        "Quickly search and filter TEI XML files to find relevant information or themes."
    )

    st.sidebar.header("XML Code Viewer")
    st.sidebar.write(
        "View detailed XML code for commentaries and editions in their original format."
    )

    st.sidebar.header("Downloadable Entries")
    st.sidebar.write(
        "Download entries for offline access and further analysis."
    )

    st.sidebar.header("Comprehensive Meta Information")
    st.sidebar.write(
        "Each entry includes rich metadata, such as XML file author details."
    )


def _render_tree_sections(tree):
    """Write the header summary, commentary XML and edition XML of one document."""
    display_tei_header(tree)

    commentaries = get_commentary(tree)
    if commentaries:
        st.markdown("**Commentary Sections:**")
        for idx, comm in enumerate(commentaries, start=1):
            st.markdown(f"**Commentary {idx} - {comm['subtype']}**")
            st.code(comm['content'], language='xml')
    else:
        st.write("No commentary sections found.")

    editions = get_editions(tree)
    if editions:
        st.markdown("**Edition Sections:**")
        for idx, edition in enumerate(editions, start=1):
            st.markdown(f"**Edition {idx} - Language: {edition['lang']}**")
            st.code(edition['content'], language='xml')
    else:
        st.write("No edition sections found.")
|
443 |
+
|
444 |
+
|
445 |
+
|
446 |
+
# Build the page only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|