bestroi committed on
Commit
5045934
·
verified ·
1 Parent(s): 7767b59

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -32
app.py CHANGED
@@ -4,6 +4,7 @@ import pandas as pd
4
  from io import StringIO
5
  import folium
6
  from streamlit_folium import st_folium
 
7
 
8
  # -------------------------------
9
  # Authority Lists as XML Strings
@@ -449,8 +450,52 @@ with tabs[1]:
449
  # -------------------------------
450
  # Diplomatic Edition Tab
451
  # -------------------------------
452
- import streamlit as st
453
- import xml.etree.ElementTree as ET
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
 
455
  # Assuming 'tabs' and 'df' are already defined in your Streamlit app
456
  with tabs[2]:
@@ -467,42 +512,13 @@ with tabs[2]:
467
  inscription_elem = root.find(f".//inscription[@n='{selected_inscription_num}']")
468
  text_element = inscription_elem.find("Text") if inscription_elem is not None else None
469
 
470
def render_diplomatic(text_elem):
    """
    Render an XML Text element as uppercase text with line breaks at <lb> tags.

    The element's character data is collected in document order. Every <lb>
    element closes the line gathered so far; each line is stripped of
    leading/trailing whitespace and upper-cased, and lines that end up empty
    are dropped.

    Args:
        text_elem: The ElementTree element whose content is rendered.

    Returns:
        The rendered lines joined with newline characters, or an empty
        string when the element holds no text.
    """
    lines = []
    current_line = []

    def flush_line():
        # Finalize the pending line and start a new one.
        line_text = ''.join(current_line).strip().upper()
        current_line.clear()
        if line_text:
            lines.append(line_text)

    def walk(node, is_root=False):
        if node.tag == 'lb':
            flush_line()
        elif node.text:
            current_line.append(node.text)
        # Children are visited before the tail: an element's tail follows
        # its closing tag, so it comes after everything nested inside it.
        for child in node:
            walk(child)
        # The root's tail lies outside the element being rendered.
        if node.tail and not is_root:
            current_line.append(node.tail)

    walk(text_elem, is_root=True)
    # Emit whatever follows the last <lb>.
    flush_line()

    return '\n'.join(lines)
499
-
500
  if text_element is not None:
501
  diplomatic_text = render_diplomatic(text_element)
502
  st.code(diplomatic_text, language="plaintext")
503
  else:
504
  st.warning("No text found for the selected inscription.")
505
 
 
506
  # -------------------------------
507
  # Editor Edition Tab
508
  # -------------------------------
 
4
  from io import StringIO
5
  import folium
6
  from streamlit_folium import st_folium
7
+ import unicodedata
8
 
9
  # -------------------------------
10
  # Authority Lists as XML Strings
 
450
  # -------------------------------
451
  # Diplomatic Edition Tab
452
  # -------------------------------
453
+
454
+ # Function to remove diacritics from text
455
def remove_diacritics(text: str) -> str:
    """
    Remove diacritics from the input text.

    The text is canonically decomposed (NFD) so that accents and other
    combining marks become separate code points, every nonspacing mark
    (Unicode category ``Mn``) is dropped, and the remainder is recomposed
    (NFC).

    Args:
        text: The string to strip of diacritics.

    Returns:
        The text without nonspacing combining marks, in NFC form.
    """
    decomposed = unicodedata.normalize('NFD', text)
    stripped = ''.join(
        char for char in decomposed
        if unicodedata.category(char) != 'Mn'
    )
    # Recompose so that characters which were only split apart by NFD
    # (and carried no nonspacing mark) are returned in their composed form.
    return unicodedata.normalize('NFC', stripped)
464
+
465
+ # Function to process the Text element
466
def render_diplomatic(text_elem):
    """
    Render an XML Text element as a diplomatic transcription.

    The element's character data is collected in document order and split
    into lines at each <lb> tag. Every line has its diacritics removed (via
    ``remove_diacritics``), all whitespace deleted, and is converted to
    uppercase; lines that end up empty are dropped.

    Args:
        text_elem: The ElementTree element whose content is rendered.

    Returns:
        The rendered lines joined with newline characters, or an empty
        string when the element holds no text.
    """
    lines = []
    current_line = []

    def flush_line():
        # Finalize the pending line and start a new one.
        raw_text = ''.join(current_line)
        current_line.clear()
        # split()/join removes every whitespace character, not just U+0020,
        # so newlines or tabs inside the XML text cannot leak into the
        # output as extra line breaks.
        line_text = ''.join(remove_diacritics(raw_text).split()).upper()
        if line_text:
            lines.append(line_text)

    def walk(node, is_root=False):
        if node.tag == 'lb':
            flush_line()
        elif node.text:
            current_line.append(node.text)
        # Children are visited before the tail: an element's tail follows
        # its closing tag, so it comes after everything nested inside it.
        for child in node:
            walk(child)
        # The root's tail lies outside the element being rendered.
        if node.tail and not is_root:
            current_line.append(node.tail)

    walk(text_elem, is_root=True)
    # Emit whatever follows the last <lb>.
    flush_line()

    return '\n'.join(lines)
499
 
500
  # Assuming 'tabs' and 'df' are already defined in your Streamlit app
501
  with tabs[2]:
 
512
  inscription_elem = root.find(f".//inscription[@n='{selected_inscription_num}']")
513
  text_element = inscription_elem.find("Text") if inscription_elem is not None else None
514
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
515
  if text_element is not None:
516
  diplomatic_text = render_diplomatic(text_element)
517
  st.code(diplomatic_text, language="plaintext")
518
  else:
519
  st.warning("No text found for the selected inscription.")
520
 
521
+
522
  # -------------------------------
523
  # Editor Edition Tab
524
  # -------------------------------