Bram Vanroy
add app
d3a07ee
raw
history blame
4.11 kB
import base64
from html import escape
from io import StringIO
from math import ceil

import streamlit as st

from utils import get_resources, simplify
# Page-level configuration: title and icon of the demo.
st.set_page_config(page_title="Text Simplification in Dutch", page_icon="🏃")

# Batch size reported to the user and used to compute the progress-bar step.
BATCH_SIZE = 8

# Make sure the session entry that holds the text to simplify exists before it is read.
if "text_to_simplify" not in st.session_state:
    st.session_state["text_to_simplify"] = None
st.title("🏃 Text Simplification in Dutch")

# Lets the user switch between uploading a file and typing into the text box.
fupload_check = st.checkbox("File upload?")

# Input format expected by the app: one sentence per line.
input_instructions = (
    "Make sure that the file or text in the text box contains **one sentence per line**. Empty lines will"
    " be removed."
)
st.markdown(input_instructions)
# Collect the text to simplify, either from an uploaded file or from the text box, and store it
# (stripped of surrounding whitespace) in the session state. No upload yet -> None.
if fupload_check:
    uploaded_file = st.file_uploader("Text file", label_visibility="collapsed")
    if uploaded_file is None:
        st.session_state["text_to_simplify"] = None
    else:
        try:
            # "utf-8-sig" reads plain UTF-8 as well, but drops a leading byte-order mark.
            # str.strip() does not treat U+FEFF as whitespace, so it would otherwise stay
            # attached to the first sentence.
            file_text = uploaded_file.getvalue().decode("utf-8-sig")
        except UnicodeDecodeError:
            # Report an undecodable file to the user instead of crashing the app.
            st.session_state["text_to_simplify"] = None
            st.error("The uploaded file could not be decoded as UTF-8 text.", icon="⚠️")
        else:
            st.session_state["text_to_simplify"] = file_text.strip()
else:
    st.session_state["text_to_simplify"] = st.text_area(
        label="Sentences to simplify",
        label_visibility="collapsed",
        height=200,
        value="Met het naderen van de zonovergoten middaghemel op deze betoverende dag, waarbij de atmosferische omstandigheden een onbelemmerde convergentie van cumulusbewolking en uitgestrekte stratosferische azuurblauwe wijdheid faciliteren, lijken de geaggregeerde weersverschijnselen van vandaag, die variëren van sporadische plensbuien tot kalme zuchtjes wind en zeldzame opvlammingen van bliksem, de delicate balans tussen meteorologische complexiteit en eenvoud te weerspiegelen, waardoor de gepassioneerde observator met een gevoel van ontzag en verwondering wordt vervuld.",
    ).strip()
def _get_increment_size(num_sents) -> int:
    """Return the percentage to add to the progress bar after each processed batch.

    When all sentences fit in a single batch (``num_sents <= BATCH_SIZE``) the step is the
    full 100. Otherwise it is 100 divided by the (fractional) number of batches, rounded up.

    NOTE(review): assumes the simplification step processes ``BATCH_SIZE`` sentences per
    batch -- confirm against ``simplify``.
    """
    if num_sents > BATCH_SIZE:
        return ceil(100 / (num_sents / BATCH_SIZE))
    return 100
# Layout placeholders: the button lives in the first column, the download link in the second,
# and the error message and the results each get their own slot below.
btn_col, results_col = st.columns(2)
btn_ct = btn_col.empty()
error_ct = st.empty()
simpl_ct = st.container()

if st.session_state["text_to_simplify"]:
    if btn_ct.button("Simplify text"):
        error_ct.empty()
        # One sentence per line; lines that are empty after stripping are dropped.
        lines = [
            strip_line
            for line in st.session_state["text_to_simplify"].splitlines()
            if (strip_line := line.strip())
        ]
        num_sentences = len(lines)

        pbar = st.progress(0, text=f"Simplifying sentences in batches of {BATCH_SIZE}...")
        increment = _get_increment_size(num_sentences)
        percent_done = 0

        model, tokenizer = get_resources()

        simpl_ct.caption("Simplified text")
        output_ct = simpl_ct.empty()
        all_simplifications = []
        results_html = "<ol>"
        for input_batch, simplifications in simplify(lines, model, tokenizer):
            for input_text, simplification in zip(input_batch, simplifications):
                # The list is rendered with unsafe_allow_html=True, so user text and model
                # output must be escaped: a stray "<" or "&" would otherwise break the markup
                # or inject arbitrary HTML into the page.
                results_html += (
                    "<li>\n"
                    "<ul>\n"
                    f"<li><strong>Input text:</strong> {escape(input_text)}</li>\n"
                    f"<li><strong>Simplification:</strong> {escape(simplification)}</li>\n"
                    "</ul>\n"
                    "</li>"
                )
                # Re-render the whole list after every sentence so results show up incrementally.
                output_ct.empty()
                output_ct.markdown(results_html + "</ol>", unsafe_allow_html=True)

            all_simplifications.extend(simplifications)

            # Advance the bar once per batch, never past 100.
            percent_done += increment
            pbar.progress(min(percent_done, 100))
        pbar.empty()

        # The download contains the raw (unescaped) simplifications, one per line, embedded in
        # the link itself as a base64 data URI.
        all_simplifications = "\n".join(all_simplifications) + "\n"
        b64 = base64.b64encode(all_simplifications.encode("utf-8")).decode("utf-8")
        results_col.markdown(
            f'<a download="dutch-simplifications.txt" href="data:file/txt;base64,{b64}" title="Download">Download simplifications</a>',
            unsafe_allow_html=True,
        )
else:
    # Nothing to simplify: hide the button and tell the user why.
    btn_ct.empty()
    error_ct.error("Text cannot be empty!", icon="⚠️")
    simpl_ct.container()
########################
# Information, socials #
########################
st.header("Project background")
# NOTE(review): the background section is rendered with an empty body -- text still to be written?
st.markdown("")

st.header("Contact ✒️")
contact_text = (
    "Would you like additional functionality in the demo, do you have questions, or just want to get in touch?"
    " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
    " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!"
)
st.markdown(contact_text)