byt5_ocr_corrector

Sleeping

App Files Files Community

Thomas Dehaene committed on Sep 28, 2021

Commit

34eb802

1 Parent(s): b3f5232

Bump streamlit

Browse files

Files changed (2) hide show

app.py +11 -24
packages.txt +1 -0

app.py CHANGED Viewed

@@ -2,19 +2,6 @@ from textwrap import wrap
 from transformers import pipeline
 import nlpaug.augmenter.char as nac
-import subprocess
-import sys
-import logging
-import importlib
-def install():
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "streamlit==0.89.0"])
-install()
-import streamlit
-logging.warning(streamlit.__version__)
 streamlit.markdown('# ByT5 Dutch OCR Corrector :pill:')
 streamlit.write('This app corrects common dutch OCR mistakes, to showcase how this could be used in an OCR post-processing pipeline.')
@@ -24,11 +11,11 @@ To use this:
 - Enter a text with OCR mistakes and hit 'unscramble':point_down:
 - Or enter a normal text, scramble it :twisted_rightwards_arrows: and then hit 'unscramble' :point_down:""")
-@streamlit.cache(allow_output_mutation=True,
           suppress_st_warning=True,
           show_spinner=False)
 def load_model():
-    with streamlit.spinner('Please wait for the model to load...'):
         ocr_pipeline=pipeline(
             'text2text-generation',
             model='ml6team/byt5-base-dutch-ocr-correction',
@@ -39,10 +26,10 @@ def load_model():
 ocr_pipeline = load_model()
-if 'text' not in streamlit.session_state:
-    streamlit.session_state.text = ""
-left_area, right_area = streamlit.beta_columns(2)
 # Format the left area
 left_area.header("Input")
@@ -58,16 +45,16 @@ right_area.header("Output")
 if scramble_button:
     aug = nac.OcrAug()
-    streamlit.session_state.text = streamlit.session_state.input_text
-    base_text = streamlit.session_state.text
     augmented_data = aug.augment(base_text)
-    streamlit.session_state.text = augmented_data
-    del streamlit.session_state.input_text
     placeholder.empty()
-    input_text = placeholder.text_area(value=streamlit.session_state.text, label='Insert text:', key='input_text')
 if submit_button:
-    base_text = streamlit.session_state.input_text
     output_text = " ".join([x['generated_text'] for x in ocr_pipeline(wrap(base_text, 128))])
     right_area.markdown('#####')
     right_area.text_area(value=output_text, label="Corrected text:")

 from transformers import pipeline
 import nlpaug.augmenter.char as nac
 streamlit.markdown('# ByT5 Dutch OCR Corrector :pill:')
 streamlit.write('This app corrects common dutch OCR mistakes, to showcase how this could be used in an OCR post-processing pipeline.')
 - Enter a text with OCR mistakes and hit 'unscramble':point_down:
 - Or enter a normal text, scramble it :twisted_rightwards_arrows: and then hit 'unscramble' :point_down:""")
+@st.cache(allow_output_mutation=True,
           suppress_st_warning=True,
           show_spinner=False)
 def load_model():
+    with st.spinner('Please wait for the model to load...'):
         ocr_pipeline=pipeline(
             'text2text-generation',
             model='ml6team/byt5-base-dutch-ocr-correction',
 ocr_pipeline = load_model()
+if 'text' not in st.session_state:
+    st.session_state.text = ""
+left_area, right_area = st.beta_columns(2)
 # Format the left area
 left_area.header("Input")
 if scramble_button:
     aug = nac.OcrAug()
+    st.session_state.text = st.session_state.input_text
+    base_text = st.session_state.text
     augmented_data = aug.augment(base_text)
+    st.session_state.text = augmented_data
+    del st.session_state.input_text
     placeholder.empty()
+    input_text = placeholder.text_area(value=st.session_state.text, label='Insert text:', key='input_text')
 if submit_button:
+    base_text = st.session_state.input_text
     output_text = " ".join([x['generated_text'] for x in ocr_pipeline(wrap(base_text, 128))])
     right_area.markdown('#####')
     right_area.text_area(value=output_text, label="Corrected text:")

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ curl && export STREAMLIT_VERSION==0.89.0