Spaces:
Sleeping
Sleeping
Thomas Dehaene
committed on
Commit
·
34eb802
1
Parent(s):
b3f5232
Bump streamlit
Browse files
- app.py +11 -24
- packages.txt +1 -0
app.py
CHANGED
@@ -2,19 +2,6 @@ from textwrap import wrap
|
|
2 |
|
3 |
from transformers import pipeline
|
4 |
import nlpaug.augmenter.char as nac
|
5 |
-
import subprocess
|
6 |
-
import sys
|
7 |
-
import logging
|
8 |
-
import importlib
|
9 |
-
|
10 |
-
def install():
|
11 |
-
subprocess.check_call([sys.executable, "-m", "pip", "install", "streamlit==0.89.0"])
|
12 |
-
|
13 |
-
install()
|
14 |
-
import streamlit
|
15 |
-
|
16 |
-
logging.warning(streamlit.__version__)
|
17 |
-
|
18 |
|
19 |
streamlit.markdown('# ByT5 Dutch OCR Corrector :pill:')
|
20 |
streamlit.write('This app corrects common dutch OCR mistakes, to showcase how this could be used in an OCR post-processing pipeline.')
|
@@ -24,11 +11,11 @@ To use this:
|
|
24 |
- Enter a text with OCR mistakes and hit 'unscramble':point_down:
|
25 |
- Or enter a normal text, scramble it :twisted_rightwards_arrows: and then hit 'unscramble' :point_down:""")
|
26 |
|
27 |
-
@
|
28 |
suppress_st_warning=True,
|
29 |
show_spinner=False)
|
30 |
def load_model():
|
31 |
-
with
|
32 |
ocr_pipeline=pipeline(
|
33 |
'text2text-generation',
|
34 |
model='ml6team/byt5-base-dutch-ocr-correction',
|
@@ -39,10 +26,10 @@ def load_model():
|
|
39 |
ocr_pipeline = load_model()
|
40 |
|
41 |
|
42 |
-
if 'text' not in
|
43 |
-
|
44 |
|
45 |
-
left_area, right_area =
|
46 |
|
47 |
# Format the left area
|
48 |
left_area.header("Input")
|
@@ -58,16 +45,16 @@ right_area.header("Output")
|
|
58 |
|
59 |
if scramble_button:
|
60 |
aug = nac.OcrAug()
|
61 |
-
|
62 |
-
base_text =
|
63 |
augmented_data = aug.augment(base_text)
|
64 |
-
|
65 |
-
del
|
66 |
placeholder.empty()
|
67 |
-
input_text = placeholder.text_area(value=
|
68 |
|
69 |
if submit_button:
|
70 |
-
base_text =
|
71 |
output_text = " ".join([x['generated_text'] for x in ocr_pipeline(wrap(base_text, 128))])
|
72 |
right_area.markdown('#####')
|
73 |
right_area.text_area(value=output_text, label="Corrected text:")
|
|
|
2 |
|
3 |
from transformers import pipeline
|
4 |
import nlpaug.augmenter.char as nac
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
streamlit.markdown('# ByT5 Dutch OCR Corrector :pill:')
|
7 |
streamlit.write('This app corrects common dutch OCR mistakes, to showcase how this could be used in an OCR post-processing pipeline.')
|
|
|
11 |
- Enter a text with OCR mistakes and hit 'unscramble':point_down:
|
12 |
- Or enter a normal text, scramble it :twisted_rightwards_arrows: and then hit 'unscramble' :point_down:""")
|
13 |
|
14 |
+
@st.cache(allow_output_mutation=True,
|
15 |
suppress_st_warning=True,
|
16 |
show_spinner=False)
|
17 |
def load_model():
|
18 |
+
with st.spinner('Please wait for the model to load...'):
|
19 |
ocr_pipeline=pipeline(
|
20 |
'text2text-generation',
|
21 |
model='ml6team/byt5-base-dutch-ocr-correction',
|
|
|
26 |
ocr_pipeline = load_model()
|
27 |
|
28 |
|
29 |
+
if 'text' not in st.session_state:
|
30 |
+
st.session_state.text = ""
|
31 |
|
32 |
+
left_area, right_area = st.beta_columns(2)
|
33 |
|
34 |
# Format the left area
|
35 |
left_area.header("Input")
|
|
|
45 |
|
46 |
if scramble_button:
|
47 |
aug = nac.OcrAug()
|
48 |
+
st.session_state.text = st.session_state.input_text
|
49 |
+
base_text = st.session_state.text
|
50 |
augmented_data = aug.augment(base_text)
|
51 |
+
st.session_state.text = augmented_data
|
52 |
+
del st.session_state.input_text
|
53 |
placeholder.empty()
|
54 |
+
input_text = placeholder.text_area(value=st.session_state.text, label='Insert text:', key='input_text')
|
55 |
|
56 |
if submit_button:
|
57 |
+
base_text = st.session_state.input_text
|
58 |
output_text = " ".join([x['generated_text'] for x in ocr_pipeline(wrap(base_text, 128))])
|
59 |
right_area.markdown('#####')
|
60 |
right_area.text_area(value=output_text, label="Corrected text:")
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
curl && export STREAMLIT_VERSION==0.89.0
|