Spaces:
Build error
Build error
import nltk | |
import streamlit as st | |
import validators | |
from transformers import pipeline | |
from validators import ValidationFailure | |
from Summarizer import Summarizer | |
# Fetch the NLTK 'punkt' resource at start-up.
# NOTE(review): presumably required by Summarizer for sentence splitting — confirm.
nltk.download('punkt')

# Initial value for the "number of sentences to be extracted" input.
DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH = 10

# --- Page title and introductory text -------------------------------------
st.markdown('# Terms & conditions abstractive summarization model :pencil:')
st.write(
    'This app provides the abstract summary of the provided terms & conditions. '
    'The abstractive summarization is preceded by LSA (Latent Semantic Analysis) extractive summarization'
)
st.write('Information about the model :point_right: https://huggingface.co/ml6team/distilbart-tos-summarizer-tosdr')

# --- Usage instructions ----------------------------------------------------
st.markdown("""
To use this:
- Number of sentences to be extracted is configurable
- Specify an URL to extract contents OR copy terms & conditions content and hit 'Summarize'
""")
def create_pipeline():
    """Build the transformers summarization pipeline used by the app.

    A Streamlit spinner is displayed while the model and its tokenizer
    (both ``ml6team/distilbart-tos-summarizer-tosdr``) are being loaded.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``summarization`` task.
    """
    # Model and tokenizer share the same Hugging Face identifier.
    model_name = 'ml6team/distilbart-tos-summarizer-tosdr'
    with st.spinner('Please wait for the model to load...'):
        return pipeline(
            task='summarization',
            model=model_name,
            tokenizer=model_name,
        )
def display_abstractive_summary(summary) -> None:
    """Render the abstractive summary section of the page.

    Args:
        summary: Content passed to ``st.markdown`` below the section header.
    """
    # Section header.
    st.subheader("Abstractive Summary")
    # Header-only markdown line placed between the subheader and the content.
    st.markdown('#####')
    # The summary itself.
    st.markdown(summary)
def display_extractive_summary(terms_and_conditions_sentences: list, summary_sentences: list) -> None:
    """Render the full text with the extracted summary sentences highlighted.

    The sentences of the terms & conditions are joined with single spaces and
    every occurrence of a summary sentence is wrapped in a yellow ``<span>``.
    Because the result is rendered with ``unsafe_allow_html=True`` and the text
    can originate from an arbitrary URL or pasted content, all text is
    HTML-escaped before the highlight markup is inserted.

    Args:
        terms_and_conditions_sentences: All sentences (strings) of the
            document, in display order.
        summary_sentences: The sentences (strings) to highlight.
    """
    # Local import keeps this block self-contained (stdlib only).
    import html

    st.subheader("Extractive Summary")
    st.markdown('#####')

    # Escape first so that markup contained in the source text is displayed
    # literally instead of being interpreted by the browser.
    highlighted_text = html.escape(" ".join(terms_and_conditions_sentences), quote=False)

    # dict.fromkeys de-duplicates while preserving order; highlighting the same
    # sentence twice would nest one <span> inside another.
    for sentence in dict.fromkeys(summary_sentences):
        escaped_sentence = html.escape(sentence, quote=False)
        if not escaped_sentence:
            # str.replace('') would insert the markup between every character.
            continue
        highlighted_text = highlighted_text.replace(
            escaped_sentence,
            f"<span style='background-color: #FFFF00'>{escaped_sentence}</span>",
        )

    st.write(highlighted_text, unsafe_allow_html=True)
def is_valid_url(url: str) -> bool:
    """Tell whether ``url`` passes ``validators.url``.

    Args:
        url: The candidate string to check.

    Returns:
        ``False`` when ``validators.url`` yields a ``ValidationFailure``
        instance, ``True`` otherwise.
    """
    return not isinstance(validators.url(url), ValidationFailure)
# Summarizer wraps the abstractive pipeline created above.
summarizer: Summarizer = Summarizer(create_pipeline())

# Seed the session state with the initial values shown in the form widgets.
if 'tc_text' not in st.session_state:
    st.session_state['tc_text'] = ''

if 'sentences_length' not in st.session_state:
    st.session_state['sentences_length'] = DEFAULT_EXTRACTED_ARTICLE_SENTENCES_LENGTH

# CSS override: lay out radio-button options in a row.
st.write('<style>div.row-widget.stRadio > div{flex-direction:row;}</style>', unsafe_allow_html=True)

st.header("Input")

with st.form(key='terms-and-conditions'):
    sentences_length_input = st.number_input(
        label='Number of sentences to be extracted:',
        min_value=1,
        value=st.session_state.sentences_length
    )
    tc_text_input = st.text_area(
        value=st.session_state.tc_text,
        label='Terms & conditions content or specify an URL:',
        height=240
    )

    submit_button = st.form_submit_button(label='Summarize')

if submit_button:
    if not tc_text_input.strip():
        # Nothing to summarize: avoid running the model on empty input.
        st.warning('Please provide terms & conditions content or an URL.')
    else:
        # A valid URL is fetched and summarized; anything else is treated as
        # the raw terms & conditions text.
        if is_valid_url(tc_text_input):
            extract_sentences = summarizer.extractive_summary_from_url
        else:
            extract_sentences = summarizer.extractive_summary_from_text
        all_sentences, extract_summary_sentences = extract_sentences(tc_text_input, sentences_length_input)

        # The abstractive model is fed the extractive summary, not the full text.
        extract_summary = " ".join(extract_summary_sentences)
        abstract_summary = summarizer.abstractive_summary(extract_summary)

        display_extractive_summary(all_sentences, extract_summary_sentences)
        display_abstractive_summary(abstract_summary)