wiraindrak committed
Commit d7ed4ab · 1 Parent(s): 4b97b7c

Update app.py

Files changed (1)
  1. app.py +11 -16
app.py CHANGED
@@ -6,13 +6,13 @@ from gradio.mix import Parallel
 tokenizer_t5 = T5Tokenizer.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
 model_t5 = T5ForConditionalGeneration.from_pretrained("panggi/t5-base-indonesian-summarization-cased")
 
-#tokenizer_bert = BertTokenizer.from_pretrained("cahya/bert2bert-indonesian-summarization")
-#tokenizer_bert.bos_token = tokenizer_bert.cls_token
-#tokenizer_bert.eos_token = tokenizer_bert.sep_token
-#model_bert = EncoderDecoderModel.from_pretrained("cahya/bert2bert-indonesian-summarization")
+tokenizer_bert = BertTokenizer.from_pretrained("cahya/bert2bert-indonesian-summarization")
+tokenizer_bert.bos_token = tokenizer_bert.cls_token
+tokenizer_bert.eos_token = tokenizer_bert.sep_token
+model_bert = EncoderDecoderModel.from_pretrained("cahya/bert2bert-indonesian-summarization")
 
-t5_para_tokenizer = AutoTokenizer.from_pretrained("Wikidepia/IndoT5-base-paraphrase")
-t5_para_model = AutoModelForSeq2SeqLM.from_pretrained("Wikidepia/IndoT5-base-paraphrase")
+#t5_para_tokenizer = AutoTokenizer.from_pretrained("Wikidepia/IndoT5-base-paraphrase")
+#t5_para_model = AutoModelForSeq2SeqLM.from_pretrained("Wikidepia/IndoT5-base-paraphrase")
 
 
 def summ_t5(text):
@@ -31,18 +31,13 @@ def summ_t5(text):
 def summ_bert(text):
     input_ids = tokenizer_bert(text, return_tensors="pt")
     summary_ids= model_bert.generate(input_ids,
-            min_length=20,
             max_length=100,
             num_beams=10,
             repetition_penalty=2.5,
             length_penalty=1.0,
             early_stopping=True,
             no_repeat_ngram_size=2,
-            use_cache=True,
-            do_sample = True,
-            temperature = 0.8,
-            top_k = 50,
-            top_p = 0.95)
+            use_cache=True)
 
     summary_text = tokenizer_bert.decode(summary_ids[0], skip_special_tokens=True)
     return summary_text
@@ -67,9 +62,9 @@ def para_t5(text):
 
 def summarize(text):
     t5_ = summ_t5(text)
-    #bert_ = summ_bert(text)
+    bert_ = summ_bert(text)
     #para_ = para_t5(t5_)
-    return t5_
+    return t5_, bert_
 
 if __name__ == "__main__":
     with gr.Blocks() as demo:
@@ -79,7 +74,7 @@ if __name__ == "__main__":
         analyze_button = gr.Button(label="Analyze")
         with gr.Column():
             t5_output = gr.Textbox(label="T5 Base Output")
-            #bert_output = gr.Textbox(label="Bert Base Output")
+            bert_output = gr.Textbox(label="Bert Base Output")
             #para_output = gr.Textbox(label="T5 Paraphrase Output")
-        analyze_button.click(summarize, input_text, [t5_output])
+        analyze_button.click(summarize, inputs=input_text, outputs=[t5_output, bert_output])
     demo.launch()
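
For reviewers who want to exercise the newly enabled bert2bert path outside the Gradio app, here is a minimal standalone sketch. The import line, the function name summ_bert_smoke_test, and the sample text are assumptions (the diff starts at line 6 of app.py, so the real import block is not shown); the model names and generation arguments mirror the committed summ_bert().

# Standalone sketch of the summarization path this commit enables.
# Assumed: the import below and the example text; everything else follows the diff.
from transformers import BertTokenizer, EncoderDecoderModel

tokenizer_bert = BertTokenizer.from_pretrained("cahya/bert2bert-indonesian-summarization")
tokenizer_bert.bos_token = tokenizer_bert.cls_token
tokenizer_bert.eos_token = tokenizer_bert.sep_token
model_bert = EncoderDecoderModel.from_pretrained("cahya/bert2bert-indonesian-summarization")

def summ_bert_smoke_test(text):
    # encode() returns a bare input_ids tensor, which generate() accepts positionally
    input_ids = tokenizer_bert.encode(text, return_tensors="pt")
    summary_ids = model_bert.generate(input_ids,
                                      max_length=100,
                                      num_beams=10,
                                      repetition_penalty=2.5,
                                      length_penalty=1.0,
                                      early_stopping=True,
                                      no_repeat_ngram_size=2,
                                      use_cache=True)
    return tokenizer_bert.decode(summary_ids[0], skip_special_tokens=True)

if __name__ == "__main__":
    print(summ_bert_smoke_test("Teks berita berbahasa Indonesia yang ingin diringkas ..."))

Note that the sketch calls tokenizer_bert.encode() so a plain tensor reaches generate(); the committed summ_bert() passes the full tokenizer output, which may need .input_ids or ** unpacking depending on the transformers version.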