MatthiasC committed on
Commit
937ed20
·
1 Parent(s): 065051d

Clean up code

Browse files
Files changed (3) hide show
  1. .idea/workspace.xml +2 -1
  2. app.py +3 -17
  3. custom_renderer.py +25 -26
.idea/workspace.xml CHANGED
@@ -2,6 +2,7 @@
2
  <project version="4">
3
  <component name="ChangeListManager">
4
  <list default="true" id="57f23431-346d-451d-8d77-db859508e831" name="Changes" comment="">
 
5
  <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
6
  <change beforePath="$PROJECT_DIR$/custom_renderer.py" beforeDir="false" afterPath="$PROJECT_DIR$/custom_renderer.py" afterDir="false" />
7
  </list>
@@ -43,7 +44,7 @@
43
  <component name="PropertiesComponent"><![CDATA[{
44
  "keyToString": {
45
  "last_opened_file_path": "/home/matthias/Documents/Summarization-fact-checker/HugginfaceSpace/HFSummSpace",
46
- "settings.editor.selected.configurable": "editor.preferences.fonts.default"
47
  }
48
  }]]></component>
49
  <component name="RecentsManager">
 
2
  <project version="4">
3
  <component name="ChangeListManager">
4
  <list default="true" id="57f23431-346d-451d-8d77-db859508e831" name="Changes" comment="">
5
+ <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
6
  <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
7
  <change beforePath="$PROJECT_DIR$/custom_renderer.py" beforeDir="false" afterPath="$PROJECT_DIR$/custom_renderer.py" afterDir="false" />
8
  </list>
 
44
  <component name="PropertiesComponent"><![CDATA[{
45
  "keyToString": {
46
  "last_opened_file_path": "/home/matthias/Documents/Summarization-fact-checker/HugginfaceSpace/HFSummSpace",
47
+ "settings.editor.selected.configurable": "editor.preferences.folding"
48
  }
49
  }]]></component>
50
  <component name="RecentsManager">
app.py CHANGED
@@ -19,7 +19,8 @@ from transformers import pipeline
19
  import os
20
 
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
-
 
23
 
24
  @st.experimental_singleton
25
  def get_sentence_embedding_model():
@@ -108,7 +109,6 @@ def fetch_dependency_svg(filename: str) -> AnyStr:
108
  def display_summary(summary_content: str):
109
  st.session_state.summary_output = summary_content
110
  soup = BeautifulSoup(summary_content, features="html.parser")
111
- HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
112
  return HTML_WRAPPER.format(soup)
113
 
114
 
@@ -149,7 +149,6 @@ def get_all_entities(text):
149
  return list(itertools.chain.from_iterable(all_entities_per_sentence))
150
 
151
 
152
- # TODO: this functionality can be cached (e.g. by storing html file output) if wanted (or just store list of entities idk)
153
  def get_and_compare_entities():
154
  # article_content = fetch_article_contents(article_name)
155
  article_content = st.session_state.article_text
@@ -194,10 +193,6 @@ def highlight_entities():
194
  for entity in unmatched_entities:
195
  summary_content = summary_content.replace(entity, markdown_start_red + entity + markdown_end)
196
  soup = BeautifulSoup(summary_content, features="html.parser")
197
-
198
- HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
199
- margin-bottom: 2.5rem">{}</div> """
200
-
201
  return HTML_WRAPPER.format(soup)
202
 
203
 
@@ -207,9 +202,7 @@ def render_dependency_parsing(text: Dict):
207
  st.write(get_svg(html), unsafe_allow_html=True)
208
 
209
 
210
- # If deps for article: True, otherwise deps for summary calc
211
  def check_dependency(article: bool):
212
- # nlp = spacy.load('en_core_web_lg')
213
  if article:
214
  text = st.session_state.article_text
215
  all_entities = get_all_entities_per_sentence(text)
@@ -220,7 +213,6 @@ def check_dependency(article: bool):
220
  # all_entities = st.session_state.entities_per_sentence_summary
221
  doc = nlp(text)
222
  tok_l = doc.to_json()['tokens']
223
- # all_deps = ""
224
  test_list_dict_output = []
225
 
226
  sentences = list(doc.sents)
@@ -244,7 +236,6 @@ def check_dependency(article: bool):
244
  "target_word_index": (t['head'] - sentence.start),
245
  "identifier": identifier, "sentence": str(sentence)})
246
  elif object_target in all_entities[i]:
247
- # all_deps = all_deps.join(str(sentence))
248
  identifier = object_here + t['dep'] + object_target
249
  test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
250
  "target_word_index": (t['head'] - sentence.start),
@@ -252,7 +243,6 @@ def check_dependency(article: bool):
252
  else:
253
  continue
254
  return test_list_dict_output
255
- # return all_deps
256
 
257
 
258
  def render_svg(svg_file):
@@ -320,7 +310,7 @@ st.markdown("Let’s start by selecting an article text for which we want to gen
320
  "generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")
321
 
322
  selected_article = st.selectbox('Select an article or provide your own:',
323
- list_all_article_names()) # index=0, format_func=special_internal_function, key=None, help=None, on_change=None, args=None, kwargs=None, *, disabled=False)
324
  st.session_state.article_text = fetch_article_contents(selected_article)
325
  article_text = st.text_area(
326
  label='Full article text',
@@ -391,8 +381,6 @@ if summarize_button:
391
  if st.session_state.unchanged_text:
392
  entity_specific_text = fetch_entity_specific_contents(selected_article)
393
  soup = BeautifulSoup(entity_specific_text, features="html.parser")
394
- HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
395
- margin-bottom: 2.5rem">{}</div> """
396
  st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
397
 
398
  # DEPENDENCY PARSING PART
@@ -429,8 +417,6 @@ if summarize_button:
429
  st.write(cur_svg_image, unsafe_allow_html=True)
430
  dep_specific_text = fetch_dependency_specific_contents(selected_article)
431
  soup = BeautifulSoup(dep_specific_text, features="html.parser")
432
- HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
433
- margin-bottom: 2.5rem">{}</div> """
434
  st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
435
  else:
436
  summary_deps = check_dependency(False)
 
19
  import os
20
 
21
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
22
+ HTML_WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem;
23
+ margin-bottom: 2.5rem">{}</div> """
24
 
25
  @st.experimental_singleton
26
  def get_sentence_embedding_model():
 
109
  def display_summary(summary_content: str):
110
  st.session_state.summary_output = summary_content
111
  soup = BeautifulSoup(summary_content, features="html.parser")
 
112
  return HTML_WRAPPER.format(soup)
113
 
114
 
 
149
  return list(itertools.chain.from_iterable(all_entities_per_sentence))
150
 
151
 
 
152
  def get_and_compare_entities():
153
  # article_content = fetch_article_contents(article_name)
154
  article_content = st.session_state.article_text
 
193
  for entity in unmatched_entities:
194
  summary_content = summary_content.replace(entity, markdown_start_red + entity + markdown_end)
195
  soup = BeautifulSoup(summary_content, features="html.parser")
 
 
 
 
196
  return HTML_WRAPPER.format(soup)
197
 
198
 
 
202
  st.write(get_svg(html), unsafe_allow_html=True)
203
 
204
 
 
205
  def check_dependency(article: bool):
 
206
  if article:
207
  text = st.session_state.article_text
208
  all_entities = get_all_entities_per_sentence(text)
 
213
  # all_entities = st.session_state.entities_per_sentence_summary
214
  doc = nlp(text)
215
  tok_l = doc.to_json()['tokens']
 
216
  test_list_dict_output = []
217
 
218
  sentences = list(doc.sents)
 
236
  "target_word_index": (t['head'] - sentence.start),
237
  "identifier": identifier, "sentence": str(sentence)})
238
  elif object_target in all_entities[i]:
 
239
  identifier = object_here + t['dep'] + object_target
240
  test_list_dict_output.append({"dep": t['dep'], "cur_word_index": (t['id'] - sentence.start),
241
  "target_word_index": (t['head'] - sentence.start),
 
243
  else:
244
  continue
245
  return test_list_dict_output
 
246
 
247
 
248
  def render_svg(svg_file):
 
310
  "generated from it might not be optimal, leading to suboptimal performance of the post-processing steps.")
311
 
312
  selected_article = st.selectbox('Select an article or provide your own:',
313
+ list_all_article_names())
314
  st.session_state.article_text = fetch_article_contents(selected_article)
315
  article_text = st.text_area(
316
  label='Full article text',
 
381
  if st.session_state.unchanged_text:
382
  entity_specific_text = fetch_entity_specific_contents(selected_article)
383
  soup = BeautifulSoup(entity_specific_text, features="html.parser")
 
 
384
  st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
385
 
386
  # DEPENDENCY PARSING PART
 
417
  st.write(cur_svg_image, unsafe_allow_html=True)
418
  dep_specific_text = fetch_dependency_specific_contents(selected_article)
419
  soup = BeautifulSoup(dep_specific_text, features="html.parser")
 
 
420
  st.write("💡👇 **Specific example explanation** 👇💡", HTML_WRAPPER.format(soup), unsafe_allow_html=True)
421
  else:
422
  summary_deps = check_dependency(False)
custom_renderer.py CHANGED
@@ -1,6 +1,26 @@
1
  from typing import Dict
2
  from PIL import ImageFont
3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
  def get_pil_text_size(text, font_size, font_name):
@@ -21,15 +41,7 @@ def render_arrow(
21
  i (int): Unique ID, typically arrow index.
22
  RETURNS (str): Rendered SVG markup.
23
  """
24
- TPL_DEP_ARCS = """
25
- <g class="displacy-arrow">
26
- <path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="red"/>
27
- <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
28
- <textPath xlink:href="#arrow-{id}-{i}" class="displacy-label" startOffset="50%" side="{label_side}" fill="red" text-anchor="middle">{label}</textPath>
29
- </text>
30
- <path class="displacy-arrowhead" d="{head}" fill="red"/>
31
- </g>
32
- """
33
  arc = get_arc(start + 10, 50, 5, end + 10)
34
  arrowhead = get_arrowhead(direction, start + 10, 50, end + 10)
35
  label_side = "right" if direction == "rtl" else "left"
@@ -75,26 +87,15 @@ def get_arrowhead(direction: str, x: int, y: int, end: int) -> str:
75
 
76
 
77
  def render_sentence_custom(unmatched_list: Dict, nlp):
78
- TPL_DEP_WORDS = """
79
- <text class="displacy-token" fill="currentColor" text-anchor="start" y="{y}">
80
- <tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
81
- <tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
82
- </text>
83
- """
84
-
85
- TPL_DEP_SVG = """
86
- <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="{lang}" id="{id}" class="displacy" width="{width}" height="{height}" direction="{dir}" style="max-width: none; height: {height}px; color: {color}; background: {bg}; font-family: {font}; direction: {dir}">{content}</svg>
87
- """
88
  arcs_svg = []
89
- #nlp = spacy.load('en_core_web_lg')
90
  doc = nlp(unmatched_list["sentence"])
91
 
92
  x_value_counter = 10
93
  index_counter = 0
94
  svg_words = []
95
- #words = unmatched_list["sentence"].split(" ")
96
- coords_test = []
97
  direction_current = "rtl"
 
98
  if unmatched_list["cur_word_index"] < unmatched_list["target_word_index"]:
99
  min_index = unmatched_list["cur_word_index"]
100
  max_index = unmatched_list["target_word_index"]
@@ -108,13 +109,13 @@ def render_sentence_custom(unmatched_list: Dict, nlp):
108
  pixel_x_length = get_pil_text_size(word, 16, 'arial.ttf')[0]
109
  svg_words.append(TPL_DEP_WORDS.format(text=word, tag="", x=x_value_counter, y=70))
110
  if min_index <= index_counter <= max_index:
111
- coords_test.append(x_value_counter)
112
  if index_counter < max_index - 1:
113
  x_value_counter += 50
114
  index_counter += 1
115
  x_value_counter += pixel_x_length + 4
116
 
117
- arcs_svg.append(render_arrow(unmatched_list['dep'], coords_test[0], coords_test[-1], direction_current, i))
118
 
119
  content = "".join(svg_words) + "".join(arcs_svg)
120
 
@@ -130,5 +131,3 @@ def render_sentence_custom(unmatched_list: Dict, nlp):
130
  lang="en",
131
  )
132
  return full_svg
133
-
134
-
 
1
  from typing import Dict
2
  from PIL import ImageFont
3
 
4
+ TPL_DEP_WORDS = """
5
+ <text class="displacy-token" fill="currentColor" text-anchor="start" y="{y}">
6
+ <tspan class="displacy-word" fill="currentColor" x="{x}">{text}</tspan>
7
+ <tspan class="displacy-tag" dy="2em" fill="currentColor" x="{x}">{tag}</tspan>
8
+ </text>
9
+ """
10
+
11
+ TPL_DEP_SVG = """
12
+ <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:lang="{lang}" id="{id}" class="displacy" width="{width}" height="{height}" direction="{dir}" style="max-width: none; height: {height}px; color: {color}; background: {bg}; font-family: {font}; direction: {dir}">{content}</svg>
13
+ """
14
+
15
+ TPL_DEP_ARCS = """
16
+ <g class="displacy-arrow">
17
+ <path class="displacy-arc" id="arrow-{id}-{i}" stroke-width="{stroke}px" d="{arc}" fill="none" stroke="red"/>
18
+ <text dy="1.25em" style="font-size: 0.8em; letter-spacing: 1px">
19
+ <textPath xlink:href="#arrow-{id}-{i}" class="displacy-label" startOffset="50%" side="{label_side}" fill="red" text-anchor="middle">{label}</textPath>
20
+ </text>
21
+ <path class="displacy-arrowhead" d="{head}" fill="red"/>
22
+ </g>
23
+ """
24
 
25
 
26
  def get_pil_text_size(text, font_size, font_name):
 
41
  i (int): Unique ID, typically arrow index.
42
  RETURNS (str): Rendered SVG markup.
43
  """
44
+
 
 
 
 
 
 
 
 
45
  arc = get_arc(start + 10, 50, 5, end + 10)
46
  arrowhead = get_arrowhead(direction, start + 10, 50, end + 10)
47
  label_side = "right" if direction == "rtl" else "left"
 
87
 
88
 
89
  def render_sentence_custom(unmatched_list: Dict, nlp):
 
 
 
 
 
 
 
 
 
 
90
  arcs_svg = []
 
91
  doc = nlp(unmatched_list["sentence"])
92
 
93
  x_value_counter = 10
94
  index_counter = 0
95
  svg_words = []
96
+ words_under_arc = []
 
97
  direction_current = "rtl"
98
+
99
  if unmatched_list["cur_word_index"] < unmatched_list["target_word_index"]:
100
  min_index = unmatched_list["cur_word_index"]
101
  max_index = unmatched_list["target_word_index"]
 
109
  pixel_x_length = get_pil_text_size(word, 16, 'arial.ttf')[0]
110
  svg_words.append(TPL_DEP_WORDS.format(text=word, tag="", x=x_value_counter, y=70))
111
  if min_index <= index_counter <= max_index:
112
+ words_under_arc.append(x_value_counter)
113
  if index_counter < max_index - 1:
114
  x_value_counter += 50
115
  index_counter += 1
116
  x_value_counter += pixel_x_length + 4
117
 
118
+ arcs_svg.append(render_arrow(unmatched_list['dep'], words_under_arc[0], words_under_arc[-1], direction_current, i))
119
 
120
  content = "".join(svg_words) + "".join(arcs_svg)
121
 
 
131
  lang="en",
132
  )
133
  return full_svg