Victoria Slocum committed on
Commit
db85c2c
·
1 Parent(s): c498800
Files changed (1) hide show
  1. app.py +116 -43
app.py CHANGED
@@ -16,6 +16,7 @@ texts = {"en": DEFAULT_TEXT, "ca": "Apple està buscant comprar una startup del
16
  "ja": "アップルがイギリスの新興企業を10億ドルで購入を検討", "ko": "애플이 영국의 스타트업을 10억 달러에 인수하는 것을 알아보고 있다.", "lt": "Jaunikis pirmąją vestuvinę naktį iškeitė į areštinės gultą", "nb": "Apple vurderer å kjøpe britisk oppstartfirma for en milliard dollar.", "nl": "Apple overweegt om voor 1 miljard een U.K. startup te kopen",
17
  "pl": "Poczuł przyjemną woń mocnej kawy.", "pt": "Apple está querendo comprar uma startup do Reino Unido por 100 milhões de dólares", "ro": "Apple plănuiește să cumpere o companie britanică pentru un miliard de dolari", "ru": "Apple рассматривает возможность покупки стартапа из Соединённого Королевства за $1 млрд", "sv": "Apple överväger att köpa brittisk startup för 1 miljard dollar.", "zh": "作为语言而言,为世界使用人数最多的语言,目前世界有五分之一人口做为母语。"}
18
 
 
19
  def get_all_models():
20
  with open("requirements.txt") as f:
21
  content = f.readlines()
@@ -35,7 +36,7 @@ def dependency(text, col_punct, col_phrase, compact, bg, font, model):
35
  nlp = spacy.load(model + "_sm")
36
  doc = nlp(text)
37
  options = {"compact": compact, "collapse_phrases": col_phrase,
38
- "collapse_punct": col_punct, "bg": bg, "color":font}
39
  html = displacy.render(doc, style="dep", options=options)
40
  return html
41
 
@@ -60,6 +61,7 @@ def token(text, attributes, model):
60
  data = pd.DataFrame(data, columns=attributes)
61
  return data
62
 
 
63
  def default_token(text, attributes, model):
64
  nlp = spacy.load(model + "_sm")
65
  data = []
@@ -154,8 +156,7 @@ with demo:
154
  gr.Markdown("")
155
  with gr.Column():
156
  gr.Markdown("")
157
-
158
-
159
  with gr.Row():
160
  with gr.Column():
161
  text_input = gr.Textbox(
@@ -163,89 +164,161 @@ with demo:
163
  with gr.Column():
164
  gr.Markdown("")
165
  button = gr.Button("Generate", variant="primary")
166
- with gr.Column():
167
  with gr.Tabs():
168
  with gr.TabItem(""):
169
  with gr.Column():
170
- gr.Markdown("## [Dependency Parser](https://spacy.io/usage/visualizers#dep)")
 
 
 
171
  with gr.Row():
172
  with gr.Column():
173
  gr.Markdown("""```python
174
  import spacy
175
  from spacy import displacy
176
 
 
 
177
  displacy.serve(doc, style="dep")
178
  ```
179
  """)
180
-
181
  with gr.Column():
182
- col_punct = gr.Checkbox(label="Collapse Punctuation", value=True)
183
- col_phrase = gr.Checkbox(label="Collapse Phrases", value=True)
 
 
 
 
 
 
 
184
  compact = gr.Checkbox(label="Compact", value=False)
185
  with gr.Column():
186
- bg = gr.Textbox(label="Background Color", value=DEFAULT_COLOR)
 
187
  with gr.Column():
188
- text = gr.Textbox(label="Text Color", value="black")
189
- depen_output = gr.HTML(value=dependency(DEFAULT_TEXT, True, True, False, DEFAULT_COLOR, "black", DEFAULT_MODEL))
 
 
190
  dep_button = gr.Button("Generate Dependency Parser")
191
  gr.Markdown("\n\n\n")
192
  with gr.Box():
193
  with gr.Column():
194
- gr.Markdown("## [Entity Recognizer](https://spacy.io/usage/visualizers#ent)")
195
- with gr.Column():
196
- with gr.Row():
197
- gr.Markdown("""```python
 
 
 
198
  import spacy
199
  from spacy import displacy
200
-
 
 
201
  displacy.serve(doc, style="ent")
202
  ```
203
  """)
204
- with gr.Row():
205
- gr.Markdown("")
206
- with gr.Row():
207
- gr.Markdown("")
208
- entity_input = gr.CheckboxGroup(DEFAULT_ENTS, value=DEFAULT_ENTS)
209
- entity_output = gr.HTML(value=entity(DEFAULT_TEXT, DEFAULT_ENTS, DEFAULT_MODEL))
 
 
210
  ent_button = gr.Button("Generate Entity Recognizer")
211
  with gr.Box():
212
  with gr.Column():
213
- gr.Markdown("## [Token Properties](https://spacy.io/usage/linguistic-features)")
 
 
214
  with gr.Column():
215
- tok_input = gr.CheckboxGroup(
216
- DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
217
- tok_output = gr.Dataframe(headers=DEFAULT_TOK_ATTR, value=default_token(DEFAULT_TEXT, DEFAULT_TOK_ATTR, DEFAULT_MODEL), overflow_row_behaviour="paginate")
 
 
 
 
 
218
  tok_button = gr.Button("Generate Token Properties")
219
  with gr.Box():
220
  with gr.Column():
221
- gr.Markdown("## [Word and Phrase Similarity](https://spacy.io/usage/linguistic-features#vectors-similarity)")
 
 
 
222
  with gr.Row():
223
- sim_text1 = gr.Textbox(
224
- value="Apple", label="Word 1", interactive=True,)
225
- sim_text2 = gr.Textbox(
226
- value="U.K. startup", label="Word 2", interactive=True,)
227
- sim_output = gr.Textbox(label="Similarity Score", value="0.12")
 
 
 
 
 
 
228
  sim_random_button = gr.Button("Generate random words")
229
  sim_button = gr.Button("Generate similarity")
230
  with gr.Box():
231
  with gr.Column():
232
- gr.Markdown("## [Spans](https://spacy.io/usage/visualizers#span)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  with gr.Column():
234
  with gr.Row():
235
- span1 = gr.Textbox(
236
- label="Span 1", value="U.K. startup", placeholder="Input a part of the sentence")
237
- label1 = gr.Textbox(value="ORG",
238
- label="Label for Span 1")
 
 
 
 
 
 
239
  with gr.Row():
240
- span2 = gr.Textbox(
241
- label="Span 2", value="U.K.", placeholder="Input another part of the sentence")
242
- label2 = gr.Textbox(value="GPE",
243
- label="Label for Span 2")
244
- span_output = gr.HTML(value=span(DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL))
 
 
 
 
 
 
 
245
  gr.Markdown(value="\n\n\n\n")
246
  gr.Markdown(value="\n\n\n\n")
247
  span_button = gr.Button("Generate spans")
248
-
249
  text_button.click(get_text, inputs=[model_input], outputs=text_input)
250
  button.click(dependency, inputs=[
251
  text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=depen_output)
 
16
  "ja": "アップルがイギリスの新興企業を10億ドルで購入を検討", "ko": "애플이 영국의 스타트업을 10억 달러에 인수하는 것을 알아보고 있다.", "lt": "Jaunikis pirmąją vestuvinę naktį iškeitė į areštinės gultą", "nb": "Apple vurderer å kjøpe britisk oppstartfirma for en milliard dollar.", "nl": "Apple overweegt om voor 1 miljard een U.K. startup te kopen",
17
  "pl": "Poczuł przyjemną woń mocnej kawy.", "pt": "Apple está querendo comprar uma startup do Reino Unido por 100 milhões de dólares", "ro": "Apple plănuiește să cumpere o companie britanică pentru un miliard de dolari", "ru": "Apple рассматривает возможность покупки стартапа из Соединённого Королевства за $1 млрд", "sv": "Apple överväger att köpa brittisk startup för 1 miljard dollar.", "zh": "作为语言而言,为世界使用人数最多的语言,目前世界有五分之一人口做为母语。"}
18
 
19
+
20
  def get_all_models():
21
  with open("requirements.txt") as f:
22
  content = f.readlines()
 
36
  nlp = spacy.load(model + "_sm")
37
  doc = nlp(text)
38
  options = {"compact": compact, "collapse_phrases": col_phrase,
39
+ "collapse_punct": col_punct, "bg": bg, "color": font}
40
  html = displacy.render(doc, style="dep", options=options)
41
  return html
42
 
 
61
  data = pd.DataFrame(data, columns=attributes)
62
  return data
63
 
64
+
65
  def default_token(text, attributes, model):
66
  nlp = spacy.load(model + "_sm")
67
  data = []
 
156
  gr.Markdown("")
157
  with gr.Column():
158
  gr.Markdown("")
159
+
 
160
  with gr.Row():
161
  with gr.Column():
162
  text_input = gr.Textbox(
 
164
  with gr.Column():
165
  gr.Markdown("")
166
  button = gr.Button("Generate", variant="primary")
167
+ with gr.Column():
168
  with gr.Tabs():
169
  with gr.TabItem(""):
170
  with gr.Column():
171
+ gr.Markdown(
172
+ "## [Dependency Parser](https://spacy.io/usage/visualizers#dep)")
173
+ gr.Markdown(
174
+ "The dependency visualizer, `dep`, shows part-of-speech tags and syntactic dependencies.")
175
  with gr.Row():
176
  with gr.Column():
177
  gr.Markdown("""```python
178
  import spacy
179
  from spacy import displacy
180
 
181
+ nlp = spacy.load("en_core_web_sm")
182
+ doc = nlp(text)
183
  displacy.serve(doc, style="dep")
184
  ```
185
  """)
 
186
  with gr.Column():
187
+ gr.Markdown("")
188
+ with gr.Column():
189
+ gr.Markdown("")
190
+ with gr.Row():
191
+ with gr.Column():
192
+ col_punct = gr.Checkbox(
193
+ label="Collapse Punctuation", value=True)
194
+ col_phrase = gr.Checkbox(
195
+ label="Collapse Phrases", value=True)
196
  compact = gr.Checkbox(label="Compact", value=False)
197
  with gr.Column():
198
+ bg = gr.Textbox(
199
+ label="Background Color", value=DEFAULT_COLOR)
200
  with gr.Column():
201
+ text = gr.Textbox(
202
+ label="Text Color", value="black")
203
+ depen_output = gr.HTML(value=dependency(
204
+ DEFAULT_TEXT, True, True, False, DEFAULT_COLOR, "black", DEFAULT_MODEL))
205
  dep_button = gr.Button("Generate Dependency Parser")
206
  gr.Markdown("\n\n\n")
207
  with gr.Box():
208
  with gr.Column():
209
+ gr.Markdown(
210
+ "## [Entity Recognizer](https://spacy.io/usage/visualizers#ent)")
211
+ gr.Markdown(
212
+ "The entity visualizer, `ent`, highlights named entities and their labels in a text.")
213
+ with gr.Row():
214
+ with gr.Column():
215
+ gr.Markdown("""```python
216
  import spacy
217
  from spacy import displacy
218
+
219
+ nlp = spacy.load("en_core_web_sm")
220
+ doc = nlp(text)
221
  displacy.serve(doc, style="ent")
222
  ```
223
  """)
224
+ with gr.Column():
225
+ gr.Markdown("")
226
+ with gr.Column():
227
+ gr.Markdown("")
228
+ entity_input = gr.CheckboxGroup(
229
+ DEFAULT_ENTS, value=DEFAULT_ENTS)
230
+ entity_output = gr.HTML(value=entity(
231
+ DEFAULT_TEXT, DEFAULT_ENTS, DEFAULT_MODEL))
232
  ent_button = gr.Button("Generate Entity Recognizer")
233
  with gr.Box():
234
  with gr.Column():
235
+ gr.Markdown(
236
+ "## [Token Properties](https://spacy.io/usage/linguistic-features)")
237
+ gr.Markdown("When you put in raw text to spaCy, it returns a `Doc` object with different linguistic features")
238
  with gr.Column():
239
+ with gr.Row():
240
+ with gr.Column():
241
+ tok_input = gr.CheckboxGroup(
242
+ DEFAULT_TOK_ATTR, value=DEFAULT_TOK_ATTR)
243
+ with gr.Column():
244
+ gr.Markdown("")
245
+ tok_output = gr.Dataframe(headers=DEFAULT_TOK_ATTR, value=default_token(
246
+ DEFAULT_TEXT, DEFAULT_TOK_ATTR, DEFAULT_MODEL), overflow_row_behaviour="paginate")
247
  tok_button = gr.Button("Generate Token Properties")
248
  with gr.Box():
249
  with gr.Column():
250
+ gr.Markdown(
251
+ "## [Word and Phrase Similarity](https://spacy.io/usage/linguistic-features#vectors-similarity)")
252
+ gr.Markdown("Words and spans have similarity ratings based off of their word vectors, or word embeddings")
253
+ gr.Markdown(">Word embeddings are multi-dimensional meaning representations of a word.")
254
  with gr.Row():
255
+ with gr.Column():
256
+ sim_text1 = gr.Textbox(
257
+ value="Apple", label="Word 1", interactive=True,)
258
+ with gr.Column():
259
+ sim_text2 = gr.Textbox(
260
+ value="U.K. startup", label="Word 2", interactive=True,)
261
+ with gr.Column():
262
+ sim_output = gr.Textbox(
263
+ label="Similarity Score", value="0.12")
264
+ with gr.Column():
265
+ gr.Markdown("")
266
  sim_random_button = gr.Button("Generate random words")
267
  sim_button = gr.Button("Generate similarity")
268
  with gr.Box():
269
  with gr.Column():
270
+ gr.Markdown(
271
+ "## [Spans](https://spacy.io/usage/visualizers#span)")
272
+ gr.Markdown("The span visualizer, `span`, highlights overlapping spans in a text.")
273
+ with gr.Row():
274
+ with gr.Column():
275
+ gr.Markdown("""```python
276
+ import spacy
277
+ from spacy import displacy
278
+ from spacy.tokens import Span
279
+
280
+ nlp = spacy.load("en_core_web_sm")
281
+ doc = nlp(text)
282
+ doc.spans["sc"] = [
283
+ Span(doc, 6, 8, "ORG"),
284
+ Span(doc, 6, 7, "GPE")
285
+ ]
286
+ displacy.serve(doc, style="span")
287
+ ```
288
+ """)
289
+ with gr.Column():
290
+ gr.Markdown("")
291
+ with gr.Column():
292
+ gr.Markdown("")
293
  with gr.Column():
294
  with gr.Row():
295
+ with gr.Column():
296
+ span1 = gr.Textbox(
297
+ label="Span 1", value="U.K. startup", placeholder="Input a part of the sentence")
298
+ with gr.Column():
299
+ label1 = gr.Textbox(value="ORG",
300
+ label="Label for Span 1")
301
+ with gr.Column():
302
+ gr.Markdown("")
303
+ with gr.Column():
304
+ gr.Markdown("")
305
  with gr.Row():
306
+ with gr.Column():
307
+ span2 = gr.Textbox(
308
+ label="Span 2", value="U.K.", placeholder="Input another part of the sentence")
309
+ with gr.Column():
310
+ label2 = gr.Textbox(value="GPE",
311
+ label="Label for Span 2")
312
+ with gr.Column():
313
+ gr.Markdown("")
314
+ with gr.Column():
315
+ gr.Markdown("")
316
+ span_output = gr.HTML(value=span(
317
+ DEFAULT_TEXT, "U.K. startup", "U.K.", "ORG", "GPE", DEFAULT_MODEL))
318
  gr.Markdown(value="\n\n\n\n")
319
  gr.Markdown(value="\n\n\n\n")
320
  span_button = gr.Button("Generate spans")
321
+
322
  text_button.click(get_text, inputs=[model_input], outputs=text_input)
323
  button.click(dependency, inputs=[
324
  text_input, col_punct, col_phrase, compact, bg, text, model_input], outputs=depen_output)