AlzbetaStrompova committed on
Commit
1709ba8
1 Parent(s): 081d311

change layout

Browse files
Files changed (2) hide show
  1. app.py +30 -7
  2. website_script.py +6 -3
app.py CHANGED
@@ -1,9 +1,9 @@
 
1
  import gradio as gr
2
- from website_script import load, run
3
 
4
- print("Loading model")
5
- tokenizer, model, gazetteers_for_matching = load()
6
- print("Loaded model")
7
 
8
  examples = [
9
  "Masarykova univerzita se nachází v Brně .",
@@ -12,22 +12,45 @@ examples = [
12
  "Nobelova cena za fyziku byla udělena týmu vědců z MIT ."
13
  ]
14
 
 
 
 
 
 
 
 
 
 
15
  def ner(text):
 
 
16
  result = run(tokenizer, model, gazetteers_for_matching, text)
17
  return {"text": text, "entities": result}
 
 
 
 
 
 
 
 
 
 
 
 
18
  with gr.Blocks(css="footer{display:none !important}", theme=gr.themes.Default(primary_hue="blue", secondary_hue="sky")) as demo:
19
  # with gr.Blocks(theme=gr.themes.Soft()) as demo:
20
 
21
  gr.Interface(ner,
22
- gr.Textbox(lines=5, placeholder="Enter sentence here..."),
23
  gr.HighlightedText(show_legend=True, color_map={"PER": "red", "ORG": "green", "LOC": "blue"}),
24
  examples=examples,
25
  title="NerROB-czech",
26
  description="This is an implementation of a Named Entity Recognition model for the Czech language using gazetteers.",
27
  allow_flagging="never")
28
 
29
- gr.Interface(ner,
30
- gr.File(label="Upload a JSON file"),
31
  None,
32
  allow_flagging="never",
33
  description="Here you can upload your own gazetteers.",
 
1
+ import json
2
  import gradio as gr
3
+ from website_script import load, run, gaz
4
 
5
+ tokenizer, model = load()
6
+ gazetteers_for_matching = gaz()
 
7
 
8
  examples = [
9
  "Masarykova univerzita se nachází v Brně .",
 
12
  "Nobelova cena za fyziku byla udělena týmu vědců z MIT ."
13
  ]
14
 
15
def add_gazetteers(new_gazetteers):
    """Merge user-supplied gazetteer entries into the loaded gazetteers.

    ``new_gazetteers`` maps an entity type to the entries to add. The type is
    upper-cased before it is compared. Each entry is stored as
    ``entry -> TYPE`` in every dictionary of the module-level
    ``gazetteers_for_matching`` list that already has that type among its
    values. Types found in none of the loaded dictionaries are ignored.

    The dictionaries are updated in place and nothing is returned.
    """
    for entity_type, entries in new_gazetteers.items():
        entity_type = entity_type.upper()
        # A bare string would otherwise be iterated character by character
        # and fill the gazetteer with one-letter entries.
        if isinstance(entries, str):
            entries = [entries]
        for dictionary in gazetteers_for_matching:
            # Only extend the dictionary that already holds this entity type.
            if entity_type in dictionary.values():
                dictionary.update(dict.fromkeys(entries, entity_type))
23
+
def ner(text):
    """Run named-entity recognition on ``text``.

    Returns a dict with the input under ``"text"`` and the result of ``run``
    under ``"entities"``; the UI feeds this to a ``gr.HighlightedText``.
    """
    # Print the current size of every gazetteer dictionary before each run.
    for size in map(len, gazetteers_for_matching):
        print(size)
    entities = run(tokenizer, model, gazetteers_for_matching, text)
    return {"text": text, "entities": entities}
29
+
30
def load_gazetters(file_names):
    """Read uploaded gazetteer JSON files and merge them into the gazetteers.

    ``file_names`` is the value delivered by the multi-file upload widget: an
    iterable of paths to JSON files, or ``None``/empty when nothing was
    uploaded. Each file is parsed and handed to ``add_gazetteers``, which
    updates the module-level gazetteers in place.

    Returns ``None``.
    """
    print(file_names)
    # Submitting the form without any upload must not crash the handler.
    if not file_names:
        return
    for file_name in file_names:
        # JSON is UTF-8 by specification; do not depend on the platform
        # locale when reading Czech entries.
        with open(file_name, "r", encoding="utf-8") as file:
            data = json.load(file)
        # add_gazetteers mutates the shared gazetteers and returns nothing,
        # so its result is deliberately not assigned to anything.
        add_gazetteers(data)
37
+
38
+
39
+
40
+
41
  with gr.Blocks(css="footer{display:none !important}", theme=gr.themes.Default(primary_hue="blue", secondary_hue="sky")) as demo:
42
  # with gr.Blocks(theme=gr.themes.Soft()) as demo:
43
 
44
  gr.Interface(ner,
45
+ gr.Textbox(lines=10, placeholder="Enter sentence here..."),
46
  gr.HighlightedText(show_legend=True, color_map={"PER": "red", "ORG": "green", "LOC": "blue"}),
47
  examples=examples,
48
  title="NerROB-czech",
49
  description="This is an implementation of a Named Entity Recognition model for the Czech language using gazetteers.",
50
  allow_flagging="never")
51
 
52
+ gr.Interface(load_gazetters,
53
+ gr.File(label="Upload a JSON file", file_count="multiple", file_types=[".json"]),
54
  None,
55
  allow_flagging="never",
56
  description="Here you can upload your own gazetteers.",
website_script.py CHANGED
@@ -9,18 +9,21 @@ from data_manipulation.preprocess_gazetteers import build_reverse_dictionary
9
  def load():
10
  model_name = "ufal/robeczech-base"
11
  model_path = "bettystr/NerRoB-czech"
12
- gazetteers_path = "gazz2.json"
13
 
14
  model = ExtendedEmbeddigsRobertaForTokenClassification.from_pretrained(model_path).to("cpu")
15
  tokenizer = AutoTokenizer.from_pretrained(model_name)
16
  model.eval()
 
 
 
 
 
17
  gazetteers_for_matching = load_gazetteers(gazetteers_path)
18
  temp = []
19
  for i in gazetteers_for_matching.keys():
20
  temp.append(build_reverse_dictionary({i: gazetteers_for_matching[i]}))
21
  gazetteers_for_matching = temp
22
- return tokenizer, model, gazetteers_for_matching
23
-
24
 
25
  def run(tokenizer, model, gazetteers_for_matching, text):
26
 
 
def load():
    """Load the tokenizer and the NER model.

    The model weights come from ``bettystr/NerRoB-czech`` and the tokenizer
    from ``ufal/robeczech-base``. The model is moved to the CPU and switched
    to evaluation mode.

    Returns a ``(tokenizer, model)`` tuple.
    """
    model = ExtendedEmbeddigsRobertaForTokenClassification.from_pretrained("bettystr/NerRoB-czech")
    model = model.to("cpu")
    tokenizer = AutoTokenizer.from_pretrained("ufal/robeczech-base")
    model.eval()
    return tokenizer, model
17
+
18
def gaz():
    """Load the bundled gazetteers and prepare them for matching.

    Reads ``gazz2.json`` through ``load_gazetteers`` and builds one reverse
    dictionary per top-level key with ``build_reverse_dictionary``.

    Returns a list with one dictionary per key of the loaded gazetteers.
    """
    gazetteers_by_type = load_gazetteers("gazz2.json")
    # One reverse dictionary per entity type (presumably entry -> type;
    # confirm against build_reverse_dictionary).
    return [
        build_reverse_dictionary({entity_type: gazetteers_by_type[entity_type]})
        for entity_type in gazetteers_by_type.keys()
    ]
 
27
 
28
  def run(tokenizer, model, gazetteers_for_matching, text):
29