T.Masuda committed on
Commit
f11ed3a
·
1 Parent(s): c109f53

update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -39
app.py CHANGED
@@ -3,34 +3,50 @@ import numpy as np
3
  import re
4
  from rapidfuzz import fuzz
5
 
6
- id_list = []
7
- text_list = []
 
 
8
 
9
- def split_text(text: str) -> list[str]:
10
- words = re.split('[ \t ]', text)
11
- normalized_words = [word.strip().lower() for word in words if word.strip()]
12
- return list(set(normalized_words))
13
 
14
- def normalize_text(text: str) -> str:
15
- return ' '.join(split_text(text))
16
 
17
- def update_text_list(line: str):
18
- fields = line.split(',')
19
- if len(fields) < 2:
20
- return
21
- id = fields[0].strip()
22
- text = ' '.join(fields[1:])
23
- try:
24
- index = id_list.index(id)
25
- text_list[index] = normalize_text(f'{text_list[index]} {text}')
26
- except ValueError:
27
- id_list.append(id)
28
- text_list.append(normalize_text(text))
 
29
 
30
- def calc_score(text: str, keyword: str) -> float:
31
- keywords = split_text(keyword)
32
- wordlist = split_text(text)
33
- return sum(map(lambda k: max(map(lambda w: fuzz.ratio(w, k), wordlist)), keywords))
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  def process_text(input_text: str, input_keyword: str) -> str:
36
  if input_text is None or input_text.strip() == '':
@@ -39,21 +55,9 @@ def process_text(input_text: str, input_keyword: str) -> str:
39
  if input_keyword is None or input_keyword.strip() == '':
40
  print('no input_keyword')
41
  return None
42
-
43
- for line in input_text.replace('\r', '').split('\n'):
44
- update_text_list(line)
45
- if len(text_list) <= 0:
46
- print('no data')
47
- return None
48
-
49
- s = np.empty(0)
50
- for text in text_list:
51
- s = np.append(s, calc_score(text, input_keyword))
52
- index = np.argmax(s)
53
- result_id = id_list[index]
54
- result_desc = text_list[index]
55
- print(f'{result_id} {result_desc}')
56
- return result_id
57
 
58
  app = gr.Interface(
59
  title='Fuzzy Search',
 
3
  import re
4
  from rapidfuzz import fuzz
5
 
6
class Predictor:
    """Fuzzy keyword lookup over ``id,text`` records.

    Records are loaded with :meth:`update_text_list`; :meth:`predict` then
    returns the id of the record whose words best match a keyword string.
    """

    def __init__(self):
        # Parallel lists: text_list[i] is the normalized text for id_list[i].
        self.id_list = []
        self.text_list = []

    def _split_text(self, text: str) -> list[str]:
        """Split *text* into unique, lower-cased, non-empty words.

        Words are returned in first-seen order so the result is the same
        on every run (a plain ``set`` would give an arbitrary order).
        """
        words = re.split('[ \t ]', text)
        normalized_words = [word.strip().lower() for word in words if word.strip()]
        return list(dict.fromkeys(normalized_words))

    def _normalize_text(self, text: str) -> str:
        """Return the unique words of *text* joined by single spaces."""
        return ' '.join(self._split_text(text))

    def update_text_list(self, text: str):
        """Load records from *text*, one ``id,text`` record per line.

        Carriage returns are dropped before splitting on newlines. A line
        without a comma is skipped. When an id appears more than once, the
        new text is merged into the text already stored for that id.
        """
        for line in text.replace('\r', '').split('\n'):
            fields = line.split(',')
            if len(fields) < 2:
                # Skip only this line; a blank or malformed line must not
                # stop the remaining records from being loaded.
                continue
            record_id = fields[0].strip()
            body = ' '.join(fields[1:])
            try:
                index = self.id_list.index(record_id)
            except ValueError:
                # First time this id is seen.
                self.id_list.append(record_id)
                self.text_list.append(self._normalize_text(body))
            else:
                merged = f'{self.text_list[index]} {body}'
                self.text_list[index] = self._normalize_text(merged)

    def _calc_score(self, text: str, keyword: str) -> float:
        """Score how well the words of *text* match the words of *keyword*.

        For each keyword word, the best ``fuzz.ratio`` against any word of
        *text* is taken, and those best values are summed. A text with no
        words contributes 0 for every keyword word instead of raising.
        """
        keywords = self._split_text(keyword)
        wordlist = self._split_text(text)
        return sum(
            max((fuzz.ratio(w, k) for w in wordlist), default=0)
            for k in keywords
        )

    def predict(self, keyword: str) -> str:
        """Return the id of the best-scoring record for *keyword*.

        Returns an empty string when no records have been loaded. On a tie
        the earliest loaded record wins (first maximum).
        """
        if len(self.text_list) <= 0:
            print('no data')
            return ''
        scores = [self._calc_score(text, keyword) for text in self.text_list]
        index = int(np.argmax(scores))
        result_id = self.id_list[index]
        result_desc = self.text_list[index]
        print(f'{result_id} {result_desc}')
        return result_id
50
 
51
  def process_text(input_text: str, input_keyword: str) -> str:
52
  if input_text is None or input_text.strip() == '':
 
55
  if input_keyword is None or input_keyword.strip() == '':
56
  print('no input_keyword')
57
  return None
58
+ p = Predictor()
59
+ p.update_text_list(input_text)
60
+ return p.predict(input_keyword)
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  app = gr.Interface(
63
  title='Fuzzy Search',