T.Masuda committed on
Commit
c109f53
·
1 Parent(s): 7faa705

create app

Browse files
Files changed (2) hide show
  1. app.py +71 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import re
4
+ from rapidfuzz import fuzz
5
+
6
# Parallel module-level stores filled by update_text_list():
# id_list[i] holds a record id and text_list[i] its normalized description.
id_list: list[str] = []
text_list: list[str] = []
8
+
9
def split_text(text: str) -> list[str]:
    """Split *text* into unique, lower-cased words.

    Words are separated by an ASCII space, a tab, or a full-width
    (ideographic, U+3000) space.  Every piece is stripped and lower-cased,
    empty pieces are dropped, and duplicates are removed.

    Args:
        text: Raw text to tokenize.

    Returns:
        The distinct normalized words, in order of first appearance.
    """
    # The full-width space is spelled as an escape so that it stays visible
    # in editors and diffs instead of looking like an ordinary blank.
    pieces = re.split(r'[ \t\u3000]', text)
    words = (piece.strip().lower() for piece in pieces)
    # dict.fromkeys() de-duplicates like set() but keeps insertion order, so
    # the result does not depend on per-process string-hash randomization.
    return list(dict.fromkeys(word for word in words if word))
13
+
14
def normalize_text(text: str) -> str:
    """Return the words of ``split_text(text)`` joined by single spaces."""
    words = split_text(text)
    return ' '.join(words)
16
+
17
def update_text_list(line: str):
    """Merge one ``id,description`` line into ``id_list`` / ``text_list``.

    The text before the first comma (stripped) is the record id; everything
    after it is the description.  A line without any comma is ignored.  If
    the id is already stored, the new description is combined with the stored
    one and re-normalized; otherwise a new record is appended to both lists.

    Args:
        line: A single input line.
    """
    record_id, comma, rest = line.partition(',')
    if not comma:
        # No comma means there is no description field at all.
        return
    record_id = record_id.strip()
    # Further commas inside the description act as word separators.
    description = rest.replace(',', ' ')

    if record_id in id_list:
        position = id_list.index(record_id)
        text_list[position] = normalize_text(f'{text_list[position]} {description}')
    else:
        id_list.append(record_id)
        text_list.append(normalize_text(description))
29
+
30
def calc_score(text: str, keyword: str) -> float:
    """Score how well *text* matches the words in *keyword*.

    Both arguments are tokenized with ``split_text``.  For every keyword
    word, the best ``fuzz.ratio`` against any word of *text* is taken, and
    those best values are summed.

    Args:
        text: Description text to search in.
        keyword: One or more search words.

    Returns:
        The summed best-match ratios.  A keyword word contributes 0 when
        *text* contains no words.
    """
    keywords = split_text(keyword)
    wordlist = split_text(text)
    # default=0.0 keeps a record with an empty description (e.g. the input
    # line "A1,") from raising ValueError on max() of an empty sequence.
    return sum(
        max((fuzz.ratio(word, key) for word in wordlist), default=0.0)
        for key in keywords
    )
34
+
35
def process_text(input_text: str, input_keyword: str) -> str:
    """Return the id of the stored record that best matches the keywords.

    Every line of *input_text* is passed to ``update_text_list``; each stored
    description is then scored against *input_keyword* with ``calc_score``
    and the id with the highest score is returned (the first one on ties).

    Args:
        input_text: Lines of comma-separated ``id,description`` text.
        input_keyword: Search keywords.

    Returns:
        The best-matching id, or ``None`` when either input is missing or
        blank, or when no record is available.
    """
    if input_text is None or input_text.strip() == '':
        print('no input_text')
        return None
    if input_keyword is None or input_keyword.strip() == '':
        print('no input_keyword')
        return None

    # NOTE(review): id_list/text_list are module-level and are not cleared
    # here, so records from earlier calls stay searchable -- confirm that
    # this accumulation is intended.
    for line in input_text.replace('\r', '').split('\n'):
        update_text_list(line)
    if not text_list:
        print('no data')
        return None

    # Build the score array in one go; growing it with np.append() inside the
    # loop would copy the whole array on every iteration.
    scores = np.array([calc_score(text, input_keyword) for text in text_list])
    index = int(np.argmax(scores))
    result_id = id_list[index]
    result_desc = text_list[index]
    print(f'{result_id} {result_desc}')
    return result_id
57
+
58
# Gradio UI: a multi-line box for the "id,description" lines and a box for
# the search keywords go in; the predicted id comes out of process_text().
app = gr.Interface(
    fn=process_text,
    title='Fuzzy Search',
    inputs=[
        gr.Textbox(
            label='text (comma separated text for id and description)',
            lines=10,
        ),
        gr.Textbox(label='search keywords'),
    ],
    outputs=[
        gr.Textbox(label='predicted id'),
    ],
    allow_flagging='never',
    concurrency_limit=20,
)
# Start serving as soon as this module is executed.
app.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ numpy
3
+ rapidfuzz