Spaces:

mega-snowman
/

fuzzy-search

Sleeping

App Files Files Community

T.Masuda committed on Jun 26, 2024

Commit

c109f53

1 Parent(s): 7faa705

create app

Browse files

Files changed (2) hide show

app.py +71 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import gradio as gr
+import numpy as np
+import re
+from rapidfuzz import fuzz
+# Parallel module-level stores: id_list[i] is an entry id and text_list[i] is
+# the normalized description accumulated for that id. update_text_list() adds
+# to and merges into them; process_text() reads them when scoring.
+id_list: list[str] = []
+text_list: list[str] = []
+def split_text(text: str) -> list[str]:
+	words = re.split('[ \t　]', text)
+	normalized_words = [word.strip().lower() for word in words if word.strip()]
+	return list(set(normalized_words))
+def normalize_text(text: str) -> str:
+	return ' '.join(split_text(text))
+def update_text_list(line: str):
+	fields = line.split(',')
+	if len(fields) < 2:
+		return
+	id = fields[0].strip()
+	text = ' '.join(fields[1:])
+	try:
+		index = id_list.index(id)
+		text_list[index] = normalize_text(f'{text_list[index]} {text}')
+	except ValueError:
+		id_list.append(id)
+		text_list.append(normalize_text(text))
+def calc_score(text: str, keyword: str) -> float:
+	keywords = split_text(keyword)
+	wordlist = split_text(text)
+	return sum(map(lambda k: max(map(lambda w: fuzz.ratio(w, k), wordlist)), keywords))
+def process_text(input_text: str, input_keyword: str) -> str:
+	if input_text is None or input_text.strip() == '':
+		print('no input_text')
+		return None
+	if input_keyword is None or input_keyword.strip() == '':
+		print('no input_keyword')
+		return None
+	for line in input_text.replace('\r', '').split('\n'):
+		update_text_list(line)
+	if len(text_list) <= 0:
+		print('no data')
+		return None
+	s = np.empty(0)
+	for text in text_list:
+		s = np.append(s, calc_score(text, input_keyword))
+	index = np.argmax(s)
+	result_id = id_list[index]
+	result_desc = text_list[index]
+	print(f'{result_id} {result_desc}')
+	return result_id
+app = gr.Interface(
+	title='Fuzzy Search',
+	fn=process_text,
+	inputs=[
+		gr.Textbox(label='text (comma separated text for id and description)', lines=10),
+		gr.Textbox(label='search keywords')
+	],
+	outputs=[
+		gr.Textbox(label='predicted id'),
+	],
+	allow_flagging='never',
+	concurrency_limit=20,
+)
+app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio
+numpy
+rapidfuzz