Spaces:

mega-snowman
/

fuzzy-search

Sleeping

fuzzy-search / app.py

T.Masuda

update app.py

f11ed3a about 1 year ago

2.19 kB

	import gradio as gr
	import numpy as np
	import re
	from rapidfuzz import fuzz

	class Predictor:
	    """Pick the id whose description best matches a set of search keywords.

	    Records are loaded from comma separated lines (``id,description``) and
	    kept in two parallel lists: ``id_list[i]`` is the id whose normalized
	    description is ``text_list[i]``.
	    """

	    # Word separators: ASCII space, tab and the ideographic (full-width)
	    # space U+3000. Compiled once because every call to _split_text uses it.
	    _SEPARATORS = re.compile('[ \t\u3000]')

	    def __init__(self):
	        self.id_list: list[str] = []
	        self.text_list: list[str] = []

	    def _split_text(self, text: str) -> list[str]:
	        """Split *text* into unique, stripped, lower-cased words.

	        Duplicates are dropped while keeping first-occurrence order, so the
	        result (and therefore the stored text) is the same on every run.
	        """
	        words = (word.strip().lower() for word in self._SEPARATORS.split(text))
	        # dict.fromkeys dedupes while preserving insertion order, unlike set().
	        return list(dict.fromkeys(word for word in words if word))

	    def _normalize_text(self, text: str) -> str:
	        """Return the unique normalized words of *text* joined by single spaces."""
	        return ' '.join(self._split_text(text))

	    def update_text_list(self, text: str) -> None:
	        """Add the records found in *text* to the predictor.

	        Each line is ``id,description``; any further commas in the
	        description are replaced by spaces. A line without a comma (for
	        example a blank line) is skipped. When an id is seen again, the new
	        description is merged into the one already stored for that id.
	        """
	        for line in text.replace('\r', '').split('\n'):
	            fields = line.split(',')
	            if len(fields) < 2:
	                # Skip only this line; the remaining lines are still loaded.
	                continue
	            record_id = fields[0].strip()
	            description = ' '.join(fields[1:])
	            try:
	                index = self.id_list.index(record_id)
	            except ValueError:
	                # First time this id is seen: start a new record.
	                self.id_list.append(record_id)
	                self.text_list.append(self._normalize_text(description))
	            else:
	                merged = f'{self.text_list[index]} {description}'
	                self.text_list[index] = self._normalize_text(merged)

	    def _calc_score(self, text: str, keyword: str) -> float:
	        """Score how well *text* matches *keyword*.

	        For every keyword word the best ``fuzz.ratio`` against any word of
	        *text* is taken, and those best values are summed. A text without
	        any word contributes 0 for each keyword instead of raising.
	        """
	        keywords = self._split_text(keyword)
	        wordlist = self._split_text(text)
	        return sum(
	            # default=0 covers records whose description is empty.
	            max((fuzz.ratio(word, key) for word in wordlist), default=0)
	            for key in keywords
	        )

	    def predict(self, keyword: str) -> str:
	        """Return the id of the best scoring record for *keyword*.

	        Returns an empty string when no record has been loaded. On ties the
	        earliest loaded record wins (first maximum of ``np.argmax``).
	        """
	        if not self.text_list:
	            print('no data')
	            return ''
	        scores = [self._calc_score(text, keyword) for text in self.text_list]
	        index = int(np.argmax(scores))
	        result_id = self.id_list[index]
	        result_desc = self.text_list[index]
	        print(f'{result_id} {result_desc}')
	        return result_id

	def process_text(input_text: str, input_keyword: str) -> str:
	    """Run one fuzzy search over the submitted records.

	    Builds a fresh Predictor from *input_text* and returns what its
	    ``predict`` gives for *input_keyword*. When either argument is None or
	    contains only whitespace, a message is printed and None is returned.
	    """
	    # Validate in the same order as the parameters: text first, then keyword.
	    checks = (
	        (input_text, 'no input_text'),
	        (input_keyword, 'no input_keyword'),
	    )
	    for value, message in checks:
	        if value is None or not value.strip():
	            print(message)
	            return None

	    predictor = Predictor()
	    predictor.update_text_list(input_text)
	    return predictor.predict(input_keyword)

	# Components are created first so the Interface call below is pure wiring:
	# two text inputs (records, keywords) feed process_text, one text output
	# shows the id it returns.
	input_components = [
	    gr.Textbox(label='text (comma separated text for id and description)', lines=10),
	    gr.Textbox(label='search keywords'),
	]
	output_components = [
	    gr.Textbox(label='predicted id'),
	]

	app = gr.Interface(
	    fn=process_text,
	    inputs=input_components,
	    outputs=output_components,
	    title='Fuzzy Search',
	    allow_flagging='never',
	    concurrency_limit=20,
	)
	# Starts the web UI as soon as this module is executed.
	app.launch()