Spaces:

mega-snowman
/

fuzzy-search

Sleeping

App Files Files Community

T.Masuda committed on Jun 26, 2024

Commit

f11ed3a

1 Parent(s): c109f53

update app.py

Browse files

Files changed (1) hide show

app.py +43 -39

app.py CHANGED Viewed

@@ -3,34 +3,50 @@ import numpy as np
 import re
 from rapidfuzz import fuzz
-id_list = []
-text_list = []
-def split_text(text: str) -> list[str]:
-	words = re.split('[ \t　]', text)
-	normalized_words = [word.strip().lower() for word in words if word.strip()]
-	return list(set(normalized_words))
-def normalize_text(text: str) -> str:
-	return ' '.join(split_text(text))
-def update_text_list(line: str):
-	fields = line.split(',')
-	if len(fields) < 2:
-		return
-	id = fields[0].strip()
-	text = ' '.join(fields[1:])
-	try:
-		index = id_list.index(id)
-		text_list[index] = normalize_text(f'{text_list[index]} {text}')
-	except ValueError:
-		id_list.append(id)
-		text_list.append(normalize_text(text))
-def calc_score(text: str, keyword: str) -> float:
-	keywords = split_text(keyword)
-	wordlist = split_text(text)
-	return sum(map(lambda k: max(map(lambda w: fuzz.ratio(w, k), wordlist)), keywords))
 def process_text(input_text: str, input_keyword: str) -> str:
 	if input_text is None or input_text.strip() == '':
@@ -39,21 +55,9 @@ def process_text(input_text: str, input_keyword: str) -> str:
 	if input_keyword is None or input_keyword.strip() == '':
 		print('no input_keyword')
 		return None
-	for line in input_text.replace('\r', '').split('\n'):
-		update_text_list(line)
-	if len(text_list) <= 0:
-		print('no data')
-		return None
-	s = np.empty(0)
-	for text in text_list:
-		s = np.append(s, calc_score(text, input_keyword))
-	index = np.argmax(s)
-	result_id = id_list[index]
-	result_desc = text_list[index]
-	print(f'{result_id} {result_desc}')
-	return result_id
 app = gr.Interface(
 	title='Fuzzy Search',

 import re
 from rapidfuzz import fuzz
class Predictor:
	"""Fuzzy keyword search over ``id,text`` records.

	Records are loaded with :meth:`update_text_list` and queried with
	:meth:`predict`, which returns the id of the best-matching record.
	``id_list`` and ``text_list`` are parallel lists: ``text_list[i]`` holds
	the normalized words collected for ``id_list[i]``.
	"""

	# Word separators: ASCII space, tab and the ideographic (full-width) space U+3000.
	_SEPARATORS = re.compile('[ \t　]')

	def __init__(self):
		self.id_list: list[str] = []
		self.text_list: list[str] = []

	def _split_text(self, text: str) -> list[str]:
		"""Return the distinct lower-cased words of *text* in first-seen order."""
		words = (word.strip().lower() for word in self._SEPARATORS.split(text))
		# dict.fromkeys de-duplicates while keeping a deterministic order
		# (a plain set() would reorder the words from run to run).
		return list(dict.fromkeys(word for word in words if word))

	def _normalize_text(self, text: str) -> str:
		"""Return *text* reduced to its distinct words joined by single spaces."""
		return ' '.join(self._split_text(text))

	def update_text_list(self, text: str):
		"""Load records from *text*, one ``id,description`` pair per line.

		Everything after the first comma is the description (further commas
		are treated as word separators). Lines that share an id are merged
		into a single entry. Lines without a comma are skipped.
		"""
		for line in text.replace('\r', '').split('\n'):
			fields = line.split(',')
			if len(fields) < 2:
				# Skip only this line; a blank or malformed line must not
				# discard the records that follow it.
				continue
			record_id = fields[0].strip()
			description = ' '.join(fields[1:])
			try:
				index = self.id_list.index(record_id)
			except ValueError:
				self.id_list.append(record_id)
				self.text_list.append(self._normalize_text(description))
			else:
				merged = f'{self.text_list[index]} {description}'
				self.text_list[index] = self._normalize_text(merged)

	def _calc_score(self, text: str, keyword: str) -> float:
		"""Score *text* against *keyword*.

		For every keyword word, take the best ``fuzz.ratio`` against any word
		of *text*; the score is the sum of those best ratios.
		"""
		keywords = self._split_text(keyword)
		wordlist = self._split_text(text)
		# default=0.0 keeps a record with no words from raising on max().
		return sum(
			max((fuzz.ratio(word, key) for word in wordlist), default=0.0)
			for key in keywords
		)

	def predict(self, keyword: str) -> str:
		"""Return the id of the record that best matches *keyword*.

		Returns an empty string when no records have been loaded. On equal
		scores the earliest loaded record wins.
		"""
		if not self.text_list:
			print('no data')
			return ''
		scores = [self._calc_score(text, keyword) for text in self.text_list]
		index = int(np.argmax(scores))
		result_id = self.id_list[index]
		result_desc = self.text_list[index]
		print(f'{result_id} {result_desc}')
		return result_id
 def process_text(input_text: str, input_keyword: str) -> str:
 	if input_text is None or input_text.strip() == '':
 	if input_keyword is None or input_keyword.strip() == '':
 		print('no input_keyword')
 		return None
+	p = Predictor()
+	p.update_text_list(input_text)
+	return p.predict(input_keyword)
 app = gr.Interface(
 	title='Fuzzy Search',