Spaces:

Shakhovak
/

Sheldon_Retrieval_chat_bot

Runtime error

App Files Files Community

shakhovak committed on Feb 16, 2024

Commit

decf09e

1 Parent(s): bddcd9a

updates

Browse files

Files changed (2) hide show

data/low_score_sripts.json +37 -0
retrieve_bot.py +12 -22

data/low_score_sripts.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{"generic":[
+    "What does it mean?",
+    "You have two strikes. Three strikes and you're out. It's a sports metaphor. Explain again!",
+    "Again, urban slang. In which, I believe I'm gaining remarkable fluency. So, could you repeat?",
+    "I'm confused.",
+    "I can't comment without violating our agreement that I don't criticize you.",
+    "Oh!",
+    "I need to use the restroom.",
+    "Move. Move. Move!",
+    "I was going to mention it at the time, but then I thought, some day maybe...",
+    "Well...",
+    "Apparently... I have no idea!?",
+    "I'm not sure...",
+    "Nothing. I say nothing.",
+    "Well, my friend. Focus and repeat!",
+    "I don't follow.",
+    "Thank you. Can we just talk about something else?",
+    "Aw…",
+    "I have insufficient data to proceed. Excuse me?"
+],
+ "greetings": [
+    "Hello.",
+    "Hello to you, insufficiently intelligent person.",
+    "Hello, my friend.",
+    "Hi, my friend. I’m sorry I’m late, but your companion left the most indecipherable invitation.",
+    "Hi, my friend. It’s me, Sheldon. In the living room. I just, I wanted you to know I saw the tie. Message received. You’re welcome. You carry on.",
+    "Hi, uh, my friend, this circular is addressed to occupant, but with our apartment switch, it’s unclear whether it’s yours or mine.",
+    "Hi",
+    "Hi. Hello. Oh, and a special hello to my friend, who needs to be mentioned by name.",
+    "Hello. So I guess you’re really holding up the other four fingers?",
+    "Hi. Um, I’ve reconsidered. Uh, you can’t work where I work. Enjoy the rest of your evening."
+]
+}

retrieve_bot.py CHANGED Viewed

@@ -12,29 +12,12 @@ from utils import (
 from collections import deque
 from transformers import pipeline
 import torch
 from transformers import AutoTokenizer
 from dialog_tag import DialogTag
 # this class represents the main functions of the retrieval bot
-low_scoring_list = [
-    "What does it mean?",
-    "You have two strikes. Three strikes and you’ re out. It’ s a sports metaphor. Explain again!",
-    "Again, urban slang. In which, I believe I’ m gaining remarkable fluency. So, could you repeat?",
-    "I’m confused.",
-    "I can’t comment without violating our agreement that I don’ t criticize you.",
-    "Oh!",
-    "I need to use the restroom.",
-    "Move. Move. Move!",
-    "I was going to mention it at the time, but then I thought, some day maybe...",
-    "Well...",
-    "Apparently... I have no idea!?",
-    "I’m not sure...",
-    "Nothing. I say nothing.",
-    "Well, my friend. Focus and repeat!",
-]
 class ChatBot:
     def __init__(self):
         self.vect_data = []
@@ -45,6 +28,7 @@ class ChatBot:
         self.reranking_model = None
         self.device = None
         self.tokenizer = None
     def load(self):
         """This method is called first to load all datasets and
@@ -54,6 +38,8 @@ class ChatBot:
         with open("data/scripts_vectors.pkl", "rb") as fp:
             self.vect_data = pickle.load(fp)
         self.scripts = pd.read_pickle("data/scripts.pkl")
         self.tag_model = DialogTag("distilbert-base-uncased")
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.ranking_model = SentenceTransformer(
@@ -83,11 +69,15 @@ class ChatBot:
             query_encoding,
         )
         top_scores, top_indexes = top_candidates(
-            bot_cosine_scores, intent=intent, initial_data=self.scripts, top=6
         )
-        if top_scores[0] < 0.8:
-            answer = random.choice(low_scoring_list)
-            self.conversation_history.clear()
         else:
             # test candidates and collects them with label 0 to dictionary

 from collections import deque
 from transformers import pipeline
 import torch
+import json
 from transformers import AutoTokenizer
 from dialog_tag import DialogTag
 # this class represents the main functions of the retrieval bot
 class ChatBot:
     def __init__(self):
         self.vect_data = []
         self.reranking_model = None
         self.device = None
         self.tokenizer = None
+        self.low_scoring_list = None
     def load(self):
         """This method is called first to load all datasets and
         with open("data/scripts_vectors.pkl", "rb") as fp:
             self.vect_data = pickle.load(fp)
         self.scripts = pd.read_pickle("data/scripts.pkl")
+        with open('low_score_sripts.json', 'r') as f:
+            self.low_scoring_list = json.load(f)
         self.tag_model = DialogTag("distilbert-base-uncased")
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         self.ranking_model = SentenceTransformer(
             query_encoding,
         )
         top_scores, top_indexes = top_candidates(
+            bot_cosine_scores, intent=intent, initial_data=self.scripts, top=5
         )
+        if top_scores[0] < 0.9:
+            if intent == "greetings":
+                answer = random.choice(self.low_scoring_list['greetings'])
+                self.conversation_history.clear()
+            else:
+                answer = random.choice(self.low_scoring_list['generic'])
+                self.conversation_history.clear()
         else:
             # test candidates and collects them with label 0 to dictionary