Fix evaluation score
Files changed:

- __pycache__/classes.cpython-311.pyc +0 -0
- __pycache__/utils_evaluate.cpython-311.pyc +0 -0
- __pycache__/utils_evaluate_objections.cpython-311.pyc +0 -0
- __pycache__/utils_opportunity_review.cpython-311.pyc +0 -0
- __pycache__/utils_output.cpython-311.pyc +0 -0
- __pycache__/utils_prep.cpython-311.pyc +0 -0
- __pycache__/utils_prompt.cpython-311.pyc +0 -0
- classes.py +1 -1
- utils_evaluate.py +7 -4
- utils_evaluate_objections.py +18 -15
- utils_output.py +7 -5
- utils_prompt.py +3 -0
__pycache__/classes.cpython-311.pyc  CHANGED
Binary files a/__pycache__/classes.cpython-311.pyc and b/__pycache__/classes.cpython-311.pyc differ

__pycache__/utils_evaluate.cpython-311.pyc  CHANGED
Binary files a/__pycache__/utils_evaluate.cpython-311.pyc and b/__pycache__/utils_evaluate.cpython-311.pyc differ

__pycache__/utils_evaluate_objections.cpython-311.pyc  CHANGED
Binary files a/__pycache__/utils_evaluate_objections.cpython-311.pyc and b/__pycache__/utils_evaluate_objections.cpython-311.pyc differ

__pycache__/utils_opportunity_review.cpython-311.pyc  CHANGED
Binary files a/__pycache__/utils_opportunity_review.cpython-311.pyc and b/__pycache__/utils_opportunity_review.cpython-311.pyc differ

__pycache__/utils_output.cpython-311.pyc  CHANGED
Binary files a/__pycache__/utils_output.cpython-311.pyc and b/__pycache__/utils_output.cpython-311.pyc differ

__pycache__/utils_prep.cpython-311.pyc  CHANGED
Binary files a/__pycache__/utils_prep.cpython-311.pyc and b/__pycache__/utils_prep.cpython-311.pyc differ

__pycache__/utils_prompt.cpython-311.pyc  CHANGED
Binary files a/__pycache__/utils_prompt.cpython-311.pyc and b/__pycache__/utils_prompt.cpython-311.pyc differ

classes.py  CHANGED

@@ -41,7 +41,7 @@ class SessionState:
         self.do_opportunity_analysis = True
         self.do_customer_research = True
         self.do_objections = False
-        self.add_objections_to_analysis =
+        self.add_objections_to_analysis = True
         self.ask_objections = True
         self.use_objection_cache = True
         self.do_ragas_evaluation = False
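
The one-line change restores a value for `add_objections_to_analysis`, and the `True` default is what routes a session into objection scoring later in the commit. A minimal sketch of that gating, with names taken from the diffs in this commit (`do_evaluation` comes from the utils_output.py diff below, not from this excerpt of classes.py; the class body is otherwise simplified and illustrative):

```python
# Simplified, illustrative sketch -- not the full SessionState from classes.py.
class SessionState:
    def __init__(self):
        self.do_evaluation = False                 # full-answer evaluation path (off in this sketch)
        self.add_objections_to_analysis = True     # default restored by this commit
        self.responses = []                        # list of {"question": ..., "response": ...} dicts

state = SessionState()
if state.do_evaluation:
    ...  # evaluate_answers(state) would run here
elif state.add_objections_to_analysis:
    ...  # await evaluate_objections(state) runs instead, because of the True default
```
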
utils_evaluate.py  CHANGED

@@ -10,10 +10,11 @@ from ragas.metrics import (
 from rouge_score import rouge_scorer
 from sentence_transformers import SentenceTransformer, util
 
-from utils_evaluate_objections import
+from utils_evaluate_objections import generate_objection_score
 
 
-def evaluate_objections(session):
+async def evaluate_objections(session):
+    print("evaluate_objections()")
 
     for response in session.responses:
         question = response.get("question", "")
@@ -24,8 +25,10 @@ def evaluate_objections(session):
             q_and_a = {
                 "objection": question,
                 "answer": answer
-            }
-
+            }
+            print(q_and_a)
+            score = await generate_objection_score(q_and_a)
+            print(score)
         response["evaluation_score"] = score
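
Taken together, these changes turn `evaluate_objections()` into a coroutine that scores each recorded objection/answer pair and writes the score back onto the response dict. A self-contained sketch of that flow; the session object here is a plain dict stand-in (the project uses a SessionState object with a `.responses` attribute), and the scoring stub stands in for `generate_objection_score()` in utils_evaluate_objections.py:

```python
import asyncio

async def generate_objection_score(q_and_a: dict) -> int:
    # Stand-in for the ragas-backed scorer; the real one returns 0 or 1 from an LLM judgment.
    return 1 if q_and_a["answer"] else 0

async def evaluate_objections(session):
    # Mirrors the new loop: build an objection/answer pair per response,
    # await a score, and store it as response["evaluation_score"].
    for response in session["responses"]:
        q_and_a = {
            "objection": response.get("question", ""),
            "answer": response.get("response", ""),
        }
        response["evaluation_score"] = await generate_objection_score(q_and_a)

session = {"responses": [{"question": "Why switch now?",
                          "response": "Your current contract renews next quarter."}]}
asyncio.run(evaluate_objections(session))
print(session["responses"][0]["evaluation_score"])  # -> 1
```
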
utils_evaluate_objections.py  CHANGED

@@ -1,18 +1,22 @@
-
-
-
+import asyncio
+import dotenv
+import os
 import pandas as pd
-from typing import List, Tuple
-from datetime import datetime
 import sys
+import typing as t
+
 from dataclasses import dataclass, field
+from datetime import datetime
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, Field
+from ragas import SingleTurnSample
+from ragas.llms.base import LangchainLLMWrapper
 from ragas.metrics.base import MetricType
-from ragas.
-from ragas import
-
-
-
-import os
+from ragas.metrics.base import MetricWithLLM, SingleTurnMetric
+from ragas.prompt.pydantic_prompt import PydanticPrompt
+from typing import List, Tuple
+
+
 # Load environment variables from .env file
 dotenv.load_dotenv()
@@ -92,10 +96,8 @@ class SatisfyRate(MetricWithLLM, SingleTurnMetric):
         )
         return int(prompt_response.satisfy)
 
-async def
-
-    from ragas.llms.base import LangchainLLMWrapper
-    import pandas as pd
+async def generate_objection_score(question_answer):
+    print("generate_objection_scores()")
     # user_response= pd.read_csv(file_path)
     openai_model = LangchainLLMWrapper(ChatOpenAI(model_name="gpt-4o", api_key=OPENAI_API_KEY))
     scorer = SatisfyRate(llm=openai_model)
@@ -104,6 +106,7 @@ async def generate_objection_scores(question_answer):
 
     #(user_response['objection'][num], user_response['response'][num])
     satisfy_0_1 = await scorer.single_turn_ascore(sample)
+    print(satisfy_0_1)
 
     print (question_answer['objection'], question_answer['answer'], satisfy_0_1)
     # Implement your logic to generate a response based on the user's input
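
The renamed `generate_objection_score()` leans on ragas' single-turn scoring API, which the added imports make explicit. A sketch of that call pattern, assuming ragas >= 0.2 and the custom `SatisfyRate` metric defined earlier in this module; how the objection and answer map onto `SingleTurnSample` fields is an assumption, since the diff does not show where `sample` is built:

```python
import os

from langchain_openai import ChatOpenAI
from ragas import SingleTurnSample
from ragas.llms.base import LangchainLLMWrapper

async def score_objection(question_answer: dict) -> int:
    # Wrap a LangChain chat model so ragas can drive it, as in the diff above.
    openai_model = LangchainLLMWrapper(
        ChatOpenAI(model_name="gpt-4o", api_key=os.environ["OPENAI_API_KEY"])
    )
    scorer = SatisfyRate(llm=openai_model)  # custom 0/1 metric defined in this file

    # Assumed mapping: the objection is the user turn, the rep's answer is the response.
    sample = SingleTurnSample(
        user_input=question_answer["objection"],
        response=question_answer["answer"],
    )
    return await scorer.single_turn_ascore(sample)  # 0 or 1

# Run from an event loop, e.g.:
# asyncio.run(score_objection({"objection": "It's too expensive.",
#                              "answer": "Pricing scales with seats, so you only pay for active users."}))
```
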
utils_output.py  CHANGED

@@ -60,11 +60,13 @@ def format_datetime(dt):
 async def display_evaluation_results(cl, session_state):
     out_text = "*Preparing evaluation results ...*"
     await cl.Message(content=out_text).send()
-
+    print("Checking evaluation and objection flags")
+    print(session_state.do_evaluation)
+    print(session_state.add_objections_to_analysis)
     if session_state.do_evaluation:
         evaluate_answers(session_state)
     elif session_state.add_objections_to_analysis:
-        evaluate_objections(session_state)
+        await evaluate_objections(session_state)
     await asyncio.sleep(1)
 
     output = f"**Session Summary**"
@@ -82,9 +84,9 @@ async def display_evaluation_results(cl, session_state):
         averages = results_df[columns_to_average].mean()
 
     await cl.Message(content="**Overall Summary (By SalesBuddy)**").send()
-    output = f"**SalesBuddy Score:** {session_state.responses[-1]['overall_score']} \n"
+    output = f"**SalesBuddy Score (1-10):** {session_state.responses[-1]['overall_score']} \n"
     output = output + f"**SalesBuddy Evaluation:** {session_state.responses[-1]['overall_evaluation']} \n"
-    output = output + f"**SalesBuddy Final Mood Score:** {session_state.responses[-1]['mood_score']} \n"
+    output = output + f"**SalesBuddy Final Mood Score (1-10):** {session_state.responses[-1]['mood_score']} \n"
     await cl.Message(content=output).send()
 
     if session_state.do_ragas_evaluation:
@@ -101,7 +103,7 @@ async def display_evaluation_results(cl, session_state):
 **Question:** {resp.get('question', 'N/A')}
 **Answer:** {resp.get('response', 'N/A')}
 **SalesBuddy Evaluation:** {resp.get('response_evaluation', 'N/A')}
-**Evaluation Score:** {resp.get('
+**Evaluation Score:** {resp.get('evaluation_score', 'N/A')}
 """
         if session_state.do_ragas_evaluation:
             scores = session_state.scores[index]
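
The key fix at the call site is the added `await`: since `evaluate_objections()` is now a coroutine, calling it without awaiting would only create a coroutine object and leave `response["evaluation_score"]` unset, so the per-question summary would fall back to its default. A small illustration of that failure mode, using simplified stand-ins rather than the project's real objects:

```python
import asyncio

async def evaluate_objections(session):
    for response in session["responses"]:
        response["evaluation_score"] = 1  # stand-in for the LLM-based 0/1 score

async def show_summary(session, awaited=True):
    if awaited:
        await evaluate_objections(session)   # as in this commit: the coroutine actually runs
    else:
        evaluate_objections(session)         # coroutine created but never awaited (RuntimeWarning)
    print(session["responses"][0].get("evaluation_score", "N/A"))

session = {"responses": [{"question": "Too expensive", "response": "ROI in one quarter"}]}
asyncio.run(show_summary(session, awaited=False))  # -> N/A
asyncio.run(show_summary(session, awaited=True))   # -> 1
```
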
utils_prompt.py  CHANGED

@@ -103,6 +103,7 @@ def get_system_template_openai_short():
     You are playing a role in a conversation with a sales representative.
     Your name is in the 'Name:' section.
     They can use your first name, full name or address you with a title and last name.
+    If they get your name wrong, you can correct them once.
     Your name does not need to match exactly what they say.
     Be chatty and conversational and friendly.
     Your compnay information is in the 'Company:' section.
@@ -116,10 +117,12 @@ def get_system_template_openai_short():
     You can make conversation but you must follow the command.
     If a previous question and answer are provided, you must evaluate the rep's answer.
     You will perform evaluation based on how well and thoroughly the rep answered the previous question.
+    If the reps answer does not make sense or is not clear, set the score to a 1.
     If asked to provide a conclusion, you must consider all of the rep's answers to your questions.
     These are provided in the 'All questions and answers:' section.
     You will ALWAYS provide your response in valid JSON format
     Remember all string values must be enclosed in double quotes.
+    Remember do not include a question in your response.
     You will include with the following fields in JSON format:
     - Continue: Yes or No depending on if you want to continue the conversation based on the reps answer to your question.
     - Ask Follow Up: Yes or No depending on if you want to ask a follow up question.