Commit ca604c9 (parent: 88f0b71)
Update Index.py

Index.py CHANGED
@@ -1,6 +1,7 @@
 from fastapi import FastAPI
 import os
-
+import json
+import pandas as pd
 
 import phoenix as px
 from phoenix.trace.langchain import OpenInferenceTracer, LangChainInstrumentor
@@ -9,6 +10,7 @@ from phoenix.trace.langchain import OpenInferenceTracer, LangChainInstrumentor
 from langchain.embeddings import HuggingFaceEmbeddings #for using HugginFace models
 from langchain.chains.question_answering import load_qa_chain
 from langchain import HuggingFaceHub
+from langchain.prompts import PromptTemplate
 
 from langchain.chains import RetrievalQA
 from langchain.callbacks import StdOutCallbackHandler
@@ -114,8 +116,73 @@ def _prompt(question):
 Context: """
 
 
+
+class BasePromptContext:
+    def __init__(self):
+        self.variables_list = ["question","answer","context"]
+        self.base_template = """Please act as an impartial judge and evaluate the quality of the provided answer which attempts to answer the provided question based on a provided context.
+        And you'll need to submit your grading for the correctness, comprehensiveness and readability of the answer, using JSON format with the 2 items in parenthesis:
+        ("score": [your score number for the correctness of the answer], "reasoning": [your one line step by step reasoning about the correctness of the answer])
+        Below is your grading rubric:
+        - Correctness: If the answer correctly answer the question, below are the details for different scores:
+          - Score 0: the answer is completely incorrect, doesn’t mention anything about the question or is completely contrary to the correct answer.
+            - For example, when asked “How to terminate a databricks cluster”, the answer is empty string, or content that’s completely irrelevant, or sorry I don’t know the answer.
+          - Score 4: the answer provides some relevance to the question and answer one aspect of the question correctly.
+            - Example:
+              - Question: How to terminate a databricks cluster
+              - Answer: Databricks cluster is a cloud-based computing environment that allows users to process big data and run distributed data processing tasks efficiently.
+              - Or answer: In the Databricks workspace, navigate to the "Clusters" tab. And then this is a hard question that I need to think more about it
+          - Score 7: the answer mostly answer the question but is missing or hallucinating on one critical aspect.
+            - Example:
+              - Question: How to terminate a databricks cluster”
+              - Answer: “In the Databricks workspace, navigate to the "Clusters" tab.
+                Find the cluster you want to terminate from the list of active clusters.
+                And then you’ll find a button to terminate all clusters at once”
+          - Score 10: the answer correctly answer the question and not missing any major aspect
+            - Example:
+              - Question: How to terminate a databricks cluster
+              - Answer: In the Databricks workspace, navigate to the "Clusters" tab.
+                Find the cluster you want to terminate from the list of active clusters.
+                Click on the down-arrow next to the cluster name to open the cluster details.
+                Click on the "Terminate" button. A confirmation dialog will appear. Click "Terminate" again to confirm the action.”
+        Provided question:
+        {question}
+        Provided answer:
+        {answer}
+        Provided context:
+        {context}
+        Please provide your grading for the correctness and explain you gave the particular grading"""
+
+
+class Evaluater:
+    def __init__(self, item):
+        self.question = item["question"]
+        self.answer = item["answer"]
+        #self.domain = item["domain"]
+        self.context = item["context"]
+        self.llm=HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":1, "max_length":1000000})
+
+    def get_prompt_template(self):
+        prompt = BasePromptContext()
+        template = prompt.base_template
+        varialbles = prompt.variables_list
+        eval_template = PromptTemplate(input_variables=varialbles, template=template)
+        return eval_template
+
+    def evaluate(self):
+        prompt = self.get_prompt_template().format(question = self.question, answer = self.answer, context = self.context)
+        score = self.llm(prompt)
+        return score
+
+# Create extractor instance
+def _create_evaluation_scenario(item):
+    score = Evaluater(item).evaluate()
+    return score
+
+
+
 @app.get("/rag")
-def rag( question: str):
+def rag( question: str, evaluate: bool):
 
     chain = RetrievalQA.from_chain_type(
         llm=llm,
@@ -126,10 +193,23 @@ def rag( question: str):
 
     #response = chain("how tredence brought good insight?")
     response = chain(_prompt(question))
-
-
+
+    df = px.active_session().get_spans_dataframe().fillna('')
+    df_sorted = df.sort_values(by='end_time',ascending=False)
+
+    model_input = json.loads(df_sorted[df_sorted["name"] == "LLMChain"]["attributes.input.value"][0])
+    context = model_input["context"]
+
+    if evaluate:
+        score = _create_evaluation_scenario({
+            "question": question,
+            "answer": response['result'],
+            "context": context
+        })
+    else:
+        score = "Evaluation is Turned OFF"
 
-    return {"question": question, "answer": response['result'], "
+    return {"question": question, "answer": response['result'], "context": context, "score": score}
 
 
 initialize_vectorstore()
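
In short, this commit turns /rag into GET /rag?question=...&evaluate=..., returning the retrieved context and an LLM-judge score alongside the answer. Below is a minimal sketch of calling the updated endpoint; the base URL, port, and timeout are assumptions (not part of the commit), while the query parameters and response keys mirror the new rag(question: str, evaluate: bool) signature and its return dict.

# Hedged usage sketch for the updated /rag endpoint (assumed local base URL).
import requests

BASE_URL = "http://localhost:7860"  # assumption: typical local/Spaces port

resp = requests.get(
    f"{BASE_URL}/rag",
    params={"question": "how tredence brought good insight?", "evaluate": "true"},
    timeout=120,
)
resp.raise_for_status()
payload = resp.json()

# Keys returned by the new handler: question, answer, context, score.
print(payload["answer"])
print(payload["score"])  # raw judge output, or "Evaluation is Turned OFF" when evaluate=false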
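The judge prompt asks google/flan-t5-xxl for a ("score": ..., "reasoning": ...) object, but Evaluater.evaluate() returns the model's raw string unparsed, so the API's "score" field is free text. A hedged sketch of one way that output could be post-processed is shown below; the helper name, parsing strategy, and fallbacks are illustrative assumptions, not part of Index.py.

# Hedged sketch: best-effort parsing of the judge's free-text reply.
# Assumes the model roughly follows the ("score": ..., "reasoning": ...) format
# requested by BasePromptContext; everything here is illustrative.
import json
import re

def parse_judge_output(raw: str) -> dict:
    # Try JSON first, tolerating the parentheses shown in the prompt instead of braces.
    candidate = raw.strip()
    if candidate.startswith("(") and candidate.endswith(")"):
        candidate = "{" + candidate[1:-1] + "}"
    try:
        data = json.loads(candidate)
        return {"score": data.get("score"), "reasoning": data.get("reasoning")}
    except (json.JSONDecodeError, AttributeError):
        pass
    # Fall back to the first integer in the text as the score, keep the full reply as reasoning.
    match = re.search(r"\d+", raw)
    return {"score": int(match.group()) if match else None, "reasoning": raw.strip()}

# e.g. parse_judge_output(_create_evaluation_scenario({"question": q, "answer": a, "context": c}))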