agshiv92 committed on
Commit
e844f9d
·
1 Parent(s): f15b82e

Upload files to 'task5_model_evaluation'

Browse files
task5_model_evaluation/Evaluation using prompt.ipynb ADDED
@@ -0,0 +1,1208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# Import the required libraries"
8
+ ]
9
+ },
10
+ {
11
+ "cell_type": "code",
12
+ "execution_count": 11,
13
+ "metadata": {},
14
+ "outputs": [],
15
+ "source": [
16
+ "from ragas.llms import LangchainLLMWrapper\n",
17
+ "from ragas.embeddings import LangchainEmbeddingsWrapper\n",
18
+ "from langchain_community.chat_models import ChatOllama\n",
19
+ "from langchain_community.embeddings import OllamaEmbeddings\n",
20
+ "from langchain.text_splitter import TokenTextSplitter\n",
21
+ "from ragas.testset.extractor import KeyphraseExtractor\n",
22
+ "from ragas.testset.docstore import InMemoryDocumentStore\n",
23
+ "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
24
+ "from langchain_community.document_loaders import DirectoryLoader"
25
+ ]
26
+ },
27
+ {
28
+ "cell_type": "markdown",
29
+ "metadata": {},
30
+ "source": [
31
+ "# Starting the model using ollama"
32
+ ]
33
+ },
34
+ {
35
+ "cell_type": "code",
36
+ "execution_count": 4,
37
+ "metadata": {},
38
+ "outputs": [],
39
+ "source": [
40
+ "### Build\n",
41
+ "ragas_llm = ChatOllama(model=\"llama3:8b\")\n",
42
+ "embeddings = OllamaEmbeddings(model=\"llama3:8b\")"
43
+ ]
44
+ },
45
+ {
46
+ "cell_type": "markdown",
47
+ "metadata": {},
48
+ "source": [
49
+ "# Loading the document from the directory"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "code",
54
+ "execution_count": null,
55
+ "metadata": {},
56
+ "outputs": [],
57
+ "source": [
58
+ "# Try loading from a simple directory with plain text files\n",
59
+ "loader = DirectoryLoader(r\"C:\\Users\\agshi\\Desktop\\Omdena\\Canada Policy\\TorontoCanadaChapter_CanPolicyInsight\\task5_model_evaluation\\data\")\n",
60
+ "\n",
61
+ "# Load documents\n",
62
+ "documents = loader.load()\n"
63
+ ]
64
+ },
65
+ {
66
+ "cell_type": "markdown",
67
+ "metadata": {},
68
+ "source": [
69
+ "# Creating chunks of documents"
70
+ ]
71
+ },
72
+ {
73
+ "cell_type": "code",
74
+ "execution_count": 12,
75
+ "metadata": {},
76
+ "outputs": [
77
+ {
78
+ "name": "stdout",
79
+ "output_type": "stream",
80
+ "text": [
81
+ "58\n"
82
+ ]
83
+ }
84
+ ],
85
+ "source": [
86
+ "# Creating chunks of documents\n",
87
+ "text_splitter = RecursiveCharacterTextSplitter(\n",
88
+ " chunk_size=2000,\n",
89
+ " chunk_overlap=200,\n",
90
+ " add_start_index=True,\n",
91
+ " separators=[\"\\n\\n\", \"\\n\", \".\", \" \", \"\", \"\\n\\n\\n\"],\n",
92
+ ")\n",
93
+ "\n",
94
+ "docs_processed = []\n",
95
+ "for doc in documents:\n",
96
+ " docs_processed += text_splitter.split_documents([doc])\n",
97
+ "\n",
98
+ "print(len(docs_processed))"
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "markdown",
103
+ "metadata": {},
104
+ "source": [
105
+ "# Prompts for test set generation"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 46,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "QA_generation_prompt_template = \"\"\"\n",
115
+ "Your task is to write a factoid question and an answer given a context.\n",
116
+ "Your factoid question should be answerable with a specific, concise piece of factual information from the context.\n",
117
+ "Your factoid question should be formulated in the same style as questions users could ask in a search engine.\n",
118
+ "This means that your factoid question MUST NOT mention something like \"according to the passage\" or \"context\".\n",
119
+ "YOU MUST NOT MENTION in the factoid question Here is the factoid question and answer based on the given context\n",
120
+ "Provide your answer as follows:\n",
121
+ "\n",
122
+ "Output:::\n",
123
+ "Factoid question: (your factoid question)\n",
124
+ "Answer: (your answer to the factoid question)\n",
125
+ "\n",
126
+ "Now here is the context.\n",
127
+ "\n",
128
+ "Context: {context}\n",
129
+ "Output:::\n",
130
+ "\"\"\""
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": 24,
136
+ "metadata": {},
137
+ "outputs": [
138
+ {
139
+ "name": "stdout",
140
+ "output_type": "stream",
141
+ "text": [
142
+ "page_content='munications Commission (the “Commission”) must regulate\n",
143
+ "\n",
144
+ "and supervise the Canadian broadcasting system in a manner\n",
145
+ "\n",
146
+ "that\n",
147
+ "\n",
148
+ "(i) takes into account the different characteristics of En-\n",
149
+ "\n",
150
+ "glish, French and Indigenous language broadcasting and\n",
151
+ "\n",
152
+ "the different conditions under which broadcasting under-\n",
153
+ "\n",
154
+ "takings that provide English, French or Indigenous lan-\n",
155
+ "\n",
156
+ "guage programming operate,\n",
157
+ "\n",
158
+ "(ii) takes into account, among other things, the nature and\n",
159
+ "\n",
160
+ "diversity of the services provided by broadcasting under-\n",
161
+ "\n",
162
+ "takings,\n",
163
+ "\n",
164
+ "(iii) ensures that any broadcasting undertaking that can-\n",
165
+ "\n",
166
+ "not make maximum or predominant use of Canadian cre-\n",
167
+ "\n",
168
+ "ative and other human resources in the creation, produc-\n",
169
+ "\n",
170
+ "tion and presentation of programming contributes to\n",
171
+ "\n",
172
+ "those Canadian resources in an equitable manner,\n",
173
+ "\n",
174
+ "(iv) promotes innovation and is readily adaptable to sci-\n",
175
+ "\n",
176
+ "entific and technological change,\n",
177
+ "\n",
178
+ "(v) facilitates the provision to Canadians of Canadian pro-\n",
179
+ "\n",
180
+ "grams in both official languages, including those created\n",
181
+ "\n",
182
+ "and produced by official language minority communities\n",
183
+ "\n",
184
+ "Available on the House of Commons website at the following address:\n",
185
+ "\n",
186
+ "www.ourcommons.ca\n",
187
+ "\n",
188
+ "2021-2022-2023\n",
189
+ "\n",
190
+ "Page 3\n",
191
+ "\n",
192
+ "in Canada, as well as Canadian programs in Indigenous\n",
193
+ "\n",
194
+ "languages,\n",
195
+ "\n",
196
+ "(vi) facilitates the provision of programs that are accessi-\n",
197
+ "\n",
198
+ "ble without barriers to persons with disabilities,\n",
199
+ "\n",
200
+ "(vii) facilitates the provision to Canadians of programs\n",
201
+ "\n",
202
+ "created and produced by members of Black or other\n",
203
+ "\n",
204
+ "racialized communities,\n",
205
+ "\n",
206
+ "(viii) protects the privacy of individuals who are members\n",
207
+ "\n",
208
+ "of the audience of programs broadcast, and\n",
209
+ "\n",
210
+ "(ix) takes into account the variety of broadcasting under-\n",
211
+ "\n",
212
+ "takings to which the Act applies and avoids imposing obli-\n",
213
+ "\n",
214
+ "gations on any class of broadcasting undertakings if that\n",
215
+ "\n",
216
+ "imposition will not contribute in a material manner to the\n",
217
+ "\n",
218
+ "implementation of the broadcasting policy;\n",
219
+ "\n",
220
+ "(f) amend the procedure relating to the issuance by the Gov-\n",
221
+ "\n",
222
+ "ernor in Council of policy directions to the Commission;' metadata={'source': 'C:\\\\Users\\\\agshi\\\\Desktop\\\\Omdena\\\\Canada Policy\\\\TorontoCanadaChapter_CanPolicyInsight\\\\task5_model_evaluation\\\\data\\\\C-11_4.txt', 'start_index': 1765}\n"
223
+ ]
224
+ }
225
+ ],
226
+ "source": [
227
+ "print(docs_processed[1])"
228
+ ]
229
+ },
230
+ {
231
+ "cell_type": "code",
232
+ "execution_count": 58,
233
+ "metadata": {},
234
+ "outputs": [],
235
+ "source": [
236
+ "def store_factoid_question_answer(text):\n",
237
+ " # Split the text into question and answer parts\n",
238
+ " factoid_dict = {}\n",
239
+ " parts = text.split(\"Answer:\", 1)\n",
240
+ " \n",
241
+ " if len(parts) == 2:\n",
242
+ " question = parts[0].replace(\"Factoid question:\", \"\").strip()\n",
243
+ " answer = parts[1].strip()\n",
244
+ " \n",
245
+ " # Create a dictionary to store the question and answer\n",
246
+ " factoid_dict = {\n",
247
+ " \"question\": question,\n",
248
+ " \"answer\": answer\n",
249
+ " }\n",
250
+ " \n",
251
+ " return factoid_dict\n",
252
+ " else:\n",
253
+ " return None"
254
+ ]
255
+ },
256
+ {
257
+ "cell_type": "code",
258
+ "execution_count": 78,
259
+ "metadata": {},
260
+ "outputs": [],
261
+ "source": [
262
+ "qa_dict = {}\n",
263
+ "factoid = {}\n",
264
+ "qa_results = []\n",
265
+ "for doc in docs_processed[:10]: # Limiting to first 10 documents for testing\n",
266
+ " # Extract the page content from the Document object\n",
267
+ " page_content = doc.page_content\n",
268
+ " \n",
269
+ " # Generate the prompt for the current document using the template\n",
270
+ " QA_generation_prompt = QA_generation_prompt_template.format(context=page_content)\n",
271
+ " \n",
272
+ " # Invoke the LLM with the generated prompt\n",
273
+ " response = ragas_llm.invoke(QA_generation_prompt)\n",
274
+ " content = response.content\n",
275
+ " # Use the function to extract question and answer from the response\n",
276
+ " factoid = store_factoid_question_answer(content)\n",
277
+ " \n",
278
+ " if factoid:\n",
279
+ " # Store the question, answer, and context in a dictionary\n",
280
+ " qa_dict = {\n",
281
+ " \"question\": factoid['question'],\n",
282
+ " \"answer\": factoid['answer'],\n",
283
+ " \"context\": page_content\n",
284
+ " }\n",
285
+ " \n",
286
+ " # Append the dictionary to the results list\n",
287
+ " qa_results.append(qa_dict)\n",
288
+ " else:\n",
289
+ " print(\"Failed to parse response.\")"
290
+ ]
291
+ },
292
+ {
293
+ "cell_type": "code",
294
+ "execution_count": 79,
295
+ "metadata": {},
296
+ "outputs": [],
297
+ "source": [
298
+ "import pandas as pd\n",
299
+ "df = pd.DataFrame(qa_results)"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": 80,
305
+ "metadata": {},
306
+ "outputs": [
307
+ {
308
+ "data": {
309
+ "text/html": [
310
+ "<div>\n",
311
+ "<style scoped>\n",
312
+ " .dataframe tbody tr th:only-of-type {\n",
313
+ " vertical-align: middle;\n",
314
+ " }\n",
315
+ "\n",
316
+ " .dataframe tbody tr th {\n",
317
+ " vertical-align: top;\n",
318
+ " }\n",
319
+ "\n",
320
+ " .dataframe thead th {\n",
321
+ " text-align: right;\n",
322
+ " }\n",
323
+ "</style>\n",
324
+ "<table border=\"1\" class=\"dataframe\">\n",
325
+ " <thead>\n",
326
+ " <tr style=\"text-align: right;\">\n",
327
+ " <th></th>\n",
328
+ " <th>question</th>\n",
329
+ " <th>answer</th>\n",
330
+ " <th>context</th>\n",
331
+ " </tr>\n",
332
+ " </thead>\n",
333
+ " <tbody>\n",
334
+ " <tr>\n",
335
+ " <th>0</th>\n",
336
+ " <td>Here is my answer:\\n\\n What does Bill C-11 spe...</td>\n",
337
+ " <td>Unless the programs are prescribed by regulati...</td>\n",
338
+ " <td>Page 1\\n\\nFirst Session, Forty-fourth Parliame...</td>\n",
339
+ " </tr>\n",
340
+ " <tr>\n",
341
+ " <th>1</th>\n",
342
+ " <td>Here is the factoid question and answer based ...</td>\n",
343
+ " <td>It facilitates the provision of programs that ...</td>\n",
344
+ " <td>munications Commission (the “Commission”) must...</td>\n",
345
+ " </tr>\n",
346
+ " <tr>\n",
347
+ " <th>2</th>\n",
348
+ " <td>What type of undertakings can a person carry o...</td>\n",
349
+ " <td>Other than an online undertaking.</td>\n",
350
+ " <td>implementation of the broadcasting policy;\\n\\n...</td>\n",
351
+ " </tr>\n",
352
+ " <tr>\n",
353
+ " <th>3</th>\n",
354
+ " <td>What is defined as \"broadcasting\" in the Broad...</td>\n",
355
+ " <td>Any transmission of programs by radio waves or...</td>\n",
356
+ " <td>Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...</td>\n",
357
+ " </tr>\n",
358
+ " <tr>\n",
359
+ " <th>4</th>\n",
360
+ " <td>What is meant by \"online undertaking\" in the c...</td>\n",
361
+ " <td>An online undertaking means an undertaking for...</td>\n",
362
+ " <td>er undertaking or person, but does not include...</td>\n",
363
+ " </tr>\n",
364
+ " <tr>\n",
365
+ " <th>5</th>\n",
366
+ " <td>What does not carry on a broadcasting undertak...</td>\n",
367
+ " <td>A person who uses a social media service to up...</td>\n",
368
+ " <td>officielle en situation minoritaire)\\n\\n2021-2...</td>\n",
369
+ " </tr>\n",
370
+ " <tr>\n",
371
+ " <th>6</th>\n",
372
+ " <td>Here is the factoid question and answer based ...</td>\n",
373
+ " <td>The implementation of the objectives of the br...</td>\n",
374
+ " <td>(b) that is part of the operations of a primar...</td>\n",
375
+ " </tr>\n",
376
+ " <tr>\n",
377
+ " <th>7</th>\n",
378
+ " <td>What is the purpose of subsection (iii) of the...</td>\n",
379
+ " <td>Through its programming and employment opportu...</td>\n",
380
+ " <td>placed by the following:\\n\\n(ii) encourage the...</td>\n",
381
+ " </tr>\n",
382
+ " <tr>\n",
383
+ " <th>8</th>\n",
384
+ " <td>What is one way in which the Broadcasting Act ...</td>\n",
385
+ " <td>By supporting the production and broadcasting ...</td>\n",
386
+ " <td>(iii.2) support the production and broadcastin...</td>\n",
387
+ " </tr>\n",
388
+ " <tr>\n",
389
+ " <th>9</th>\n",
390
+ " <td>What is the responsibility of all persons who ...</td>\n",
391
+ " <td>They have a responsibility for the programs th...</td>\n",
392
+ " <td>(vi) ensure freedom of expression and journali...</td>\n",
393
+ " </tr>\n",
394
+ " </tbody>\n",
395
+ "</table>\n",
396
+ "</div>"
397
+ ],
398
+ "text/plain": [
399
+ " question \\\n",
400
+ "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n",
401
+ "1 Here is the factoid question and answer based ... \n",
402
+ "2 What type of undertakings can a person carry o... \n",
403
+ "3 What is defined as \"broadcasting\" in the Broad... \n",
404
+ "4 What is meant by \"online undertaking\" in the c... \n",
405
+ "5 What does not carry on a broadcasting undertak... \n",
406
+ "6 Here is the factoid question and answer based ... \n",
407
+ "7 What is the purpose of subsection (iii) of the... \n",
408
+ "8 What is one way in which the Broadcasting Act ... \n",
409
+ "9 What is the responsibility of all persons who ... \n",
410
+ "\n",
411
+ " answer \\\n",
412
+ "0 Unless the programs are prescribed by regulati... \n",
413
+ "1 It facilitates the provision of programs that ... \n",
414
+ "2 Other than an online undertaking. \n",
415
+ "3 Any transmission of programs by radio waves or... \n",
416
+ "4 An online undertaking means an undertaking for... \n",
417
+ "5 A person who uses a social media service to up... \n",
418
+ "6 The implementation of the objectives of the br... \n",
419
+ "7 Through its programming and employment opportu... \n",
420
+ "8 By supporting the production and broadcasting ... \n",
421
+ "9 They have a responsibility for the programs th... \n",
422
+ "\n",
423
+ " context \n",
424
+ "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... \n",
425
+ "1 munications Commission (the “Commission”) must... \n",
426
+ "2 implementation of the broadcasting policy;\\n\\n... \n",
427
+ "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... \n",
428
+ "4 er undertaking or person, but does not include... \n",
429
+ "5 officielle en situation minoritaire)\\n\\n2021-2... \n",
430
+ "6 (b) that is part of the operations of a primar... \n",
431
+ "7 placed by the following:\\n\\n(ii) encourage the... \n",
432
+ "8 (iii.2) support the production and broadcastin... \n",
433
+ "9 (vi) ensure freedom of expression and journali... "
434
+ ]
435
+ },
436
+ "execution_count": 80,
437
+ "metadata": {},
438
+ "output_type": "execute_result"
439
+ }
440
+ ],
441
+ "source": [
442
+ "df.head(10)"
443
+ ]
444
+ },
445
+ {
446
+ "cell_type": "markdown",
447
+ "metadata": {},
448
+ "source": [
449
+ "# Evaluation of Answers using LLMs"
450
+ ]
451
+ },
452
+ {
453
+ "cell_type": "code",
454
+ "execution_count": 81,
455
+ "metadata": {},
456
+ "outputs": [],
457
+ "source": [
458
+ "groundedness_prompt_template = \"\"\"\n",
459
+ "You will be given a context and a question.\n",
460
+ "Your task is to provide a 'total rating' scoring how well one can answer the given question unambiguously with the given context.\n",
461
+ "Give your answer as a single integer on a scale of 1 to 5, where 1 means that the question is not answerable at all given the context, and 5 means that the question is clearly and unambiguously answerable with the context.\n",
462
+ "\n",
463
+ "Please respond with only a single integer, without any additional text.\n",
464
+ "\n",
465
+ "Context: {context}\n",
466
+ "\n",
467
+ "Question: {question}\n",
468
+ "\n",
469
+ "Rating (1-5):\n",
470
+ "\"\"\"\n"
471
+ ]
472
+ },
473
+ {
474
+ "cell_type": "code",
475
+ "execution_count": 82,
476
+ "metadata": {},
477
+ "outputs": [],
478
+ "source": [
479
+ "# Add a new column to the DataFrame for storing the groundedness scores\n",
480
+ "df['groundedness_score'] = None\n",
481
+ "\n",
482
+ "# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores\n",
483
+ "for index, row in df.iterrows():\n",
484
+ " # Generate the evaluation prompt\n",
485
+ " evaluation_prompt = groundedness_prompt_template.format(context=row['context'], question=row['question'])\n",
486
+ " \n",
487
+ " # Invoke the LLM with the evaluation prompt\n",
488
+ " response = ragas_llm.invoke(evaluation_prompt)\n",
489
+ " \n",
490
+ " # Extract the rating from the response\n",
491
+ " rating = response.content.strip() # Assuming the response contains just the rating\n",
492
+ " \n",
493
+ " try:\n",
494
+ " # Store the rating in the DataFrame\n",
495
+ " df.at[index, 'groundedness_score'] = int(rating)\n",
496
+ " except ValueError:\n",
497
+ " print(f\"Invalid rating '{rating}' received for index {index}. Skipping...\")"
498
+ ]
499
+ },
500
+ {
501
+ "cell_type": "code",
502
+ "execution_count": 83,
503
+ "metadata": {},
504
+ "outputs": [
505
+ {
506
+ "data": {
507
+ "text/html": [
508
+ "<div>\n",
509
+ "<style scoped>\n",
510
+ " .dataframe tbody tr th:only-of-type {\n",
511
+ " vertical-align: middle;\n",
512
+ " }\n",
513
+ "\n",
514
+ " .dataframe tbody tr th {\n",
515
+ " vertical-align: top;\n",
516
+ " }\n",
517
+ "\n",
518
+ " .dataframe thead th {\n",
519
+ " text-align: right;\n",
520
+ " }\n",
521
+ "</style>\n",
522
+ "<table border=\"1\" class=\"dataframe\">\n",
523
+ " <thead>\n",
524
+ " <tr style=\"text-align: right;\">\n",
525
+ " <th></th>\n",
526
+ " <th>question</th>\n",
527
+ " <th>answer</th>\n",
528
+ " <th>context</th>\n",
529
+ " <th>groundedness_score</th>\n",
530
+ " </tr>\n",
531
+ " </thead>\n",
532
+ " <tbody>\n",
533
+ " <tr>\n",
534
+ " <th>0</th>\n",
535
+ " <td>Here is my answer:\\n\\n What does Bill C-11 spe...</td>\n",
536
+ " <td>Unless the programs are prescribed by regulati...</td>\n",
537
+ " <td>Page 1\\n\\nFirst Session, Forty-fourth Parliame...</td>\n",
538
+ " <td>4</td>\n",
539
+ " </tr>\n",
540
+ " <tr>\n",
541
+ " <th>1</th>\n",
542
+ " <td>Here is the factoid question and answer based ...</td>\n",
543
+ " <td>It facilitates the provision of programs that ...</td>\n",
544
+ " <td>munications Commission (the “Commission”) must...</td>\n",
545
+ " <td>4</td>\n",
546
+ " </tr>\n",
547
+ " <tr>\n",
548
+ " <th>2</th>\n",
549
+ " <td>What type of undertakings can a person carry o...</td>\n",
550
+ " <td>Other than an online undertaking.</td>\n",
551
+ " <td>implementation of the broadcasting policy;\\n\\n...</td>\n",
552
+ " <td>4</td>\n",
553
+ " </tr>\n",
554
+ " <tr>\n",
555
+ " <th>3</th>\n",
556
+ " <td>What is defined as \"broadcasting\" in the Broad...</td>\n",
557
+ " <td>Any transmission of programs by radio waves or...</td>\n",
558
+ " <td>Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...</td>\n",
559
+ " <td>4</td>\n",
560
+ " </tr>\n",
561
+ " <tr>\n",
562
+ " <th>4</th>\n",
563
+ " <td>What is meant by \"online undertaking\" in the c...</td>\n",
564
+ " <td>An online undertaking means an undertaking for...</td>\n",
565
+ " <td>er undertaking or person, but does not include...</td>\n",
566
+ " <td>4</td>\n",
567
+ " </tr>\n",
568
+ " </tbody>\n",
569
+ "</table>\n",
570
+ "</div>"
571
+ ],
572
+ "text/plain": [
573
+ " question \\\n",
574
+ "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n",
575
+ "1 Here is the factoid question and answer based ... \n",
576
+ "2 What type of undertakings can a person carry o... \n",
577
+ "3 What is defined as \"broadcasting\" in the Broad... \n",
578
+ "4 What is meant by \"online undertaking\" in the c... \n",
579
+ "\n",
580
+ " answer \\\n",
581
+ "0 Unless the programs are prescribed by regulati... \n",
582
+ "1 It facilitates the provision of programs that ... \n",
583
+ "2 Other than an online undertaking. \n",
584
+ "3 Any transmission of programs by radio waves or... \n",
585
+ "4 An online undertaking means an undertaking for... \n",
586
+ "\n",
587
+ " context groundedness_score \n",
588
+ "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... 4 \n",
589
+ "1 munications Commission (the “Commission”) must... 4 \n",
590
+ "2 implementation of the broadcasting policy;\\n\\n... 4 \n",
591
+ "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... 4 \n",
592
+ "4 er undertaking or person, but does not include... 4 "
593
+ ]
594
+ },
595
+ "execution_count": 83,
596
+ "metadata": {},
597
+ "output_type": "execute_result"
598
+ }
599
+ ],
600
+ "source": [
601
+ "df.head()"
602
+ ]
603
+ },
604
+ {
605
+ "cell_type": "code",
606
+ "execution_count": 84,
607
+ "metadata": {},
608
+ "outputs": [],
609
+ "source": [
610
+ "relevancy_prompt_template = \"\"\"\n",
611
+ "You will be given a context, a question, and an answer.\n",
612
+ "Your task is to provide a 'relevancy rating' scoring how relevant the answer is to the given question based on the context.\n",
613
+ "Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is not relevant at all to the question given the context, and 5 means that the answer is highly relevant to the question given the context.\n",
614
+ "\n",
615
+ "Please respond with only a single integer, without any additional text.\n",
616
+ "\n",
617
+ "Context: {context}\n",
618
+ "\n",
619
+ "Question: {question}\n",
620
+ "\n",
621
+ "Answer: {answer}\n",
622
+ "\n",
623
+ "Relevancy Rating (1-5):\n",
624
+ "\"\"\"\n"
625
+ ]
626
+ },
627
+ {
628
+ "cell_type": "code",
629
+ "execution_count": 85,
630
+ "metadata": {},
631
+ "outputs": [],
632
+ "source": [
633
+ "# Add a new column to the DataFrame for storing the relevancy scores\n",
634
+ "df['relevancy_score'] = None\n",
635
+ "\n",
636
+ "# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores\n",
637
+ "for index, row in df.iterrows():\n",
638
+ " # Generate the evaluation prompt for relevancy\n",
639
+ " relevancy_prompt = relevancy_prompt_template.format(context=row['context'], question=row['question'], answer=row['answer'])\n",
640
+ " \n",
641
+ " # Invoke the LLM with the evaluation prompt\n",
642
+ " response = ragas_llm.invoke(relevancy_prompt)\n",
643
+ " \n",
644
+ " # Extract the rating from the response\n",
645
+ " rating = response.content.strip() # Assuming the response contains just the rating\n",
646
+ " \n",
647
+ " try:\n",
648
+ " # Store the rating in the DataFrame\n",
649
+ " df.at[index, 'relevancy_score'] = int(rating)\n",
650
+ " except ValueError:\n",
651
+ " print(f\"Invalid rating '{rating}' received for index {index}. Skipping...\")\n"
652
+ ]
653
+ },
654
+ {
655
+ "cell_type": "code",
656
+ "execution_count": 86,
657
+ "metadata": {},
658
+ "outputs": [
659
+ {
660
+ "data": {
661
+ "text/html": [
662
+ "<div>\n",
663
+ "<style scoped>\n",
664
+ " .dataframe tbody tr th:only-of-type {\n",
665
+ " vertical-align: middle;\n",
666
+ " }\n",
667
+ "\n",
668
+ " .dataframe tbody tr th {\n",
669
+ " vertical-align: top;\n",
670
+ " }\n",
671
+ "\n",
672
+ " .dataframe thead th {\n",
673
+ " text-align: right;\n",
674
+ " }\n",
675
+ "</style>\n",
676
+ "<table border=\"1\" class=\"dataframe\">\n",
677
+ " <thead>\n",
678
+ " <tr style=\"text-align: right;\">\n",
679
+ " <th></th>\n",
680
+ " <th>question</th>\n",
681
+ " <th>answer</th>\n",
682
+ " <th>context</th>\n",
683
+ " <th>groundedness_score</th>\n",
684
+ " <th>relevancy_score</th>\n",
685
+ " </tr>\n",
686
+ " </thead>\n",
687
+ " <tbody>\n",
688
+ " <tr>\n",
689
+ " <th>0</th>\n",
690
+ " <td>Here is my answer:\\n\\n What does Bill C-11 spe...</td>\n",
691
+ " <td>Unless the programs are prescribed by regulati...</td>\n",
692
+ " <td>Page 1\\n\\nFirst Session, Forty-fourth Parliame...</td>\n",
693
+ " <td>4</td>\n",
694
+ " <td>5</td>\n",
695
+ " </tr>\n",
696
+ " <tr>\n",
697
+ " <th>1</th>\n",
698
+ " <td>Here is the factoid question and answer based ...</td>\n",
699
+ " <td>It facilitates the provision of programs that ...</td>\n",
700
+ " <td>munications Commission (the “Commission”) must...</td>\n",
701
+ " <td>4</td>\n",
702
+ " <td>5</td>\n",
703
+ " </tr>\n",
704
+ " <tr>\n",
705
+ " <th>2</th>\n",
706
+ " <td>What type of undertakings can a person carry o...</td>\n",
707
+ " <td>Other than an online undertaking.</td>\n",
708
+ " <td>implementation of the broadcasting policy;\\n\\n...</td>\n",
709
+ " <td>4</td>\n",
710
+ " <td>4</td>\n",
711
+ " </tr>\n",
712
+ " <tr>\n",
713
+ " <th>3</th>\n",
714
+ " <td>What is defined as \"broadcasting\" in the Broad...</td>\n",
715
+ " <td>Any transmission of programs by radio waves or...</td>\n",
716
+ " <td>Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...</td>\n",
717
+ " <td>4</td>\n",
718
+ " <td>5</td>\n",
719
+ " </tr>\n",
720
+ " <tr>\n",
721
+ " <th>4</th>\n",
722
+ " <td>What is meant by \"online undertaking\" in the c...</td>\n",
723
+ " <td>An online undertaking means an undertaking for...</td>\n",
724
+ " <td>er undertaking or person, but does not include...</td>\n",
725
+ " <td>4</td>\n",
726
+ " <td>5</td>\n",
727
+ " </tr>\n",
728
+ " <tr>\n",
729
+ " <th>5</th>\n",
730
+ " <td>What does not carry on a broadcasting undertak...</td>\n",
731
+ " <td>A person who uses a social media service to up...</td>\n",
732
+ " <td>officielle en situation minoritaire)\\n\\n2021-2...</td>\n",
733
+ " <td>4</td>\n",
734
+ " <td>4</td>\n",
735
+ " </tr>\n",
736
+ " <tr>\n",
737
+ " <th>6</th>\n",
738
+ " <td>Here is the factoid question and answer based ...</td>\n",
739
+ " <td>The implementation of the objectives of the br...</td>\n",
740
+ " <td>(b) that is part of the operations of a primar...</td>\n",
741
+ " <td>4</td>\n",
742
+ " <td>4</td>\n",
743
+ " </tr>\n",
744
+ " <tr>\n",
745
+ " <th>7</th>\n",
746
+ " <td>What is the purpose of subsection (iii) of the...</td>\n",
747
+ " <td>Through its programming and employment opportu...</td>\n",
748
+ " <td>placed by the following:\\n\\n(ii) encourage the...</td>\n",
749
+ " <td>4</td>\n",
750
+ " <td>4</td>\n",
751
+ " </tr>\n",
752
+ " <tr>\n",
753
+ " <th>8</th>\n",
754
+ " <td>What is one way in which the Broadcasting Act ...</td>\n",
755
+ " <td>By supporting the production and broadcasting ...</td>\n",
756
+ " <td>(iii.2) support the production and broadcastin...</td>\n",
757
+ " <td>4</td>\n",
758
+ " <td>5</td>\n",
759
+ " </tr>\n",
760
+ " <tr>\n",
761
+ " <th>9</th>\n",
762
+ " <td>What is the responsibility of all persons who ...</td>\n",
763
+ " <td>They have a responsibility for the programs th...</td>\n",
764
+ " <td>(vi) ensure freedom of expression and journali...</td>\n",
765
+ " <td>5</td>\n",
766
+ " <td>4</td>\n",
767
+ " </tr>\n",
768
+ " </tbody>\n",
769
+ "</table>\n",
770
+ "</div>"
771
+ ],
772
+ "text/plain": [
773
+ " question \\\n",
774
+ "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n",
775
+ "1 Here is the factoid question and answer based ... \n",
776
+ "2 What type of undertakings can a person carry o... \n",
777
+ "3 What is defined as \"broadcasting\" in the Broad... \n",
778
+ "4 What is meant by \"online undertaking\" in the c... \n",
779
+ "5 What does not carry on a broadcasting undertak... \n",
780
+ "6 Here is the factoid question and answer based ... \n",
781
+ "7 What is the purpose of subsection (iii) of the... \n",
782
+ "8 What is one way in which the Broadcasting Act ... \n",
783
+ "9 What is the responsibility of all persons who ... \n",
784
+ "\n",
785
+ " answer \\\n",
786
+ "0 Unless the programs are prescribed by regulati... \n",
787
+ "1 It facilitates the provision of programs that ... \n",
788
+ "2 Other than an online undertaking. \n",
789
+ "3 Any transmission of programs by radio waves or... \n",
790
+ "4 An online undertaking means an undertaking for... \n",
791
+ "5 A person who uses a social media service to up... \n",
792
+ "6 The implementation of the objectives of the br... \n",
793
+ "7 Through its programming and employment opportu... \n",
794
+ "8 By supporting the production and broadcasting ... \n",
795
+ "9 They have a responsibility for the programs th... \n",
796
+ "\n",
797
+ " context groundedness_score \\\n",
798
+ "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... 4 \n",
799
+ "1 munications Commission (the “Commission”) must... 4 \n",
800
+ "2 implementation of the broadcasting policy;\\n\\n... 4 \n",
801
+ "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... 4 \n",
802
+ "4 er undertaking or person, but does not include... 4 \n",
803
+ "5 officielle en situation minoritaire)\\n\\n2021-2... 4 \n",
804
+ "6 (b) that is part of the operations of a primar... 4 \n",
805
+ "7 placed by the following:\\n\\n(ii) encourage the... 4 \n",
806
+ "8 (iii.2) support the production and broadcastin... 4 \n",
807
+ "9 (vi) ensure freedom of expression and journali... 5 \n",
808
+ "\n",
809
+ " relevancy_score \n",
810
+ "0 5 \n",
811
+ "1 5 \n",
812
+ "2 4 \n",
813
+ "3 5 \n",
814
+ "4 5 \n",
815
+ "5 4 \n",
816
+ "6 4 \n",
817
+ "7 4 \n",
818
+ "8 5 \n",
819
+ "9 4 "
820
+ ]
821
+ },
822
+ "execution_count": 86,
823
+ "metadata": {},
824
+ "output_type": "execute_result"
825
+ }
826
+ ],
827
+ "source": [
828
+ "df"
829
+ ]
830
+ },
831
+ {
832
+ "cell_type": "code",
833
+ "execution_count": 87,
834
+ "metadata": {},
835
+ "outputs": [],
836
+ "source": [
837
+ "answer_relevancy_prompt_template = \"\"\"\n",
838
+ "You will be given a context, a question, and an answer.\n",
839
+ "Your task is to provide an 'answer relevancy rating' scoring how relevant the answer is to the given question based on the context.\n",
840
+ "Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is not relevant at all to the question given the context, and 5 means that the answer is highly relevant to the question given the context.\n",
841
+ "\n",
842
+ "Please respond with only a single integer, without any additional text.\n",
843
+ "\n",
844
+ "Context: {context}\n",
845
+ "\n",
846
+ "Question: {question}\n",
847
+ "\n",
848
+ "Answer: {answer}\n",
849
+ "\n",
850
+ "Answer Relevancy Rating (1-5):\n",
851
+ "\"\"\"\n"
852
+ ]
853
+ },
854
+ {
855
+ "cell_type": "code",
856
+ "execution_count": 88,
857
+ "metadata": {},
858
+ "outputs": [
859
+ {
860
+ "data": {
861
+ "text/html": [
862
+ "<div>\n",
863
+ "<style scoped>\n",
864
+ " .dataframe tbody tr th:only-of-type {\n",
865
+ " vertical-align: middle;\n",
866
+ " }\n",
867
+ "\n",
868
+ " .dataframe tbody tr th {\n",
869
+ " vertical-align: top;\n",
870
+ " }\n",
871
+ "\n",
872
+ " .dataframe thead th {\n",
873
+ " text-align: right;\n",
874
+ " }\n",
875
+ "</style>\n",
876
+ "<table border=\"1\" class=\"dataframe\">\n",
877
+ " <thead>\n",
878
+ " <tr style=\"text-align: right;\">\n",
879
+ " <th></th>\n",
880
+ " <th>question</th>\n",
881
+ " <th>answer</th>\n",
882
+ " <th>context</th>\n",
883
+ " <th>groundedness_score</th>\n",
884
+ " <th>relevancy_score</th>\n",
885
+ " <th>answer_relevancy_score</th>\n",
886
+ " </tr>\n",
887
+ " </thead>\n",
888
+ " <tbody>\n",
889
+ " <tr>\n",
890
+ " <th>0</th>\n",
891
+ " <td>Here is my answer:\\n\\n What does Bill C-11 spe...</td>\n",
892
+ " <td>Unless the programs are prescribed by regulati...</td>\n",
893
+ " <td>Page 1\\n\\nFirst Session, Forty-fourth Parliame...</td>\n",
894
+ " <td>4</td>\n",
895
+ " <td>5</td>\n",
896
+ " <td>4</td>\n",
897
+ " </tr>\n",
898
+ " <tr>\n",
899
+ " <th>1</th>\n",
900
+ " <td>Here is the factoid question and answer based ...</td>\n",
901
+ " <td>It facilitates the provision of programs that ...</td>\n",
902
+ " <td>munications Commission (the “Commission”) must...</td>\n",
903
+ " <td>4</td>\n",
904
+ " <td>5</td>\n",
905
+ " <td>5</td>\n",
906
+ " </tr>\n",
907
+ " <tr>\n",
908
+ " <th>2</th>\n",
909
+ " <td>What type of undertakings can a person carry o...</td>\n",
910
+ " <td>Other than an online undertaking.</td>\n",
911
+ " <td>implementation of the broadcasting policy;\\n\\n...</td>\n",
912
+ " <td>4</td>\n",
913
+ " <td>4</td>\n",
914
+ " <td>4</td>\n",
915
+ " </tr>\n",
916
+ " <tr>\n",
917
+ " <th>3</th>\n",
918
+ " <td>What is defined as \"broadcasting\" in the Broad...</td>\n",
919
+ " <td>Any transmission of programs by radio waves or...</td>\n",
920
+ " <td>Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...</td>\n",
921
+ " <td>4</td>\n",
922
+ " <td>5</td>\n",
923
+ " <td>4</td>\n",
924
+ " </tr>\n",
925
+ " <tr>\n",
926
+ " <th>4</th>\n",
927
+ " <td>What is meant by \"online undertaking\" in the c...</td>\n",
928
+ " <td>An online undertaking means an undertaking for...</td>\n",
929
+ " <td>er undertaking or person, but does not include...</td>\n",
930
+ " <td>4</td>\n",
931
+ " <td>5</td>\n",
932
+ " <td>5</td>\n",
933
+ " </tr>\n",
934
+ " </tbody>\n",
935
+ "</table>\n",
936
+ "</div>"
937
+ ],
938
+ "text/plain": [
939
+ " question \\\n",
940
+ "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n",
941
+ "1 Here is the factoid question and answer based ... \n",
942
+ "2 What type of undertakings can a person carry o... \n",
943
+ "3 What is defined as \"broadcasting\" in the Broad... \n",
944
+ "4 What is meant by \"online undertaking\" in the c... \n",
945
+ "\n",
946
+ " answer \\\n",
947
+ "0 Unless the programs are prescribed by regulati... \n",
948
+ "1 It facilitates the provision of programs that ... \n",
949
+ "2 Other than an online undertaking. \n",
950
+ "3 Any transmission of programs by radio waves or... \n",
951
+ "4 An online undertaking means an undertaking for... \n",
952
+ "\n",
953
+ " context groundedness_score \\\n",
954
+ "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... 4 \n",
955
+ "1 munications Commission (the “Commission”) must... 4 \n",
956
+ "2 implementation of the broadcasting policy;\\n\\n... 4 \n",
957
+ "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... 4 \n",
958
+ "4 er undertaking or person, but does not include... 4 \n",
959
+ "\n",
960
+ " relevancy_score answer_relevancy_score \n",
961
+ "0 5 4 \n",
962
+ "1 5 5 \n",
963
+ "2 4 4 \n",
964
+ "3 5 4 \n",
965
+ "4 5 5 "
966
+ ]
967
+ },
968
+ "execution_count": 88,
969
+ "metadata": {},
970
+ "output_type": "execute_result"
971
+ }
972
+ ],
973
+ "source": [
974
+ "# df must already have 'question', 'answer', and 'context' columns; every score starts as None so rows whose rating cannot be parsed stay None\n",
975
+ "df['answer_relevancy_score'] = None\n",
976
+ "\n",
977
+ "# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores\n",
978
+ "for index, row in df.iterrows():\n",
979
+ " # Generate the evaluation prompt for answer relevancy\n",
980
+ " answer_relevancy_prompt = answer_relevancy_prompt_template.format(\n",
981
+ " context=row['context'],\n",
982
+ " question=row['question'],\n",
983
+ " answer=row['answer']\n",
984
+ " )\n",
985
+ " \n",
986
+ " # Invoke the LLM with the evaluation prompt\n",
987
+ " response = ragas_llm.invoke(answer_relevancy_prompt)\n",
988
+ " \n",
989
+ " # Extract the rating from the response\n",
990
+ " rating = response.content.strip() # Assuming the response contains just the rating\n",
991
+ " \n",
992
+ " try:\n",
993
+ " # Store the rating in the DataFrame\n",
994
+ " df.at[index, 'answer_relevancy_score'] = int(rating)\n",
995
+ " except ValueError:\n",
996
+ " print(f\"Invalid rating '{rating}' received for index {index}. Skipping...\")\n",
997
+ "\n",
998
+ "# Preview the first five rows of the updated DataFrame, now including the answer relevancy scores\n",
999
+ "df.head()\n"
1000
+ ]
1001
+ },
1002
+ {
1003
+ "cell_type": "code",
1004
+ "execution_count": 90,
1005
+ "metadata": {},
1006
+ "outputs": [],
1007
+ "source": [
1008
+ "bias_detection_prompt_template = \"\"\"\n",
1009
+ "You will be given a context, a question, and an answer.\n",
1010
+ "Your task is to assess whether the provided answer exhibits any bias, favoring or disfavoring certain groups, ideas, or perspectives.\n",
1011
+ "Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is completely unbiased, and 5 means that the answer is highly biased.\n",
1012
+ "\n",
1013
+ "Please respond with only a single integer, without any additional text.\n",
1014
+ "\n",
1015
+ "Context: {context}\n",
1016
+ "\n",
1017
+ "Question: {question}\n",
1018
+ "\n",
1019
+ "Answer: {answer}\n",
1020
+ "\n",
1021
+ "Bias Rating (1-5):\n",
1022
+ "\"\"\"\n"
1023
+ ]
1024
+ },
1025
+ {
1026
+ "cell_type": "code",
1027
+ "execution_count": 91,
1028
+ "metadata": {},
1029
+ "outputs": [
1030
+ {
1031
+ "data": {
1032
+ "text/html": [
1033
+ "<div>\n",
1034
+ "<style scoped>\n",
1035
+ " .dataframe tbody tr th:only-of-type {\n",
1036
+ " vertical-align: middle;\n",
1037
+ " }\n",
1038
+ "\n",
1039
+ " .dataframe tbody tr th {\n",
1040
+ " vertical-align: top;\n",
1041
+ " }\n",
1042
+ "\n",
1043
+ " .dataframe thead th {\n",
1044
+ " text-align: right;\n",
1045
+ " }\n",
1046
+ "</style>\n",
1047
+ "<table border=\"1\" class=\"dataframe\">\n",
1048
+ " <thead>\n",
1049
+ " <tr style=\"text-align: right;\">\n",
1050
+ " <th></th>\n",
1051
+ " <th>question</th>\n",
1052
+ " <th>answer</th>\n",
1053
+ " <th>context</th>\n",
1054
+ " <th>groundedness_score</th>\n",
1055
+ " <th>relevancy_score</th>\n",
1056
+ " <th>answer_relevancy_score</th>\n",
1057
+ " <th>bias_score</th>\n",
1058
+ " </tr>\n",
1059
+ " </thead>\n",
1060
+ " <tbody>\n",
1061
+ " <tr>\n",
1062
+ " <th>0</th>\n",
1063
+ " <td>Here is my answer:\\n\\n What does Bill C-11 spe...</td>\n",
1064
+ " <td>Unless the programs are prescribed by regulati...</td>\n",
1065
+ " <td>Page 1\\n\\nFirst Session, Forty-fourth Parliame...</td>\n",
1066
+ " <td>4</td>\n",
1067
+ " <td>5</td>\n",
1068
+ " <td>4</td>\n",
1069
+ " <td>2</td>\n",
1070
+ " </tr>\n",
1071
+ " <tr>\n",
1072
+ " <th>1</th>\n",
1073
+ " <td>Here is the factoid question and answer based ...</td>\n",
1074
+ " <td>It facilitates the provision of programs that ...</td>\n",
1075
+ " <td>munications Commission (the “Commission”) must...</td>\n",
1076
+ " <td>4</td>\n",
1077
+ " <td>5</td>\n",
1078
+ " <td>5</td>\n",
1079
+ " <td>3</td>\n",
1080
+ " </tr>\n",
1081
+ " <tr>\n",
1082
+ " <th>2</th>\n",
1083
+ " <td>What type of undertakings can a person carry o...</td>\n",
1084
+ " <td>Other than an online undertaking.</td>\n",
1085
+ " <td>implementation of the broadcasting policy;\\n\\n...</td>\n",
1086
+ " <td>4</td>\n",
1087
+ " <td>4</td>\n",
1088
+ " <td>4</td>\n",
1089
+ " <td>2</td>\n",
1090
+ " </tr>\n",
1091
+ " <tr>\n",
1092
+ " <th>3</th>\n",
1093
+ " <td>What is defined as \"broadcasting\" in the Broad...</td>\n",
1094
+ " <td>Any transmission of programs by radio waves or...</td>\n",
1095
+ " <td>Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...</td>\n",
1096
+ " <td>4</td>\n",
1097
+ " <td>5</td>\n",
1098
+ " <td>4</td>\n",
1099
+ " <td>2</td>\n",
1100
+ " </tr>\n",
1101
+ " <tr>\n",
1102
+ " <th>4</th>\n",
1103
+ " <td>What is meant by \"online undertaking\" in the c...</td>\n",
1104
+ " <td>An online undertaking means an undertaking for...</td>\n",
1105
+ " <td>er undertaking or person, but does not include...</td>\n",
1106
+ " <td>4</td>\n",
1107
+ " <td>5</td>\n",
1108
+ " <td>5</td>\n",
1109
+ " <td>2</td>\n",
1110
+ " </tr>\n",
1111
+ " </tbody>\n",
1112
+ "</table>\n",
1113
+ "</div>"
1114
+ ],
1115
+ "text/plain": [
1116
+ " question \\\n",
1117
+ "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n",
1118
+ "1 Here is the factoid question and answer based ... \n",
1119
+ "2 What type of undertakings can a person carry o... \n",
1120
+ "3 What is defined as \"broadcasting\" in the Broad... \n",
1121
+ "4 What is meant by \"online undertaking\" in the c... \n",
1122
+ "\n",
1123
+ " answer \\\n",
1124
+ "0 Unless the programs are prescribed by regulati... \n",
1125
+ "1 It facilitates the provision of programs that ... \n",
1126
+ "2 Other than an online undertaking. \n",
1127
+ "3 Any transmission of programs by radio waves or... \n",
1128
+ "4 An online undertaking means an undertaking for... \n",
1129
+ "\n",
1130
+ " context groundedness_score \\\n",
1131
+ "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... 4 \n",
1132
+ "1 munications Commission (the “Commission”) must... 4 \n",
1133
+ "2 implementation of the broadcasting policy;\\n\\n... 4 \n",
1134
+ "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... 4 \n",
1135
+ "4 er undertaking or person, but does not include... 4 \n",
1136
+ "\n",
1137
+ " relevancy_score answer_relevancy_score bias_score \n",
1138
+ "0 5 4 2 \n",
1139
+ "1 5 5 3 \n",
1140
+ "2 4 4 2 \n",
1141
+ "3 5 4 2 \n",
1142
+ "4 5 5 2 "
1143
+ ]
1144
+ },
1145
+ "execution_count": 91,
1146
+ "metadata": {},
1147
+ "output_type": "execute_result"
1148
+ }
1149
+ ],
1150
+ "source": [
1151
+ "# df must already have 'question', 'answer', and 'context' columns; every bias score starts as None so rows whose rating cannot be parsed stay None\n",
1152
+ "df['bias_score'] = None\n",
1153
+ "\n",
1154
+ "# Loop over each row in the DataFrame to generate the evaluation prompts and get the bias scores\n",
1155
+ "for index, row in df.iterrows():\n",
1156
+ " # Generate the evaluation prompt for bias detection\n",
1157
+ " bias_detection_prompt = bias_detection_prompt_template.format(\n",
1158
+ " context=row['context'],\n",
1159
+ " question=row['question'],\n",
1160
+ " answer=row['answer']\n",
1161
+ " )\n",
1162
+ " \n",
1163
+ " # Invoke the LLM with the bias detection prompt\n",
1164
+ " response = ragas_llm.invoke(bias_detection_prompt)\n",
1165
+ " \n",
1166
+ " # Extract the bias rating from the response\n",
1167
+ " rating = response.content.strip() # Assuming the response contains just the rating\n",
1168
+ " \n",
1169
+ " try:\n",
1170
+ " # Store the rating in the DataFrame\n",
1171
+ " df.at[index, 'bias_score'] = int(rating)\n",
1172
+ " except ValueError:\n",
1173
+ " print(f\"Invalid bias rating '{rating}' received for index {index}. Skipping...\")\n",
1174
+ "\n",
1175
+ "# Preview the first five rows of the updated DataFrame, now including the bias scores\n",
1176
+ "df.head()\n"
1177
+ ]
1178
+ },
1179
+ {
1180
+ "cell_type": "code",
1181
+ "execution_count": null,
1182
+ "metadata": {},
1183
+ "outputs": [],
1184
+ "source": []
1185
+ }
1186
+ ],
1187
+ "metadata": {
1188
+ "kernelspec": {
1189
+ "display_name": "Python 3",
1190
+ "language": "python",
1191
+ "name": "python3"
1192
+ },
1193
+ "language_info": {
1194
+ "codemirror_mode": {
1195
+ "name": "ipython",
1196
+ "version": 3
1197
+ },
1198
+ "file_extension": ".py",
1199
+ "mimetype": "text/x-python",
1200
+ "name": "python",
1201
+ "nbconvert_exporter": "python",
1202
+ "pygments_lexer": "ipython3",
1203
+ "version": "3.11.9"
1204
+ }
1205
+ },
1206
+ "nbformat": 4,
1207
+ "nbformat_minor": 2
1208
+ }