{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Import the required libraries" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "from ragas.llms import LangchainLLMWrapper\n", "from ragas.embeddings import LangchainEmbeddingsWrapper\n", "from langchain_community.chat_models import ChatOllama\n", "from langchain_community.embeddings import OllamaEmbeddings\n", "from langchain.text_splitter import TokenTextSplitter\n", "from ragas.testset.extractor import KeyphraseExtractor\n", "from ragas.testset.docstore import InMemoryDocumentStore\n", "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", "from langchain_community.document_loaders import DirectoryLoader" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Starting the model using ollama" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "### Build\n", "ragas_llm = ChatOllama(model=\"llama3:8b\")\n", "embeddings = OllamaEmbeddings(model=\"llama3:8b\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Loading the document from the directory" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Try loading from a simple directory with plain text files\n", "loader = DirectoryLoader(r\"C:\\Users\\agshi\\Desktop\\Omdena\\Canada Policy\\TorontoCanadaChapter_CanPolicyInsight\\task5_model_evaluation\\data\")\n", "\n", "# Load documents\n", "documents = loader.load()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Creating chunks of documents" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "58\n" ] } ], "source": [ "# Creating chunks of documents\n", "text_splitter = RecursiveCharacterTextSplitter(\n", " chunk_size=2000,\n", " chunk_overlap=200,\n", " add_start_index=True,\n", " separators=[\"\\n\\n\", \"\\n\", \".\", \" \", \"\", \"\\n\\n\\n\"],\n", ")\n", "\n", "docs_processed = []\n", "for doc in documents:\n", " docs_processed += text_splitter.split_documents([doc])\n", "\n", "print(len(docs_processed))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Prompts for test set Generation" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "QA_generation_prompt_template = \"\"\"\n", "Your task is to write a factoid question and an answer given a context.\n", "Your factoid question should be answerable with a specific, concise piece of factual information from the context.\n", "Your factoid question should be formulated in the same style as questions users could ask in a search engine.\n", "This means that your factoid question MUST NOT mention something like \"according to the passage\" or \"context\".\n", "YOU MUST NOT MENTION in the factoid question Here is the factoid question and answer based on the given context\n", "Provide your answer as follows:\n", "\n", "Output:::\n", "Factoid question: (your factoid question)\n", "Answer: (your answer to the factoid question)\n", "\n", "Now here is the context.\n", "\n", "Context: {context}\n", "Output:::\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "page_content='munications Commission (the “Commission”) must regulate\n", "\n", "and supervise the Canadian broadcasting system in a manner\n", "\n", "that\n", "\n", "(i) takes into account the different characteristics of En-\n", "\n", "glish, French and Indigenous language broadcasting and\n", "\n", "the different conditions under which broadcasting under-\n", "\n", "takings that provide English, French or Indigenous lan-\n", "\n", "guage programming operate,\n", "\n", "(ii) takes into account, among other things, the nature and\n", "\n", "diversity of the services provided by broadcasting under-\n", "\n", "takings,\n", "\n", "(iii) ensures that any broadcasting undertaking that can-\n", "\n", "not make maximum or predominant use of Canadian cre-\n", "\n", "ative and other human resources in the creation, produc-\n", "\n", "tion and presentation of programming contributes to\n", "\n", "those Canadian resources in an equitable manner,\n", "\n", "(iv) promotes innovation and is readily adaptable to sci-\n", "\n", "entific and technological change,\n", "\n", "(v) facilitates the provision to Canadians of Canadian pro-\n", "\n", "grams in both official languages, including those created\n", "\n", "and produced by official language minority communities\n", "\n", "Available on the House of Commons website at the following address:\n", "\n", "www.ourcommons.ca\n", "\n", "2021-2022-2023\n", "\n", "Page 3\n", "\n", "in Canada, as well as Canadian programs in Indigenous\n", "\n", "languages,\n", "\n", "(vi) facilitates the provision of programs that are accessi-\n", "\n", "ble without barriers to persons with disabilities,\n", "\n", "(vii) facilitates the provision to Canadians of programs\n", "\n", "created and produced by members of Black or other\n", "\n", "racialized communities,\n", "\n", "(viii) protects the privacy of individuals who are members\n", "\n", "of the audience of programs broadcast, and\n", "\n", "(ix) takes into account the variety of broadcasting under-\n", "\n", "takings to which the Act applies and avoids imposing obli-\n", "\n", "gations on any class of broadcasting undertakings if that\n", "\n", "imposition will not contribute in a material manner to the\n", "\n", "implementation of the broadcasting policy;\n", "\n", "(f) amend the procedure relating to the issuance by the Gov-\n", "\n", "ernor in Council of policy directions to the Commission;' metadata={'source': 'C:\\\\Users\\\\agshi\\\\Desktop\\\\Omdena\\\\Canada Policy\\\\TorontoCanadaChapter_CanPolicyInsight\\\\task5_model_evaluation\\\\data\\\\C-11_4.txt', 'start_index': 1765}\n" ] } ], "source": [ "print(docs_processed[1])" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "def store_factoid_question_answer(text):\n", " # Split the text into question and answer parts\n", " factoid_dict = {}\n", " parts = text.split(\"Answer:\", 1)\n", " \n", " if len(parts) == 2:\n", " question = parts[0].replace(\"Factoid question:\", \"\").strip()\n", " answer = parts[1].strip()\n", " \n", " # Create a dictionary to store the question and answer\n", " factoid_dict = {\n", " \"question\": question,\n", " \"answer\": answer\n", " }\n", " \n", " return factoid_dict\n", " else:\n", " return None" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [], "source": [ "qa_dict = {}\n", "factoid = {}\n", "qa_results = []\n", "for doc in docs_processed[:10]: # Limiting to first 2 documents for testing\n", " # Extract the page content from the Document object\n", " page_content = doc.page_content\n", " \n", " # Generate the prompt for the current document using the template\n", " QA_generation_prompt = QA_generation_prompt_template.format(context=page_content)\n", " \n", " # Invoke the LLM with the generated prompt\n", " response = ragas_llm.invoke(QA_generation_prompt)\n", " content = response.content\n", " # Use the function to extract question and answer from the response\n", " factoid = store_factoid_question_answer(content)\n", " \n", " if factoid:\n", " # Store the question, answer, and context in a dictionary\n", " qa_dict = {\n", " \"question\": factoid['question'],\n", " \"answer\": factoid['answer'],\n", " \"context\": page_content\n", " }\n", " \n", " # Append the dictionary to the results list\n", " qa_results.append(qa_dict)\n", " else:\n", " print(\"Failed to parse response.\")" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.DataFrame(qa_results)" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionanswercontext
0Here is my answer:\\n\\n What does Bill C-11 spe...Unless the programs are prescribed by regulati...Page 1\\n\\nFirst Session, Forty-fourth Parliame...
1Here is the factoid question and answer based ...It facilitates the provision of programs that ...munications Commission (the “Commission”) must...
2What type of undertakings can a person carry o...Other than an online undertaking.implementation of the broadcasting policy;\\n\\n...
3What is defined as \"broadcasting\" in the Broad...Any transmission of programs by radio waves or...Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...
4What is meant by \"online undertaking\" in the c...An online undertaking means an undertaking for...er undertaking or person, but does not include...
5What does not carry on a broadcasting undertak...A person who uses a social media service to up...officielle en situation minoritaire)\\n\\n2021-2...
6Here is the factoid question and answer based ...The implementation of the objectives of the br...(b) that is part of the operations of a primar...
7What is the purpose of subsection (iii) of the...Through its programming and employment opportu...placed by the following:\\n\\n(ii) encourage the...
8What is one way in which the Broadcasting Act ...By supporting the production and broadcasting ...(iii.2) support the production and broadcastin...
9What is the responsibility of all persons who ...They have a responsibility for the programs th...(vi) ensure freedom of expression and journali...
\n", "
" ], "text/plain": [ " question \\\n", "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n", "1 Here is the factoid question and answer based ... \n", "2 What type of undertakings can a person carry o... \n", "3 What is defined as \"broadcasting\" in the Broad... \n", "4 What is meant by \"online undertaking\" in the c... \n", "5 What does not carry on a broadcasting undertak... \n", "6 Here is the factoid question and answer based ... \n", "7 What is the purpose of subsection (iii) of the... \n", "8 What is one way in which the Broadcasting Act ... \n", "9 What is the responsibility of all persons who ... \n", "\n", " answer \\\n", "0 Unless the programs are prescribed by regulati... \n", "1 It facilitates the provision of programs that ... \n", "2 Other than an online undertaking. \n", "3 Any transmission of programs by radio waves or... \n", "4 An online undertaking means an undertaking for... \n", "5 A person who uses a social media service to up... \n", "6 The implementation of the objectives of the br... \n", "7 Through its programming and employment opportu... \n", "8 By supporting the production and broadcasting ... \n", "9 They have a responsibility for the programs th... \n", "\n", " context \n", "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... \n", "1 munications Commission (the “Commission”) must... \n", "2 implementation of the broadcasting policy;\\n\\n... \n", "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... \n", "4 er undertaking or person, but does not include... \n", "5 officielle en situation minoritaire)\\n\\n2021-2... \n", "6 (b) that is part of the operations of a primar... \n", "7 placed by the following:\\n\\n(ii) encourage the... \n", "8 (iii.2) support the production and broadcastin... \n", "9 (vi) ensure freedom of expression and journali... " ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Evaluation of Answers using LLMs" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [], "source": [ "groundedness_prompt_template = \"\"\"\n", "You will be given a context and a question.\n", "Your task is to provide a 'total rating' scoring how well one can answer the given question unambiguously with the given context.\n", "Give your answer as a single integer on a scale of 1 to 5, where 1 means that the question is not answerable at all given the context, and 5 means that the question is clearly and unambiguously answerable with the context.\n", "\n", "Please respond with only a single integer, without any additional text.\n", "\n", "Context: {context}\n", "\n", "Question: {question}\n", "\n", "Rating (1-5):\n", "\"\"\"\n" ] }, { "cell_type": "code", "execution_count": 82, "metadata": {}, "outputs": [], "source": [ "# Add a new column to the DataFrame for storing the groundedness scores\n", "df['groundedness_score'] = None\n", "\n", "# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores\n", "for index, row in df.iterrows():\n", " # Generate the evaluation prompt\n", " evaluation_prompt = groundedness_prompt_template.format(context=row['context'], question=row['question'])\n", " \n", " # Invoke the LLM with the evaluation prompt\n", " response = ragas_llm.invoke(evaluation_prompt)\n", " \n", " # Extract the rating from the response\n", " rating = response.content.strip() # Assuming the response contains just the rating\n", " \n", " try:\n", " # Store the rating in the DataFrame\n", " df.at[index, 'groundedness_score'] = int(rating)\n", " except ValueError:\n", " print(f\"Invalid rating '{rating}' received for index {index}. Skipping...\")" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionanswercontextgroundedness_score
0Here is my answer:\\n\\n What does Bill C-11 spe...Unless the programs are prescribed by regulati...Page 1\\n\\nFirst Session, Forty-fourth Parliame...4
1Here is the factoid question and answer based ...It facilitates the provision of programs that ...munications Commission (the “Commission”) must...4
2What type of undertakings can a person carry o...Other than an online undertaking.implementation of the broadcasting policy;\\n\\n...4
3What is defined as \"broadcasting\" in the Broad...Any transmission of programs by radio waves or...Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...4
4What is meant by \"online undertaking\" in the c...An online undertaking means an undertaking for...er undertaking or person, but does not include...4
\n", "
" ], "text/plain": [ " question \\\n", "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n", "1 Here is the factoid question and answer based ... \n", "2 What type of undertakings can a person carry o... \n", "3 What is defined as \"broadcasting\" in the Broad... \n", "4 What is meant by \"online undertaking\" in the c... \n", "\n", " answer \\\n", "0 Unless the programs are prescribed by regulati... \n", "1 It facilitates the provision of programs that ... \n", "2 Other than an online undertaking. \n", "3 Any transmission of programs by radio waves or... \n", "4 An online undertaking means an undertaking for... \n", "\n", " context groundedness_score \n", "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... 4 \n", "1 munications Commission (the “Commission”) must... 4 \n", "2 implementation of the broadcasting policy;\\n\\n... 4 \n", "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... 4 \n", "4 er undertaking or person, but does not include... 4 " ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [], "source": [ "relevancy_prompt_template = \"\"\"\n", "You will be given a context, a question, and an answer.\n", "Your task is to provide a 'relevancy rating' scoring how relevant the answer is to the given question based on the context.\n", "Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is not relevant at all to the question given the context, and 5 means that the answer is highly relevant to the question given the context.\n", "\n", "Please respond with only a single integer, without any additional text.\n", "\n", "Context: {context}\n", "\n", "Question: {question}\n", "\n", "Answer: {answer}\n", "\n", "Relevancy Rating (1-5):\n", "\"\"\"\n" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [], "source": [ "# Add a new column to the DataFrame for storing the relevancy scores\n", "df['relevancy_score'] = None\n", "\n", "# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores\n", "for index, row in df.iterrows():\n", " # Generate the evaluation prompt for relevancy\n", " relevancy_prompt = relevancy_prompt_template.format(context=row['context'], question=row['question'], answer=row['answer'])\n", " \n", " # Invoke the LLM with the evaluation prompt\n", " response = ragas_llm.invoke(relevancy_prompt)\n", " \n", " # Extract the rating from the response\n", " rating = response.content.strip() # Assuming the response contains just the rating\n", " \n", " try:\n", " # Store the rating in the DataFrame\n", " df.at[index, 'relevancy_score'] = int(rating)\n", " except ValueError:\n", " print(f\"Invalid rating '{rating}' received for index {index}. Skipping...\")\n" ] }, { "cell_type": "code", "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionanswercontextgroundedness_scorerelevancy_score
0Here is my answer:\\n\\n What does Bill C-11 spe...Unless the programs are prescribed by regulati...Page 1\\n\\nFirst Session, Forty-fourth Parliame...45
1Here is the factoid question and answer based ...It facilitates the provision of programs that ...munications Commission (the “Commission”) must...45
2What type of undertakings can a person carry o...Other than an online undertaking.implementation of the broadcasting policy;\\n\\n...44
3What is defined as \"broadcasting\" in the Broad...Any transmission of programs by radio waves or...Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...45
4What is meant by \"online undertaking\" in the c...An online undertaking means an undertaking for...er undertaking or person, but does not include...45
5What does not carry on a broadcasting undertak...A person who uses a social media service to up...officielle en situation minoritaire)\\n\\n2021-2...44
6Here is the factoid question and answer based ...The implementation of the objectives of the br...(b) that is part of the operations of a primar...44
7What is the purpose of subsection (iii) of the...Through its programming and employment opportu...placed by the following:\\n\\n(ii) encourage the...44
8What is one way in which the Broadcasting Act ...By supporting the production and broadcasting ...(iii.2) support the production and broadcastin...45
9What is the responsibility of all persons who ...They have a responsibility for the programs th...(vi) ensure freedom of expression and journali...54
\n", "
" ], "text/plain": [ " question \\\n", "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n", "1 Here is the factoid question and answer based ... \n", "2 What type of undertakings can a person carry o... \n", "3 What is defined as \"broadcasting\" in the Broad... \n", "4 What is meant by \"online undertaking\" in the c... \n", "5 What does not carry on a broadcasting undertak... \n", "6 Here is the factoid question and answer based ... \n", "7 What is the purpose of subsection (iii) of the... \n", "8 What is one way in which the Broadcasting Act ... \n", "9 What is the responsibility of all persons who ... \n", "\n", " answer \\\n", "0 Unless the programs are prescribed by regulati... \n", "1 It facilitates the provision of programs that ... \n", "2 Other than an online undertaking. \n", "3 Any transmission of programs by radio waves or... \n", "4 An online undertaking means an undertaking for... \n", "5 A person who uses a social media service to up... \n", "6 The implementation of the objectives of the br... \n", "7 Through its programming and employment opportu... \n", "8 By supporting the production and broadcasting ... \n", "9 They have a responsibility for the programs th... \n", "\n", " context groundedness_score \\\n", "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... 4 \n", "1 munications Commission (the “Commission”) must... 4 \n", "2 implementation of the broadcasting policy;\\n\\n... 4 \n", "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... 4 \n", "4 er undertaking or person, but does not include... 4 \n", "5 officielle en situation minoritaire)\\n\\n2021-2... 4 \n", "6 (b) that is part of the operations of a primar... 4 \n", "7 placed by the following:\\n\\n(ii) encourage the... 4 \n", "8 (iii.2) support the production and broadcastin... 4 \n", "9 (vi) ensure freedom of expression and journali... 5 \n", "\n", " relevancy_score \n", "0 5 \n", "1 5 \n", "2 4 \n", "3 5 \n", "4 5 \n", "5 4 \n", "6 4 \n", "7 4 \n", "8 5 \n", "9 4 " ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [ "answer_relevancy_prompt_template = \"\"\"\n", "You will be given a context, a question, and an answer.\n", "Your task is to provide an 'answer relevancy rating' scoring how relevant the answer is to the given question based on the context.\n", "Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is not relevant at all to the question given the context, and 5 means that the answer is highly relevant to the question given the context.\n", "\n", "Please respond with only a single integer, without any additional text.\n", "\n", "Context: {context}\n", "\n", "Question: {question}\n", "\n", "Answer: {answer}\n", "\n", "Answer Relevancy Rating (1-5):\n", "\"\"\"\n" ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionanswercontextgroundedness_scorerelevancy_scoreanswer_relevancy_score
0Here is my answer:\\n\\n What does Bill C-11 spe...Unless the programs are prescribed by regulati...Page 1\\n\\nFirst Session, Forty-fourth Parliame...454
1Here is the factoid question and answer based ...It facilitates the provision of programs that ...munications Commission (the “Commission”) must...455
2What type of undertakings can a person carry o...Other than an online undertaking.implementation of the broadcasting policy;\\n\\n...444
3What is defined as \"broadcasting\" in the Broad...Any transmission of programs by radio waves or...Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...454
4What is meant by \"online undertaking\" in the c...An online undertaking means an undertaking for...er undertaking or person, but does not include...455
\n", "
" ], "text/plain": [ " question \\\n", "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n", "1 Here is the factoid question and answer based ... \n", "2 What type of undertakings can a person carry o... \n", "3 What is defined as \"broadcasting\" in the Broad... \n", "4 What is meant by \"online undertaking\" in the c... \n", "\n", " answer \\\n", "0 Unless the programs are prescribed by regulati... \n", "1 It facilitates the provision of programs that ... \n", "2 Other than an online undertaking. \n", "3 Any transmission of programs by radio waves or... \n", "4 An online undertaking means an undertaking for... \n", "\n", " context groundedness_score \\\n", "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... 4 \n", "1 munications Commission (the “Commission”) must... 4 \n", "2 implementation of the broadcasting policy;\\n\\n... 4 \n", "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... 4 \n", "4 er undertaking or person, but does not include... 4 \n", "\n", " relevancy_score answer_relevancy_score \n", "0 5 4 \n", "1 5 5 \n", "2 4 4 \n", "3 5 4 \n", "4 5 5 " ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Assuming df is your existing DataFrame with 'question', 'answer', and 'context' columns\n", "df['answer_relevancy_score'] = None\n", "\n", "# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores\n", "for index, row in df.iterrows():\n", " # Generate the evaluation prompt for answer relevancy\n", " answer_relevancy_prompt = answer_relevancy_prompt_template.format(\n", " context=row['context'],\n", " question=row['question'],\n", " answer=row['answer']\n", " )\n", " \n", " # Invoke the LLM with the evaluation prompt\n", " response = ragas_llm.invoke(answer_relevancy_prompt)\n", " \n", " # Extract the rating from the response\n", " rating = response.content.strip() # Assuming the response contains just the rating\n", " \n", " try:\n", " # Store the rating in the DataFrame\n", " df.at[index, 'answer_relevancy_score'] = int(rating)\n", " except ValueError:\n", " print(f\"Invalid rating '{rating}' received for index {index}. Skipping...\")\n", "\n", "# Display the updated DataFrame with answer relevancy scores\n", "df.head()\n" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [], "source": [ "bias_detection_prompt_template = \"\"\"\n", "You will be given a context, a question, and an answer.\n", "Your task is to assess whether the provided answer exhibits any bias, favoring or disfavoring certain groups, ideas, or perspectives.\n", "Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is completely unbiased, and 5 means that the answer is highly biased.\n", "\n", "Please respond with only a single integer, without any additional text.\n", "\n", "Context: {context}\n", "\n", "Question: {question}\n", "\n", "Answer: {answer}\n", "\n", "Bias Rating (1-5):\n", "\"\"\"\n" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
questionanswercontextgroundedness_scorerelevancy_scoreanswer_relevancy_scorebias_score
0Here is my answer:\\n\\n What does Bill C-11 spe...Unless the programs are prescribed by regulati...Page 1\\n\\nFirst Session, Forty-fourth Parliame...4542
1Here is the factoid question and answer based ...It facilitates the provision of programs that ...munications Commission (the “Commission”) must...4553
2What type of undertakings can a person carry o...Other than an online undertaking.implementation of the broadcasting policy;\\n\\n...4442
3What is defined as \"broadcasting\" in the Broad...Any transmission of programs by radio waves or...Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA...4542
4What is meant by \"online undertaking\" in the c...An online undertaking means an undertaking for...er undertaking or person, but does not include...4552
\n", "
" ], "text/plain": [ " question \\\n", "0 Here is my answer:\\n\\n What does Bill C-11 spe... \n", "1 Here is the factoid question and answer based ... \n", "2 What type of undertakings can a person carry o... \n", "3 What is defined as \"broadcasting\" in the Broad... \n", "4 What is meant by \"online undertaking\" in the c... \n", "\n", " answer \\\n", "0 Unless the programs are prescribed by regulati... \n", "1 It facilitates the provision of programs that ... \n", "2 Other than an online undertaking. \n", "3 Any transmission of programs by radio waves or... \n", "4 An online undertaking means an undertaking for... \n", "\n", " context groundedness_score \\\n", "0 Page 1\\n\\nFirst Session, Forty-fourth Parliame... 4 \n", "1 munications Commission (the “Commission”) must... 4 \n", "2 implementation of the broadcasting policy;\\n\\n... 4 \n", "3 Page 4\\n\\nPage 5\\n\\n70-71 ELIZABETH II – 1 CHA... 4 \n", "4 er undertaking or person, but does not include... 4 \n", "\n", " relevancy_score answer_relevancy_score bias_score \n", "0 5 4 2 \n", "1 5 5 3 \n", "2 4 4 2 \n", "3 5 4 2 \n", "4 5 5 2 " ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Assuming df is your existing DataFrame with 'question', 'answer', and 'context' columns\n", "df['bias_score'] = None\n", "\n", "# Loop over each row in the DataFrame to generate the evaluation prompts and get the bias scores\n", "for index, row in df.iterrows():\n", " # Generate the evaluation prompt for bias detection\n", " bias_detection_prompt = bias_detection_prompt_template.format(\n", " context=row['context'],\n", " question=row['question'],\n", " answer=row['answer']\n", " )\n", " \n", " # Invoke the LLM with the bias detection prompt\n", " response = ragas_llm.invoke(bias_detection_prompt)\n", " \n", " # Extract the bias rating from the response\n", " rating = response.content.strip() # Assuming the response contains just the rating\n", " \n", " try:\n", " # Store the rating in the DataFrame\n", " df.at[index, 'bias_score'] = int(rating)\n", " except ValueError:\n", " print(f\"Invalid bias rating '{rating}' received for index {index}. Skipping...\")\n", "\n", "# Display the updated DataFrame with bias scores\n", "df.head()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }