# Import the required libraries

In [11]:
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import TokenTextSplitter
from ragas.testset.extractor import KeyphraseExtractor
from ragas.testset.docstore import InMemoryDocumentStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader

# Starting the model using ollama

In [4]:
### Build
ragas_llm = ChatOllama(model="llama3:8b")
embeddings = OllamaEmbeddings(model="llama3:8b")

# Loading the document from the directory

In [None]:
# Try loading from a simple directory with plain text files
loader = DirectoryLoader(r"C:\Users\agshi\Desktop\Omdena\Canada Policy\TorontoCanadaChapter_CanPolicyInsight\task5_model_evaluation\data")

# Load documents
documents = loader.load()


# Creating chunks of documents

In [12]:
# Creating chunks of documents
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=200,
    add_start_index=True,
    separators=["\n\n", "\n", ".", " ", "", "\n\n\n"],
)

docs_processed = []
for doc in documents:
    docs_processed += text_splitter.split_documents([doc])

print(len(docs_processed))

58


# Prompts for test set Generation

In [46]:
QA_generation_prompt_template = """
Your task is to write a factoid question and an answer given a context.
Your factoid question should be answerable with a specific, concise piece of factual information from the context.
Your factoid question should be formulated in the same style as questions users could ask in a search engine.
This means that your factoid question MUST NOT mention something like "according to the passage" or "context".
YOU MUST NOT MENTION in the factoid question Here is the factoid question and answer based on the given context
Provide your answer as follows:

Output:::
Factoid question: (your factoid question)
Answer: (your answer to the factoid question)

Now here is the context.

Context: {context}
Output:::
"""

In [24]:
print(docs_processed[1])

page_content='munications Commission (the “Commission”) must regulate

and supervise the Canadian broadcasting system in a manner

that

(i) takes into account the different characteristics of En-

glish, French and Indigenous language broadcasting and

the different conditions under which broadcasting under-

takings that provide English, French or Indigenous lan-

guage programming operate,

(ii) takes into account, among other things, the nature and

diversity of the services provided by broadcasting under-

takings,

(iii) ensures that any broadcasting undertaking that can-

not make maximum or predominant use of Canadian cre-

ative and other human resources in the creation, produc-

tion and presentation of programming contributes to

those Canadian resources in an equitable manner,

(iv) promotes innovation and is readily adaptable to sci-

entific and technological change,

(v) facilitates the provision to Canadians of Canadian pro-

grams in both official languages, including 

In [58]:
def store_factoid_question_answer(text):
    # Split the text into question and answer parts
    factoid_dict = {}
    parts = text.split("Answer:", 1)
    
    if len(parts) == 2:
        question = parts[0].replace("Factoid question:", "").strip()
        answer = parts[1].strip()
        
        # Create a dictionary to store the question and answer
        factoid_dict = {
            "question": question,
            "answer": answer
        }
        
        return factoid_dict
    else:
        return None

In [78]:
qa_dict = {}
factoid = {}
qa_results = []
for doc in docs_processed[:10]:  # Limiting to first 2 documents for testing
    # Extract the page content from the Document object
    page_content = doc.page_content
    
    # Generate the prompt for the current document using the template
    QA_generation_prompt = QA_generation_prompt_template.format(context=page_content)
    
    # Invoke the LLM with the generated prompt
    response = ragas_llm.invoke(QA_generation_prompt)
    content = response.content
    # Use the function to extract question and answer from the response
    factoid = store_factoid_question_answer(content)
    
    if factoid:
        # Store the question, answer, and context in a dictionary
        qa_dict = {
            "question": factoid['question'],
            "answer": factoid['answer'],
            "context": page_content
        }
        
        # Append the dictionary to the results list
        qa_results.append(qa_dict)
    else:
        print("Failed to parse response.")

In [79]:
import pandas as pd
df = pd.DataFrame(qa_results)

In [80]:
df.head(10)

Unnamed: 0,question,answer,context
0,Here is my answer:\n\n What does Bill C-11 spe...,Unless the programs are prescribed by regulati...,"Page 1\n\nFirst Session, Forty-fourth Parliame..."
1,Here is the factoid question and answer based ...,It facilitates the provision of programs that ...,munications Commission (the “Commission”) must...
2,What type of undertakings can a person carry o...,Other than an online undertaking.,implementation of the broadcasting policy;\n\n...
3,"What is defined as ""broadcasting"" in the Broad...",Any transmission of programs by radio waves or...,Page 4\n\nPage 5\n\n70-71 ELIZABETH II – 1 CHA...
4,"What is meant by ""online undertaking"" in the c...",An online undertaking means an undertaking for...,"er undertaking or person, but does not include..."
5,What does not carry on a broadcasting undertak...,A person who uses a social media service to up...,officielle en situation minoritaire)\n\n2021-2...
6,Here is the factoid question and answer based ...,The implementation of the objectives of the br...,(b) that is part of the operations of a primar...
7,What is the purpose of subsection (iii) of the...,Through its programming and employment opportu...,placed by the following:\n\n(ii) encourage the...
8,What is one way in which the Broadcasting Act ...,By supporting the production and broadcasting ...,(iii.2) support the production and broadcastin...
9,What is the responsibility of all persons who ...,They have a responsibility for the programs th...,(vi) ensure freedom of expression and journali...


# Evaluation of Answers using LLMs

In [81]:
groundedness_prompt_template = """
You will be given a context and a question.
Your task is to provide a 'total rating' scoring how well one can answer the given question unambiguously with the given context.
Give your answer as a single integer on a scale of 1 to 5, where 1 means that the question is not answerable at all given the context, and 5 means that the question is clearly and unambiguously answerable with the context.

Please respond with only a single integer, without any additional text.

Context: {context}

Question: {question}

Rating (1-5):
"""


In [82]:
# Add a new column to the DataFrame for storing the groundedness scores
df['groundedness_score'] = None

# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores
for index, row in df.iterrows():
    # Generate the evaluation prompt
    evaluation_prompt = groundedness_prompt_template.format(context=row['context'], question=row['question'])
    
    # Invoke the LLM with the evaluation prompt
    response = ragas_llm.invoke(evaluation_prompt)
    
    # Extract the rating from the response
    rating = response.content.strip()  # Assuming the response contains just the rating
    
    try:
        # Store the rating in the DataFrame
        df.at[index, 'groundedness_score'] = int(rating)
    except ValueError:
        print(f"Invalid rating '{rating}' received for index {index}. Skipping...")

In [83]:
df.head()

Unnamed: 0,question,answer,context,groundedness_score
0,Here is my answer:\n\n What does Bill C-11 spe...,Unless the programs are prescribed by regulati...,"Page 1\n\nFirst Session, Forty-fourth Parliame...",4
1,Here is the factoid question and answer based ...,It facilitates the provision of programs that ...,munications Commission (the “Commission”) must...,4
2,What type of undertakings can a person carry o...,Other than an online undertaking.,implementation of the broadcasting policy;\n\n...,4
3,"What is defined as ""broadcasting"" in the Broad...",Any transmission of programs by radio waves or...,Page 4\n\nPage 5\n\n70-71 ELIZABETH II – 1 CHA...,4
4,"What is meant by ""online undertaking"" in the c...",An online undertaking means an undertaking for...,"er undertaking or person, but does not include...",4


In [84]:
relevancy_prompt_template = """
You will be given a context, a question, and an answer.
Your task is to provide a 'relevancy rating' scoring how relevant the answer is to the given question based on the context.
Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is not relevant at all to the question given the context, and 5 means that the answer is highly relevant to the question given the context.

Please respond with only a single integer, without any additional text.

Context: {context}

Question: {question}

Answer: {answer}

Relevancy Rating (1-5):
"""


In [85]:
# Add a new column to the DataFrame for storing the relevancy scores
df['relevancy_score'] = None

# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores
for index, row in df.iterrows():
    # Generate the evaluation prompt for relevancy
    relevancy_prompt = relevancy_prompt_template.format(context=row['context'], question=row['question'], answer=row['answer'])
    
    # Invoke the LLM with the evaluation prompt
    response = ragas_llm.invoke(relevancy_prompt)
    
    # Extract the rating from the response
    rating = response.content.strip()  # Assuming the response contains just the rating
    
    try:
        # Store the rating in the DataFrame
        df.at[index, 'relevancy_score'] = int(rating)
    except ValueError:
        print(f"Invalid rating '{rating}' received for index {index}. Skipping...")


In [86]:
df

Unnamed: 0,question,answer,context,groundedness_score,relevancy_score
0,Here is my answer:\n\n What does Bill C-11 spe...,Unless the programs are prescribed by regulati...,"Page 1\n\nFirst Session, Forty-fourth Parliame...",4,5
1,Here is the factoid question and answer based ...,It facilitates the provision of programs that ...,munications Commission (the “Commission”) must...,4,5
2,What type of undertakings can a person carry o...,Other than an online undertaking.,implementation of the broadcasting policy;\n\n...,4,4
3,"What is defined as ""broadcasting"" in the Broad...",Any transmission of programs by radio waves or...,Page 4\n\nPage 5\n\n70-71 ELIZABETH II – 1 CHA...,4,5
4,"What is meant by ""online undertaking"" in the c...",An online undertaking means an undertaking for...,"er undertaking or person, but does not include...",4,5
5,What does not carry on a broadcasting undertak...,A person who uses a social media service to up...,officielle en situation minoritaire)\n\n2021-2...,4,4
6,Here is the factoid question and answer based ...,The implementation of the objectives of the br...,(b) that is part of the operations of a primar...,4,4
7,What is the purpose of subsection (iii) of the...,Through its programming and employment opportu...,placed by the following:\n\n(ii) encourage the...,4,4
8,What is one way in which the Broadcasting Act ...,By supporting the production and broadcasting ...,(iii.2) support the production and broadcastin...,4,5
9,What is the responsibility of all persons who ...,They have a responsibility for the programs th...,(vi) ensure freedom of expression and journali...,5,4


In [87]:
answer_relevancy_prompt_template = """
You will be given a context, a question, and an answer.
Your task is to provide an 'answer relevancy rating' scoring how relevant the answer is to the given question based on the context.
Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is not relevant at all to the question given the context, and 5 means that the answer is highly relevant to the question given the context.

Please respond with only a single integer, without any additional text.

Context: {context}

Question: {question}

Answer: {answer}

Answer Relevancy Rating (1-5):
"""


In [88]:
# Assuming df is your existing DataFrame with 'question', 'answer', and 'context' columns
df['answer_relevancy_score'] = None

# Loop over each row in the DataFrame to generate the evaluation prompts and get the scores
for index, row in df.iterrows():
    # Generate the evaluation prompt for answer relevancy
    answer_relevancy_prompt = answer_relevancy_prompt_template.format(
        context=row['context'],
        question=row['question'],
        answer=row['answer']
    )
    
    # Invoke the LLM with the evaluation prompt
    response = ragas_llm.invoke(answer_relevancy_prompt)
    
    # Extract the rating from the response
    rating = response.content.strip()  # Assuming the response contains just the rating
    
    try:
        # Store the rating in the DataFrame
        df.at[index, 'answer_relevancy_score'] = int(rating)
    except ValueError:
        print(f"Invalid rating '{rating}' received for index {index}. Skipping...")

# Display the updated DataFrame with answer relevancy scores
df.head()


Unnamed: 0,question,answer,context,groundedness_score,relevancy_score,answer_relevancy_score
0,Here is my answer:\n\n What does Bill C-11 spe...,Unless the programs are prescribed by regulati...,"Page 1\n\nFirst Session, Forty-fourth Parliame...",4,5,4
1,Here is the factoid question and answer based ...,It facilitates the provision of programs that ...,munications Commission (the “Commission”) must...,4,5,5
2,What type of undertakings can a person carry o...,Other than an online undertaking.,implementation of the broadcasting policy;\n\n...,4,4,4
3,"What is defined as ""broadcasting"" in the Broad...",Any transmission of programs by radio waves or...,Page 4\n\nPage 5\n\n70-71 ELIZABETH II – 1 CHA...,4,5,4
4,"What is meant by ""online undertaking"" in the c...",An online undertaking means an undertaking for...,"er undertaking or person, but does not include...",4,5,5


In [90]:
bias_detection_prompt_template = """
You will be given a context, a question, and an answer.
Your task is to assess whether the provided answer exhibits any bias, favoring or disfavoring certain groups, ideas, or perspectives.
Give your answer as a single integer on a scale of 1 to 5, where 1 means that the answer is completely unbiased, and 5 means that the answer is highly biased.

Please respond with only a single integer, without any additional text.

Context: {context}

Question: {question}

Answer: {answer}

Bias Rating (1-5):
"""


In [91]:
# Assuming df is your existing DataFrame with 'question', 'answer', and 'context' columns
df['bias_score'] = None

# Loop over each row in the DataFrame to generate the evaluation prompts and get the bias scores
for index, row in df.iterrows():
    # Generate the evaluation prompt for bias detection
    bias_detection_prompt = bias_detection_prompt_template.format(
        context=row['context'],
        question=row['question'],
        answer=row['answer']
    )
    
    # Invoke the LLM with the bias detection prompt
    response = ragas_llm.invoke(bias_detection_prompt)
    
    # Extract the bias rating from the response
    rating = response.content.strip()  # Assuming the response contains just the rating
    
    try:
        # Store the rating in the DataFrame
        df.at[index, 'bias_score'] = int(rating)
    except ValueError:
        print(f"Invalid bias rating '{rating}' received for index {index}. Skipping...")

# Display the updated DataFrame with bias scores
df.head()


Unnamed: 0,question,answer,context,groundedness_score,relevancy_score,answer_relevancy_score,bias_score
0,Here is my answer:\n\n What does Bill C-11 spe...,Unless the programs are prescribed by regulati...,"Page 1\n\nFirst Session, Forty-fourth Parliame...",4,5,4,2
1,Here is the factoid question and answer based ...,It facilitates the provision of programs that ...,munications Commission (the “Commission”) must...,4,5,5,3
2,What type of undertakings can a person carry o...,Other than an online undertaking.,implementation of the broadcasting policy;\n\n...,4,4,4,2
3,"What is defined as ""broadcasting"" in the Broad...",Any transmission of programs by radio waves or...,Page 4\n\nPage 5\n\n70-71 ELIZABETH II – 1 CHA...,4,5,4,2
4,"What is meant by ""online undertaking"" in the c...",An online undertaking means an undertaking for...,"er undertaking or person, but does not include...",4,5,5,2
