import warnings

warnings.filterwarnings("ignore")

import os
import re
from functools import lru_cache

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

from lm import get_query_llm, get_answer_llm

# Two LLMs: a lightweight one for query refinement, a stronger one for answering.
q_llm = get_query_llm()
a_llm = get_answer_llm()

# Sentence-embedding model used for semantic retrieval over the indexed chunks.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Directory holding the precomputed embeddings, FAISS index, and chunk file.
save_dir = "."


@lru_cache(maxsize=1)
def load_embeddings_and_index(save_dir="saved_data"):
    """Load the precomputed embeddings, FAISS index, and text chunks from disk.

    Cached with lru_cache so the artifacts are read only once per process.
    """
    embeddings = np.load(os.path.join(save_dir, "embeddings.npy"))
    index = faiss.read_index(os.path.join(save_dir, "index.faiss"))
    with open(os.path.join(save_dir, "chunks.txt"), "r", encoding="utf-8") as f:
        chunks = [line.strip() for line in f]
    return embeddings, index, chunks
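

# A minimal sketch of how the artifacts loaded above could be produced. This is
# an assumption, not part of the pipeline: the real build step may differ. It
# presumes the source text is already split into newline-free chunks, since the
# loader above reads one chunk per line of chunks.txt.
def build_embeddings_and_index(chunks, save_dir="saved_data"):
    os.makedirs(save_dir, exist_ok=True)
    embeddings = np.array(embedding_model.encode(chunks)).astype("float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])  # exact L2 search
    index.add(embeddings)
    np.save(os.path.join(save_dir, "embeddings.npy"), embeddings)
    faiss.write_index(index, os.path.join(save_dir, "index.faiss"))
    with open(os.path.join(save_dir, "chunks.txt"), "w", encoding="utf-8") as f:
        f.write("\n".join(chunks))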


# Words and phrases that signal the user wants an explanation rather than the
# exact constitutional text.
explanation_words = [
    "explain", "elaborate", "describe", "clarify", "detail", "break down",
    "simplify", "outline", "in simple words", "demonstrate", "illustrate",
    "interpret", "expand on", "go over", "walk through", "define", "unpack",
    "decode", "shed light on", "analyze", "discuss", "make clear", "reveal",
    "disclose", "comment on", "talk about", "lay out", "spell out", "express",
    "delve into", "explore", "enlighten", "present", "review", "report",
    "state", "point out", "inform", "highlight", "brief",
]


def is_explanation_query(query):
    """Return True if the query asks for an explanation of an article."""
    return any(word in query.lower() for word in explanation_words)


def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    """Return (chunks, var), where var selects the downstream chain:
    1 = exact text of a specific article, 2 = explanation of a specific
    article, 3 = general semantic search over the corpus.
    """
    numbers = re.findall(r"\d+", query)
    var = 1
    # Direct lookup when the query names a specific article number.
    if "article" in query.lower() and numbers:
        article_number = str(numbers[0])
        for chunk in chunks:
            # Chunks are stored with an "article;<number>" prefix.
            if chunk.lower().startswith(f"article;{article_number}"):
                if is_explanation_query(query):
                    var = 2
                return [chunk], var

    # Otherwise fall back to semantic search over the FAISS index.
    query_embedding = np.array(embedding_model.encode([query])).astype("float32")
    distances, indices = index.search(query_embedding, top_k)
    relevant_chunks = [chunks[i] for i in indices[0]]
    var = 3
    return relevant_chunks, var


# Query-refinement prompt: fix spelling and convert number words to digits
# without otherwise altering the query.
refine_prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal assistant specialized in cleaning user queries. "
     "Your task is to fix spelling mistakes and convert number words to digits only (e.g., 'three' to '3'). "
     "Do not correct grammar, punctuation, or capitalization. "
     "Do not restructure or rephrase the query in any way. "
     "Do not add or remove words. "
     "If the input is already clean or does not make sense, return it exactly as it is. "
     "Only return one corrected query."),
    ("human", "{query}"),
])
refine_chain = LLMChain(llm=q_llm, prompt=refine_prompt_template)


class LegalResponse(BaseModel):
    title: str = Field(..., description="The title of the article")
    answer: str = Field(..., description="The assistant's answer to the user's query")
    is_relevant: bool = Field(..., description="True if the query is relevant to the Constitution of Pakistan, otherwise False")
    article_number: str = Field(..., description="Mentioned article number if available, else empty string")


parser = PydanticOutputParser(pydantic_object=LegalResponse)


# Prompt for returning the exact constitutional text of a requested article.
answer_prompt_template_query = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal assistant with expertise in the Constitution of Pakistan. "
     "Return the answer in a structured format. "
     "Your task is to extract and present the exact constitutional text, without paraphrasing, ensuring accuracy and fidelity to the original wording. "
     "Always return the title."),
    ("human",
     "User Query: {query}\n\n"
     "Instructions:\n"
     "1. Return the title.\n"
     "2. Return the exact wording from the Constitution.\n"
     "3. If the query references a specific article or sub-clause (e.g., Article 11(3)(b), Article 11(b), or 11(i)), return only the exact wording of that clause from the Constitution; do not include the full article unless required by structure.\n"
     "4. Indicate whether the query is related to the Constitution of Pakistan (Yes/No).\n"
     "5. Extract and return the article number if it is mentioned, including any sub-clause (e.g., 1, 2, or 1(a)).\n\n"
     "Context:\n{context}\n\n"
     "{format_instructions}\n"),
])

answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)


# Prompt for explaining the text of a specific article in plain language.
explain_article_prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a helpful assistant that analyzes human-written legal or constitutional text. "
     "Your task is to return a structured response with the following fields:\n"
     "- title: The title of the article, if available or derivable.\n"
     "- answer: A clear explanation or summary of the content.\n"
     "- is_relevant: true if the content is relevant to the legal or constitutional domain, otherwise false.\n"
     "- article_number: The article number (e.g., Article 11 or Article 3(a)), or 'None' if not found."),
    ("human",
     "Query:\n{query}\n\n"
     "Context:\n{context}\n\n"
     "{format_instructions}"),
])

explain_chain_article = LLMChain(llm=a_llm, prompt=explain_article_prompt_template, output_parser=parser)


# Prompt for the general semantic-search path: answer strictly from the
# retrieved context chunks.
explanation_prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal expert assistant with deep knowledge of the Pakistan Penal Code, 1860 (PPC). "
     "You will receive a user query and a set of context chunks from the law. "
     "Your task is to determine if the query is answerable strictly based on the provided context. "
     "If it is, provide a structured explanation based on that context, without copying or repeating the context text verbatim. "
     "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating Is Relevant: False, and do not fabricate any information."),
    ("human",
     "User Query: {query}\n\n"
     "Context (Extracted Chunks):\n{context}\n\n"
     "Instructions:\n"
     "1. Use only the information in the context to determine if the query can be answered.\n"
     "2. Do NOT include or repeat the context text directly in your answer. Summarize or paraphrase when needed.\n"
     "3. If the query is answerable based on the context, explain the related section or clause clearly and precisely:\n"
     "   - Include the section number if available.\n"
     "   - Describe its meaning and how it functions within the PPC.\n"
     "4. Do NOT use real-world references, court cases, or examples.\n"
     "5. Your final output must include:\n"
     "   - A detailed explanation of the relevant section or provision.\n"
     "   - Is Relevant: True/False\n"
     "   - Related Section(s): the section number(s), if any.\n\n"
     "{format_instructions}\n"),
])

answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template, output_parser=parser)


embeddings, index, chunks = load_embeddings_and_index(save_dir)


def get_legal_response(query):
    # Refine the query first; fall back to the raw query if refinement fails.
    try:
        refined_query = refine_chain.run(query=query)
    except Exception as e:
        print(f"[Refinement Error] Using raw query instead: {e}")
        refined_query = query

    print("\nRefined Query:", refined_query)

    relevant_chunks, var = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)

    print("\nTop Relevant Chunks:")
    for i, chunk in enumerate(relevant_chunks, 1):
        print(f"\nChunk {i}:\n{'-' * 50}\n{chunk}")

    context = "\n\n".join(relevant_chunks)
    format_instructions = parser.get_format_instructions()

    if var == 1:
        # Exact text of a specific article.
        response = answer_chain_article.run(query=refined_query, context=context, format_instructions=format_instructions)
    elif var == 2:
        # Explanation of a specific article.
        response = explain_chain_article.run(query=refined_query, context=context, format_instructions=format_instructions)
    else:
        # General semantic-search answer.
        response = answer_chain_explanation.run(query=refined_query, context=context, format_instructions=format_instructions)

    return {
        "title": response.title,
        "answer": response.answer,
        "is_relevant": response.is_relevant,
        "article_number": response.article_number,
    }
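

# Example usage: a minimal sketch, assuming the embeddings/index/chunk files
# exist in save_dir and the LLM credentials required by lm.py are configured.
if __name__ == "__main__":
    result = get_legal_response("Explain Article 19 of the Constitution of Pakistan")
    print(result["title"])
    print(result["answer"])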