import warnings

warnings.filterwarnings("ignore")

import os
import re
from functools import lru_cache

import faiss
import numpy as np
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from sentence_transformers import SentenceTransformer

from lm import get_query_llm, get_answer_llm  # Your custom LLM wrapper functions

# Initialize LLMs: one for query cleanup, one for answering
q_llm = get_query_llm()
a_llm = get_answer_llm()

# Load the sentence-transformer model once, globally
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

save_dir = "."  # directory holding embeddings.npy, index.faiss, chunks.txt


# Cache the embeddings/index load so repeated calls don't re-read from disk
@lru_cache(maxsize=1)
def load_embeddings_and_index(save_dir="saved_data"):
    embeddings = np.load(os.path.join(save_dir, "embeddings.npy"))
    index = faiss.read_index(os.path.join(save_dir, "index.faiss"))
    with open(os.path.join(save_dir, "chunks.txt"), "r", encoding="utf-8") as f:
        chunks = [line.strip() for line in f]
    return embeddings, index, chunks


# Words that signal the user wants an explanation rather than the verbatim text
explanation_words = [
    "explain", "elaborate", "describe", "clarify", "detail", "break down",
    "simplify", "outline", "in simple words", "demonstrate", "illustrate",
    "interpret", "expand on", "go over", "walk through", "define", "unpack",
    "decode", "shed light on", "analyze", "discuss", "make clear", "reveal",
    "disclose", "comment on", "talk about", "lay out", "spell out", "express",
    "delve into", "explore", "enlighten", "present", "review", "report",
    "state", "point out", "inform", "highlight", "brief",
]


def is_explanation_query(query):
    """Return True if the query asks for an explanation rather than exact wording."""
    query_lower = query.lower()
    return any(word in query_lower for word in explanation_words)


def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    """Return (chunks, mode): mode 1 = exact article wording, 2 = article found
    but an explanation was requested, 3 = semantic search fallback."""
    numbers = re.findall(r"\d+", query)
    if "article" in query.lower() and numbers:
        article_number = numbers[0]
        for chunk in chunks:
            # Chunks are assumed to be prefixed "Article;<number>"; the word
            # boundary keeps Article 1 from matching Article 10, 11, ...
            if re.match(rf"article;{article_number}\b", chunk.lower()):
                mode = 2 if is_explanation_query(query) else 1
                return [chunk], mode
    # No direct article hit: semantic search over the FAISS index
    query_embedding = np.asarray(embedding_model.encode([query]), dtype="float32")
    _distances, indices = index.search(query_embedding, top_k)
    relevant_chunks = [chunks[i] for i in indices[0]]
    return relevant_chunks, 3


# Prompt to refine the query (spelling fixes and number words to digits only)
refine_prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal assistant specialized in cleaning user queries. "
     "Your task is to fix spelling mistakes and convert number words to digits only (e.g., 'three' to '3'). "
     "Do not correct grammar, punctuation, or capitalization. "
     "Do not restructure or rephrase the query in any way. "
     "Do not add or remove words. "
     "If the input is already clean or does not make sense, return it exactly as it is. "
     "Only return one corrected query."),
    ("human", "{query}"),
])
refine_chain = LLMChain(llm=q_llm, prompt=refine_prompt_template)


# Define the response schema
class LegalResponse(BaseModel):
    title: str = Field(..., description="Title of the article, if available")
    answer: str = Field(..., description="The assistant's answer to the user's query")
    is_relevant: bool = Field(..., description="True if the query is relevant to the Constitution of Pakistan, otherwise False")
    article_number: str = Field(..., description="Mentioned article number if available, else an empty string")


parser = PydanticOutputParser(pydantic_object=LegalResponse)

# Prompt for returning an article's exact wording
answer_prompt_template_query = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal assistant with expertise in the Constitution of Pakistan. "
     "Return the answer in a structured format. "
     "Your task is to extract and present the exact constitutional text, without paraphrasing, "
     "ensuring accuracy and fidelity to the original wording. "
     "Always include the title."),
    ("human",
     "User Query: {query}\n\n"
     "Instructions:\n"
     "1. Return the title.\n"
     "2. Return the exact wording from the Constitution.\n"
     "3. If the query references a specific article or sub-clause (e.g., Article 11(3)(b), Article 11(b), or 11(i)), "
     "return only the exact wording of that clause; do not include the full article unless required by its structure.\n"
     "4. Indicate whether the query is related to the Constitution of Pakistan (Yes/No).\n"
     "5. Extract and return the article number if mentioned, including any sub-clause (e.g., 1, 2, or 1(a)).\n\n"
     "Context:\n{context}\n\n"
     "{format_instructions}\n"),
])
answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)

# Prompt for explaining a specific article
explain_article_prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a helpful assistant that analyzes human-written legal or constitutional text. "
     "Your task is to return a structured response with the following fields:\n"
     "- title: The title of the article, if available or derivable.\n"
     "- answer: A clear explanation or summary of the content.\n"
     "- is_relevant: true if the content is relevant to the legal or constitutional domain, otherwise false.\n"
     "- article_number: Extract the article number (e.g., Article 11 or Article 3(a)), or return 'None' if not found."),
    ("human",
     "Query:\n{query}\n\n"
     "Context:\n{context}\n\n"
     "Return your response in the following format:\n\n"
     "title:\n"
     "answer:\n"
     "is_relevant:\n"
     "article_number:\n\n"
     "{format_instructions}"),
])
explain_chain_article = LLMChain(llm=a_llm, prompt=explain_article_prompt_template, output_parser=parser)

# Prompt for explanation-style answers over semantically retrieved chunks
explanation_prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal expert assistant with deep knowledge of the Constitution of Pakistan. "
     "You will receive a user query and a set of context chunks from the law. "
     "Your task is to determine if the query is answerable strictly based on the provided context. "
     "If it is, provide a structured explanation based on that context, without copying or repeating the context text verbatim. "
     "If the information needed to answer is not found in the provided chunks, respond with a structured message "
     "indicating Is Relevant: False, and do not fabricate any information."),
    ("human",
     "User Query: {query}\n\n"
     "Context (Extracted Chunks):\n{context}\n\n"
     "Instructions:\n"
     "1. Use only the information in the context to determine if the query can be answered.\n"
     "2. Do NOT include or repeat the context text directly in your answer; summarize or paraphrase where needed.\n"
     "3. If the query is answerable based on the context, explain the related article or clause clearly and precisely:\n"
     "   - Include the article number if available.\n"
     "   - Describe its meaning and how it functions within the Constitution.\n"
     "4. Do NOT use real-world references, court cases, or examples.\n"
     "5. Your final output must include the following structured fields:\n"
     "   - A *detailed explanation* of the relevant article or provision.\n"
     "   - Is Relevant: True/False\n"
     "   - Related Article(s): list the article number(s), if any.\n\n"
     "{format_instructions}\n"),
])
answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template, output_parser=parser)

# Load data once at import time
embeddings, index, chunks = load_embeddings_and_index(save_dir)


# Main entry point
def get_legal_response(query):
    try:
        refined_query = refine_chain.run(query=query)
    except Exception as e:
        print(f"[Refinement Error] Using raw query instead: {e}")
        refined_query = query

    print("\nRefined Query:", refined_query)

    relevant_chunks, mode = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)

    print("\nTop Relevant Chunks:")
    for i, chunk in enumerate(relevant_chunks, 1):
        print(f"\nChunk {i}:\n{'-' * 50}\n{chunk}")

    context = "\n\n".join(relevant_chunks)
    format_instructions = parser.get_format_instructions()

    if mode == 1:
        # Exact article match: return the verbatim constitutional text
        response = answer_chain_article.run(
            query=refined_query, context=context, format_instructions=format_instructions)
    elif mode == 2:
        # Article match, but the user asked for an explanation
        response = explain_chain_article.run(
            query=refined_query, context=context, format_instructions=format_instructions)
    else:
        # Semantic retrieval: explain from the top-k chunks
        response = answer_chain_explanation.run(
            query=refined_query, context=context, format_instructions=format_instructions)

    return {
        "title": response.title,
        "answer": response.answer,
        "is_relevant": response.is_relevant,
        "article_number": response.article_number,
    }
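

# ---------------------------------------------------------------------------
# Hedged sketches; these are assumptions, not part of the original pipeline.
#
# 1) A minimal sketch of how the three artifacts read by
#    load_embeddings_and_index might be produced. build_index is a
#    hypothetical helper name, and the "Article;<number>" line prefix in
#    chunks.txt is inferred from the prefix match in retrieve_relevant_chunks.
def build_index(raw_chunks, save_dir="saved_data"):
    """Embed one passage per list entry, build a FAISS index, persist all three files."""
    os.makedirs(save_dir, exist_ok=True)
    vectors = np.asarray(embedding_model.encode(raw_chunks), dtype="float32")
    idx = faiss.IndexFlatL2(vectors.shape[1])  # exact L2 nearest-neighbour search
    idx.add(vectors)
    np.save(os.path.join(save_dir, "embeddings.npy"), vectors)
    faiss.write_index(idx, os.path.join(save_dir, "index.faiss"))
    with open(os.path.join(save_dir, "chunks.txt"), "w", encoding="utf-8") as f:
        f.write("\n".join(raw_chunks))  # one chunk per line, matching the loader


# 2) A minimal usage example, assuming the index files already exist in
#    save_dir and that lm.py wires up working Groq-backed LLMs; the query
#    shown is purely illustrative.
if __name__ == "__main__":
    result = get_legal_response("Explain Article 19 in simple words")
    print("Title:", result["title"])
    print("Answer:", result["answer"])
    print("Relevant:", result["is_relevant"])
    print("Article number:", result["article_number"])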