Update constitution_py.py
constitution_py.py  CHANGED  (+63 −24)
@@ -20,13 +20,13 @@ a_llm = get_answer_llm()
 
 # Load sentence transformer model once globally
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-save_dir = "."
+save_dir = "saved_data"
 
 from functools import lru_cache
 
 # Cache embeddings and index loading
 @lru_cache(maxsize=1)
-def load_embeddings_and_index(save_dir="."):
+def load_embeddings_and_index(save_dir="saved_data"):
     embedding = np.load(os.path.join(save_dir, "embeddings.npy"))
     index = faiss.read_index(os.path.join(save_dir, "index.faiss"))
     with open(os.path.join(save_dir, "chunks.txt"), "r", encoding="utf-8") as f:
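The loader above assumes three artifacts already exist under `save_dir`: `embeddings.npy`, `index.faiss`, and `chunks.txt`. The build step is not part of this diff; a minimal sketch of what it would look like follows, assuming the same MiniLM model, a prepared `chunks` list, and one chunk per line in `chunks.txt` (the read side of that file is cut off in this view, so the separator is a guess). The helper name `build_and_save_index` is illustrative, not part of the committed file.

```python
import os

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

def build_and_save_index(chunks, save_dir="saved_data"):
    """One-time build of the artifacts load_embeddings_and_index expects (sketch)."""
    os.makedirs(save_dir, exist_ok=True)
    model = SentenceTransformer("all-MiniLM-L6-v2")
    # MiniLM-L6 produces 384-dimensional embeddings; FAISS wants float32.
    embeddings = np.array(model.encode(chunks)).astype("float32")
    index = faiss.IndexFlatL2(embeddings.shape[1])  # exact L2 search
    index.add(embeddings)
    np.save(os.path.join(save_dir, "embeddings.npy"), embeddings)
    faiss.write_index(index, os.path.join(save_dir, "index.faiss"))
    # Assumed format: one chunk per line (the loader's parsing is truncated above).
    with open(os.path.join(save_dir, "chunks.txt"), "w", encoding="utf-8") as f:
        f.write("\n".join(chunks))
```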
@@ -35,11 +35,11 @@ def load_embeddings_and_index(save_dir="."):
 
 
 similar_words = [
-    "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline",
+    "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline", "in simple words",
     "demonstrate", "illustrate", "interpret", "expand on", "go over", "walk through", "define",
     "unpack", "decode", "shed light on", "analyze", "discuss", "make clear", "reveal", "disclose",
     "comment on", "talk about", "lay out", "spell out", "express", "delve into", "explore",
-    "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight"
+    "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight", "brief"
 ]
 
 def is_explanation_query(query):
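The body of `is_explanation_query` is outside this hunk. Given how `similar_words` is used, it is presumably a case-insensitive keyword scan along these lines (a sketch, not the committed implementation):

```python
def is_explanation_query(query: str) -> bool:
    # Sketch: flag explanation-style queries by checking whether any
    # keyword from similar_words appears in the lowercased query.
    q = query.lower()
    return any(word in q for word in similar_words)
```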
@@ -48,20 +48,22 @@ def is_explanation_query(query):
 def retrieve_relevant_chunks(query, index, chunks, top_k=5):
     sub_str = "article"
     numbers = re.findall(r'\d+', query)
-
+    var = 1
     if sub_str in query.lower() and numbers:
         article_number = str(numbers[0])
         for i, chunk in enumerate(chunks):
             if chunk.lower().startswith(f"article;{article_number}"):
                 flag = is_explanation_query(query)
-
-
-
+                if not flag:
+                    var = 2
+                return [chunk], var
+
     query_embedding = embedding_model.encode([query])
     query_embedding = np.array(query_embedding).astype("float32")
     distances, indices = index.search(query_embedding, top_k)
     relevant_chunks = [chunks[i] for i in indices[0]]
-
+    var = 3
+    return relevant_chunks, var
 
 # Prompt to refine the query
 refine_prompt_template = ChatPromptTemplate.from_messages([
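`retrieve_relevant_chunks` now returns a second value, `var`, encoding three retrieval modes as bare integers: 1 for an exact article hit on an explanation-style query, 2 for an exact article hit without explanation wording, and 3 for the FAISS fallback. A possible refactor (not in this commit) is an `IntEnum`, which keeps the `var == 1` comparisons below working while naming the modes:

```python
from enum import IntEnum

class RetrievalMode(IntEnum):
    # Names are illustrative; the values match the commit's magic numbers.
    ARTICLE_EXPLANATION = 1  # article hit and the query uses explanation wording
    ARTICLE_DIRECT = 2       # article hit without explanation wording
    SEMANTIC_FALLBACK = 3    # no article hit; FAISS top-k search

# IntEnum members compare equal to plain ints, so `if var == 1:` in the
# dispatch below is unchanged if the function returns these values instead.
```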
@@ -107,14 +109,44 @@ answer_prompt_template_query = ChatPromptTemplate.from_messages([
 
 answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)
 
+
+explain_article_prompt_template = ChatPromptTemplate.from_messages([
+    ("system",
+     "You are a helpful assistant that analyzes human-written legal or constitutional text. "
+     "Your task is to return a structured response with the following fields:\n"
+     "- title: The title of the article, if available or derivable.\n"
+     "- answer: A clear explanation or summary of the content.\n"
+     "- is_relevant: true if the content is relevant to the legal or constitutional domain, otherwise false.\n"
+     "- article_number: Extract the article number (e.g., Article 11 or Article 3(a)), or return 'None' if not found."
+     ),
+    ("human",
+     "Query:\n{query}\n\n"
+     "Context:\n{context}\n\n"
+     "Return your response in the following format:\n\n"
+     "title:\n"
+     "answer:\n"
+     "is_relevant:\n"
+     "article_number:\n\n"
+     "{format_instructions}")
+])
+
+explain_chain_article = LLMChain(llm=a_llm, prompt=explain_article_prompt_template, output_parser=parser)
+
 # Prompt for explanation-style answers
+from langchain.prompts import ChatPromptTemplate
+
-explanation_prompt_template_query = ChatPromptTemplate.from_messages([
+explanation_prompt_template = ChatPromptTemplate.from_messages([
     ("system",
-     "You are a legal expert assistant with deep knowledge of the
+     "You are a legal expert assistant with deep knowledge of the Pakistan Penal Code, 1860 (PPC). "
-     "You will receive a user query and a set of context chunks from the
+     "You will receive a user query and a set of context chunks from the law. "
-     "Your task is to determine if the query is answerable based
+     "Your task is to determine if the query is answerable strictly based on the provided context. "
      "If it is, provide a structured explanation based on that context—without copying or repeating the context text verbatim. "
-     "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating
+     "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating Is Relevant: False, and do not fabricate any information."
     ),
 
     ("human",
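Both chains share `parser`, whose definition sits above the visible hunks. Since `get_legal_response` reads `response.title`, `response.answer`, `response.is_relevant`, and `response.article_number`, it is presumably a `PydanticOutputParser` over a schema shaped roughly like this (an assumption; the actual model may differ):

```python
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field

class LegalAnswer(BaseModel):
    title: str = Field(description="Title of the matched article or section")
    answer: str = Field(description="Structured explanation or answer text")
    is_relevant: bool = Field(description="Whether the context answers the query")
    article_number: str = Field(description="e.g. 'Article 11', or 'None' if absent")

parser = PydanticOutputParser(pydantic_object=LegalAnswer)
# parser.get_format_instructions() is what the templates splice in
# via {format_instructions}.
```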
@@ -123,17 +155,21 @@ explanation_prompt_template_query = ChatPromptTemplate.from_messages([
      "Instructions:\n"
      "1. Use only the information in the context to determine if the query can be answered.\n"
      "2. DO NOT include or repeat the context text directly in your answer. Summarize or paraphrase when needed.\n"
-     "3. If the query is answerable based on the context, explain the related
+     "3. If the query is answerable based on the context, explain the related section or clause clearly and precisely:\n"
-     " - Include the
+     " - Include the Section number if available.\n"
-     " - Describe its meaning and how it functions within the
+     " - Describe its meaning and how it functions within the PPC.\n"
      "4. Do NOT use real-world references, court cases, or examples.\n"
-     "5.
+     "5. Your final output must include the following structured return:\n"
-     " -
+     " - A *detailed explanation* of the relevant section or provision.\n"
-     " -
+     " - Is Relevant: True/False\n"
+     " - Related Section(s): List section number(s) if any.\n\n"
      "{format_instructions}\n")
 ])
 
-
+answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template, output_parser=parser)
 
 # Load data
 embeddings, index, chunks = load_embeddings_and_index(save_dir)
@@ -148,7 +184,7 @@ def get_legal_response(query):
 
     print("\nRefined Query:", refined_query)
 
-    relevant_chunks,
+    relevant_chunks, var = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)
 
     print("\nTop Relevant Chunks:")
     for i, chunk in enumerate(relevant_chunks, 1):
@@ -156,9 +192,12 @@ def get_legal_response(query):
 
     context = "\n\n".join(relevant_chunks)
 
-    if
+    if var == 1:
         print('okokokokokokokokokokok')
         response = answer_chain_article.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())
+    elif var == 2:
+        print('newnewnewnewnew')
+        response = explain_chain_article.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())
     else:
         print('nononononononononono')
         response = answer_chain_explanation.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())
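Because each `LLMChain` here is constructed with `output_parser=parser`, `run()` returns the parsed object rather than raw text, which is why the return block below can read attributes off `response`. `LLMChain` is deprecated in newer LangChain releases; if the Space upgrades, the equivalent LCEL wiring would look roughly like this (a sketch under that assumption):

```python
# prompt | llm | parser replaces LLMChain(..., output_parser=parser).
explain_lcel = explain_article_prompt_template | a_llm | parser
response = explain_lcel.invoke({
    "query": refined_query,
    "context": context,
    "format_instructions": parser.get_format_instructions(),
})
```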
@@ -167,5 +206,5 @@ def get_legal_response(query):
         "title": response.title,
         "answer": response.answer,
         "is_relevant": response.is_relevant,
-        "article_number": response.article_number
+        "article_number": response.article_number,
     }
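For reference, a call to the finished function would look like this (illustrative query; the keys mirror the dict returned above):

```python
result = get_legal_response("Explain Article 19")
print(result["title"], "-", result["article_number"])
print(result["answer"] if result["is_relevant"] else "Not answerable from the provided context.")
```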