Saim-11 committed
Commit 5e16dce · verified · 1 Parent(s): e3ea6f4

Update constitution_py.py

Files changed (1):
  1. constitution_py.py +63 -24
constitution_py.py CHANGED
@@ -20,13 +20,13 @@ a_llm = get_answer_llm()
 
 # Load sentence transformer model once globally
 embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-save_dir = "."
+save_dir = "saved_data"
 
 from functools import lru_cache
 
 # Cache embeddings and index loading
 @lru_cache(maxsize=1)
-def load_embeddings_and_index(save_dir="."):
+def load_embeddings_and_index(save_dir="saved_data"):
     embedding = np.load(os.path.join(save_dir, "embeddings.npy"))
     index = faiss.read_index(os.path.join(save_dir, "index.faiss"))
     with open(os.path.join(save_dir, "chunks.txt"), "r", encoding="utf-8") as f:
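Note on the cached loader: `@lru_cache` keys on the call arguments, so every call with the same `save_dir` reuses the artifacts loaded on the first call, and `maxsize=1` means a call with a different directory evicts that single cached entry. A minimal sketch of the effect, using the loader defined above:

```python
# First call reads embeddings.npy, index.faiss and chunks.txt from disk;
# a second call with the same argument is served from the lru_cache.
e1, i1, c1 = load_embeddings_and_index("saved_data")
e2, i2, c2 = load_embeddings_and_index("saved_data")
assert i1 is i2  # the very same objects, nothing re-read from disk
# Calling with a different save_dir would miss the cache and, with
# maxsize=1, evict the entry above.
```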
@@ -35,11 +35,11 @@ def load_embeddings_and_index(save_dir="."):
 
 
 similar_words = [
-    "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline",
+    "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline",'in simple words',
     "demonstrate", "illustrate", "interpret", "expand on", "go over", "walk through", "define",
     "unpack", "decode", "shed light on", "analyze", "discuss", "make clear", "reveal", "disclose",
     "comment on", "talk about", "lay out", "spell out", "express", "delve into", "explore",
-    "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight"
+    "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight","Brief"
 ]
 
 def is_explanation_query(query):
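The body of `is_explanation_query` falls outside this hunk, so the committed implementation is not visible here; it presumably tests the query against `similar_words`. A hypothetical reconstruction (note the newly added "Brief" entry is capitalized, so a case-insensitive comparison matters):

```python
# Hypothetical sketch -- the real body is not shown in this diff.
def is_explanation_query(query):
    q = query.lower()
    # Lowercase both sides so entries like "Brief" still match.
    return any(word.lower() in q for word in similar_words)
```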
@@ -48,20 +48,22 @@ def is_explanation_query(query):
 def retrieve_relevant_chunks(query, index, chunks, top_k=5):
     sub_str = "article"
     numbers = re.findall(r'\d+', query)
-    flag = False
+    var = 1
     if sub_str in query.lower() and numbers:
         article_number = str(numbers[0])
         for i, chunk in enumerate(chunks):
             if chunk.lower().startswith(f"article;{article_number}"):
                 flag = is_explanation_query(query)
-
-                return [chunk], flag
-    print(flag)
+                if flag == False:
+                    var = 2
+                return [chunk], var
+
     query_embedding = embedding_model.encode([query])
     query_embedding = np.array(query_embedding).astype("float32")
     distances, indices = index.search(query_embedding, top_k)
     relevant_chunks = [chunks[i] for i in indices[0]]
-    return relevant_chunks, flag
+    var = 3
+    return relevant_chunks,var
 
 # Prompt to refine the query
 refine_prompt_template = ChatPromptTemplate.from_messages([
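With this change `retrieve_relevant_chunks` returns an integer route code in place of the old boolean `flag`, and the stray debug `print(flag)` is gone. The contract, sketched (the query string below is illustrative):

```python
# Route codes returned by retrieve_relevant_chunks:
#   var == 1 -> an "article;<n>" chunk matched and the query is explanation-style
#   var == 2 -> an "article;<n>" chunk matched, plain lookup query
#   var == 3 -> no direct article match; FAISS top_k semantic fallback
chunks_out, var = retrieve_relevant_chunks("Explain Article 19", index, chunks)
# var in (1, 2) -> chunks_out is [the single matching article chunk]
# var == 3      -> chunks_out holds the top_k nearest chunks
```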
@@ -107,14 +109,44 @@ answer_prompt_template_query = ChatPromptTemplate.from_messages([
 
 answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)
 
+
+explain_article_prompt_template = ChatPromptTemplate.from_messages([
+    ("system",
+     "You are a helpful assistant that analyzes human-written legal or constitutional text. "
+     "Your task is to return a structured response with the following fields:\n"
+     "- title: The title of the article, if available or derivable.\n"
+     "- answer: A clear explanation or summary of the content.\n"
+     "- is_relevant: true if the content is relevant to the legal or constitutional domain, otherwise false.\n"
+     "- article_number: Extract the article number (e.g., Article 11 or Article 3(a)), or return 'None' if not found."
+    ),
+    ("human",
+     "query:\n{query}\n\n"
+     "Context:\n{context}\n\n"
+     "Return your response in the following format:\n\n"
+     "title:\n"
+     "answer:\n"
+     "is_relevant:\n"
+     "article_number\n\n"
+     "{format_instructions}")
+])
+
+
+explain_chain_article = LLMChain(llm=a_llm,prompt=explain_article_prompt_template,output_parser=parser)
+
+
+
 # Prompt for explanation-style answers
-explanation_prompt_template_query = ChatPromptTemplate.from_messages([
+from langchain.prompts import ChatPromptTemplate
+
+from langchain.prompts import ChatPromptTemplate
+
+explanation_prompt_template = ChatPromptTemplate.from_messages([
     ("system",
-     "You are a legal expert assistant with deep knowledge of the Constitution of Pakistan. "
-     "You will receive a user query and a set of context chunks from the Constitution. "
-     "Your task is to determine if the query is answerable based strictly on the information provided in the context. "
+     "You are a legal expert assistant with deep knowledge of the Pakistan Penal Code, 1860 (PPC). "
+     "You will receive a user query and a set of context chunks from the law. "
+     "Your task is to determine if the query is answerable strictly based on the provided context. "
      "If it is, provide a structured explanation based on that context—without copying or repeating the context text verbatim. "
-     "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating `Is Relevant: False`, and do not fabricate any information."
+     "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating Is Relevant: False, and do not fabricate any information."
     ),
 
     ("human",
@@ -123,17 +155,21 @@ explanation_prompt_template_query = ChatPromptTemplate.from_messages([
     "Instructions:\n"
     "1. Use only the information in the context to determine if the query can be answered.\n"
     "2. DO NOT include or repeat the context text directly in your answer. Summarize or paraphrase when needed.\n"
-    "3. If the query is answerable based on the context, explain the related article, clause, or provision clearly and precisely:\n"
-    "   - Include the Article number if available.\n"
-    "   - Describe its meaning and how it functions within the Constitution.\n"
+    "3. If the query is answerable based on the context, explain the related section or clause clearly and precisely:\n"
+    "   - Include the Section number if available.\n"
+    "   - Describe its meaning and how it functions within the PPC.\n"
     "4. Do NOT use real-world references, court cases, or examples.\n"
-    "5. Conclude your response with:\n"
-    "   - `Is Relevant: True/False`\n"
-    "   - `Related Article(s)`: List article number(s) if any.\n\n"
+    "5. Your final output must include the following structured return:\n"
+    "   - A *detailed explanation* of the relevant section or provision.\n"
+    "   - Is Relevant: True/False\n"
+    "   - Related Section(s): List section number(s) if any.\n\n"
     "{format_instructions}\n")
 ])
 
-answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template_query, output_parser=parser)
+
+
+
+answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template, output_parser=parser)
 
 # Load data
 embeddings, index, chunks = load_embeddings_and_index(save_dir)
@@ -148,7 +184,7 @@ def get_legal_response(query):
 
     print("\nRefined Query:", refined_query)
 
-    relevant_chunks, flag = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)
+    relevant_chunks, var = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)
 
     print("\nTop Relevant Chunks:")
     for i, chunk in enumerate(relevant_chunks, 1):
@@ -156,9 +192,12 @@ def get_legal_response(query):
 
     context = "\n\n".join(relevant_chunks)
 
-    if flag==True:
+    if var==1:
         print('okokokokokokokokokokok')
         response = answer_chain_article.run(query=refined_query,context=context,format_instructions=parser.get_format_instructions())
+    elif var==2:
+        print('newnewnewnewnew')
+        response = explain_chain_article.run(query=refined_query,context=context,format_instructions=parser.get_format_instructions())
     else:
         print('nononononononononono')
         response = answer_chain_explanation.run(query=refined_query,context=context,format_instructions=parser.get_format_instructions())
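The three-way `if/elif/else` pairs each route code from `retrieve_relevant_chunks` with one chain. The same routing, table-driven, as a sketch built from the chain objects defined above:

```python
# Sketch: dict dispatch equivalent to the if/elif/else above.
chains = {1: answer_chain_article, 2: explain_chain_article}
chain = chains.get(var, answer_chain_explanation)  # var == 3 falls through
response = chain.run(
    query=refined_query,
    context=context,
    format_instructions=parser.get_format_instructions(),
)
```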
@@ -167,5 +206,5 @@ def get_legal_response(query):
         "title":response.title,
         "answer": response.answer,
         "is_relevant": response.is_relevant,
-        "article_number": response.article_number
+        "article_number": response.article_number,
     }
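For reference, a usage sketch of the public entry point; the import path and the query are illustrative assumptions:

```python
# Hypothetical caller of constitution_py.get_legal_response.
from constitution_py import get_legal_response

result = get_legal_response("Explain Article 19 in simple words")
print(result["title"])
print(result["answer"])
print(result["is_relevant"], result["article_number"])
```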
 