Saim-11 committed
Commit b2300d6 · verified · 1 Parent(s): 72293a9

Upload 6 files

Files changed (7)
  1. .env +9 -0
  2. .gitattributes +1 -0
  3. chunks.txt +0 -0
  4. constitution_py.py +171 -0
  5. embeddings.npy +3 -0
  6. index.faiss +3 -0
  7. lm.py +15 -0
.env ADDED
@@ -0,0 +1,9 @@
+ GROQ_API_KEY_1="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"
+ GROQ_API_KEY_2="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"
+
+
+ #GROQ_API_KEY = "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"
+ LANGCHAIN_API_KEY = "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"
+ LANGCHAIN_PROJECT = "WORK"
+ LANGCHAIN_TRACING_V2 = "true"
+ #GOOGLE_API_KEY = "684ff9d7746ed6e9d54268345c5ad41655ffac7d"
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ index.faiss filter=lfs diff=lfs merge=lfs -text
chunks.txt ADDED
The diff for this file is too large to render. See raw diff
 
constitution_py.py ADDED
@@ -0,0 +1,171 @@
+ import warnings
+ warnings.filterwarnings("ignore")
+
+ import re
+ import os
+ import numpy as np
+ import faiss
+ from functools import lru_cache
+ from sentence_transformers import SentenceTransformer
+ from langchain_groq import ChatGroq
+ from langchain.chains import LLMChain
+ from langchain_core.prompts import ChatPromptTemplate
+ from pydantic import BaseModel, Field
+ from langchain.output_parsers import PydanticOutputParser
+ from lm import get_query_llm, get_answer_llm  # Custom LLM wrapper functions (see lm.py)
+
+ # Initialize LLMs
+ q_llm = get_query_llm()
+ a_llm = get_answer_llm()
+
+ # Load the sentence-transformer model once, globally
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+ save_dir = "saved_data"
+
+ # Cache the embeddings, FAISS index, and chunk text so they are read from disk only once
+ @lru_cache(maxsize=1)
+ def load_embeddings_and_index(save_dir="saved_data"):
+     embeddings = np.load(os.path.join(save_dir, "embeddings.npy"))
+     index = faiss.read_index(os.path.join(save_dir, "index.faiss"))
+     with open(os.path.join(save_dir, "chunks.txt"), "r", encoding="utf-8") as f:
+         chunks = [line.strip() for line in f]
+     return embeddings, index, chunks
+
+ similar_words = [
+     "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline",
+     "demonstrate", "illustrate", "interpret", "expand on", "go over", "walk through", "define",
+     "unpack", "decode", "shed light on", "analyze", "discuss", "make clear", "reveal", "disclose",
+     "comment on", "talk about", "lay out", "spell out", "express", "delve into", "explore",
+     "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight"
+ ]
+
+ def is_explanation_query(query):
+     # True when the query asks for exact wording (no explanation-style verb present)
+     return not any(word in query.lower() for word in similar_words)
+
+ def retrieve_relevant_chunks(query, index, chunks, top_k=5):
+     # Direct lookup when the query names a specific article number
+     numbers = re.findall(r'\d+', query)
+     flag = False
+     if "article" in query.lower() and numbers:
+         article_number = str(numbers[0])
+         for chunk in chunks:
+             if chunk.lower().startswith(f"article;{article_number}"):
+                 flag = is_explanation_query(query)
+                 return [chunk], flag
+     # Otherwise fall back to semantic search over the FAISS index
+     query_embedding = embedding_model.encode([query])
+     query_embedding = np.array(query_embedding).astype("float32")
+     distances, indices = index.search(query_embedding, top_k)
+     relevant_chunks = [chunks[i] for i in indices[0]]
+     return relevant_chunks, flag
+
+ # Prompt to refine the query
+ refine_prompt_template = ChatPromptTemplate.from_messages([
+     ('system',
+      "You are a legal assistant specialized in cleaning user queries. "
+      "Your task is to fix spelling mistakes and convert number words to digits only (e.g., 'three' to '3'). "
+      "Do not correct grammar, punctuation, or capitalization. "
+      "Do not restructure or rephrase the query in any way. "
+      "Do not add or remove words. "
+      "If the input is already clean or does not make sense, return it exactly as it is. "
+      "Only return one corrected query."),
+     ('human', '{query}')
+ ])
+ refine_chain = LLMChain(llm=q_llm, prompt=refine_prompt_template)
+
+ # Define response schema
+ class LegalResponse(BaseModel):
+     title: str = Field(..., description="The title of the matched article or provision")
+     answer: str = Field(..., description="The assistant's answer to the user's query")
+     is_relevant: bool = Field(..., description="True if the query is relevant to the Constitution of Pakistan, otherwise False")
+     article_number: str = Field(..., description="Mentioned article number if available, else empty string")
+
+ parser = PydanticOutputParser(pydantic_object=LegalResponse)
+
+ # Prompt for direct article wording
+ answer_prompt_template_query = ChatPromptTemplate.from_messages([
+     ("system",
+      "You are a legal assistant with expertise in the Constitution of Pakistan. "
+      "Return the answer in structured format. "
+      "Your task is to extract and present the exact constitutional text, without paraphrasing, ensuring accuracy and fidelity to the original wording. "
+      "Always return the title."),
+     ("human",
+      "User Query: {query}\n\n"
+      "Instructions:\n"
+      "0. Return the title.\n"
+      "1. Return the exact wording from the Constitution.\n"
+      "2. If a query references a specific article or sub-clause (e.g., Article 11(3)(b), Article 11(b), or 11(i)), return only the exact wording of that clause from the Constitution — do not include the full article unless required by structure.\n"
+      "3. Indicate whether the query is related to the Constitution of Pakistan (Yes/No).\n"
+      "4. Extract and return the article number if it is mentioned, with the sub-clause if one is mentioned, e.g. 1, 2, or 1(a).\n\n"
+      "Context:\n{context}\n\n"
+      "{format_instructions}\n")
+ ])
+
+ answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)
+
+ # Prompt for explanation-style answers
+ explanation_prompt_template_query = ChatPromptTemplate.from_messages([
+     ("system",
+      "You are a legal expert assistant with deep knowledge of the Constitution of Pakistan. "
+      "You will receive a user query and a set of context chunks from the Constitution. "
+      "Your task is to determine if the query is answerable based strictly on the information provided in the context. "
+      "If it is, provide a structured explanation based on that context—without copying or repeating the context text verbatim. "
+      "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating `Is Relevant: False`, and do not fabricate any information."),
+     ("human",
+      "User Query: {query}\n\n"
+      "Context (Extracted Chunks):\n{context}\n\n"
+      "Instructions:\n"
+      "1. Use only the information in the context to determine if the query can be answered.\n"
+      "2. DO NOT include or repeat the context text directly in your answer. Summarize or paraphrase when needed.\n"
+      "3. If the query is answerable based on the context, explain the related article, clause, or provision clearly and precisely:\n"
+      "   - Include the Article number if available.\n"
+      "   - Describe its meaning and how it functions within the Constitution.\n"
+      "4. Do NOT use real-world references, court cases, or examples.\n"
+      "5. Conclude your response with:\n"
+      "   - `Is Relevant: True/False`\n"
+      "   - `Related Article(s)`: List article number(s) if any.\n\n"
+      "{format_instructions}\n")
+ ])
+
+ answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template_query, output_parser=parser)
+
+ # Load data
+ embeddings, index, chunks = load_embeddings_and_index(save_dir)
+
+ # Main function
+ def get_legal_response(query):
+     try:
+         refined_query = refine_chain.run(query=query)
+     except Exception as e:
+         print(f"[Refinement Error] Using raw query instead: {e}")
+         refined_query = query
+
+     print("\nRefined Query:", refined_query)
+
+     relevant_chunks, flag = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)
+
+     print("\nTop Relevant Chunks:")
+     for i, chunk in enumerate(relevant_chunks, 1):
+         print(f"\nChunk {i}:\n{'-'*50}\n{chunk}")
+
+     context = "\n\n".join(relevant_chunks)
+
+     if flag:
+         # Exact-wording path: an article matched and the query used no explanation verb
+         response = answer_chain_article.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())
+     else:
+         # Explanation path: summarize the retrieved context
+         response = answer_chain_explanation.run(query=refined_query, context=context, format_instructions=parser.get_format_instructions())
+
+     return {
+         "title": response.title,
+         "answer": response.answer,
+         "is_relevant": response.is_relevant,
+         "article_number": response.article_number
+     }
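
For reference, a minimal sketch of how this module's entry point might be exercised from a separate driver script. The script, the query string, and the printout are illustrative assumptions, not part of the commit; it presumes saved_data/ and a populated .env sit alongside the module.

    # hypothetical driver script
    from constitution_py import get_legal_response

    # An "Article N" query with no explanation verb takes the exact-wording
    # chain; anything else falls through to the explanation chain.
    result = get_legal_response("What does Article 19 say?")
    print(result["title"])
    print(result["answer"])
    print("Relevant:", result["is_relevant"], "| Article:", result["article_number"])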
embeddings.npy ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7594f7c33917c21f7168af5f896d08589c2f9644e6f31096665d0738cf8a7aed
+ size 427136
index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78883b11d6015e1f366dfe10d42b8582b09906c836eba65dd1dd5bf3f8391471
+ size 427053
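
Both binaries are Git LFS pointers to prebuilt retrieval artifacts. A sketch of how they could be regenerated from chunks.txt follows; only the embedding model name comes from constitution_py.py, while the flat L2 index type and the empty-line filtering are assumptions (the commit does not record how the index was built).

    # hypothetical rebuild script for embeddings.npy and index.faiss
    import numpy as np
    import faiss
    from sentence_transformers import SentenceTransformer

    with open("saved_data/chunks.txt", "r", encoding="utf-8") as f:
        chunks = [line.strip() for line in f if line.strip()]

    model = SentenceTransformer("all-MiniLM-L6-v2")  # same model constitution_py.py loads
    embeddings = model.encode(chunks).astype("float32")
    np.save("saved_data/embeddings.npy", embeddings)

    index = faiss.IndexFlatL2(embeddings.shape[1])  # 384-dim for all-MiniLM-L6-v2
    index.add(embeddings)
    faiss.write_index(index, "saved_data/index.faiss")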
lm.py ADDED
@@ -0,0 +1,15 @@
+ import os
+ from dotenv import load_dotenv
+ from langchain_groq import ChatGroq
+
+ load_dotenv()
+ key1 = os.getenv('GROQ_API_KEY_1')
+ key2 = os.getenv('GROQ_API_KEY_2')
+
+
+ def get_query_llm():
+     return ChatGroq(model="mistral-saba-24b", api_key=key1)
+
+ def get_answer_llm():
+     return ChatGroq(model="mistral-saba-24b", api_key=key2)
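
A quick smoke test for the two wrappers (hypothetical, not part of the commit): both return the same model, but on separate API keys, presumably so the refinement and answer chains draw on separate Groq rate limits. It assumes .env provides both keys.

    from lm import get_query_llm, get_answer_llm

    q_llm = get_query_llm()   # backed by GROQ_API_KEY_1
    a_llm = get_answer_llm()  # backed by GROQ_API_KEY_2
    print(q_llm.invoke("Reply with the single word: ready").content)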