Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files- Indianconstitution.csv +0 -0
- classify.py +29 -0
- format.py +13 -0
- ingest.py +18 -0
- langchain_helper.py +50 -0
- main.py +26 -0
- requirements.txt +12 -0
- translate.py +21 -0
Indianconstitution.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
classify.py
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
import os
|
3 |
+
import google.generativeai as genai
|
4 |
+
|
5 |
+
# Read a local .env file (if any) into the process environment so the API key
# lookup below can find GOOGLE_API_KEY.
load_dotenv()

# A missing GOOGLE_API_KEY raises KeyError here, i.e. at import time.
genai.configure(
    api_key=os.environ["GOOGLE_API_KEY"],
)

# Module-level Gemini handle used by classify_query().
model = genai.GenerativeModel("gemini-pro")
|
10 |
+
|
11 |
+
def classify_query(query):
    """Ask the Gemini model whether ``query`` is about Indian law.

    Args:
        query: The user's question as plain text.

    Returns:
        The model's reply with surrounding whitespace and quote characters
        removed. The prompt asks for ``Indian Legal`` or ``Non-Indian Legal``,
        but the reply is free-form model output and is not validated here.
    """
    classifier_prompt = (
        "## Query Classification\n"
        "Please classify the following query as 'Indian Legal' or 'Non-Indian Legal':\n"
        "Query: '{}'\n"
        "Classification:"
    )

    # Compose the prompt with the given query.
    prompt = classifier_prompt.format(query)

    # Use Google Gen AI to generate a response based on the prompt.
    response = model.generate_content(prompt)

    # The prompt presents both labels wrapped in single quotes, so the model
    # may echo the label back quoted. Drop surrounding quotes (and any
    # whitespace they enclosed) so callers comparing against the bare label
    # still match.
    classification = response.text.strip().strip("'\"").strip()

    return classification
|
29 |
+
|
format.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def format_paragraph(text):
    """Convert the lightweight Markdown in ``text`` to inline HTML.

    Transformations, applied in this order:

    1. Each ``**`` marker becomes alternately ``<b>`` and ``</b>`` (first
       opens, second closes, and so on). A literal ``</b>`` already present
       in the input takes part in the same alternation. An odd number of
       markers leaves the last ``<b>`` unclosed.
    2. Every newline becomes ``<br>``.
    3. Every remaining ``*`` becomes a bullet character.

    Args:
        text: The raw text to format.

    Returns:
        The formatted string.

    NOTE(review): the text is not HTML-escaped here -- confirm that callers
    only render trusted content.
    """
    # Collapse every "**" into one placeholder token so a single pass can
    # decide, by position, whether it opens or closes a bold span.
    marked = text.replace("**", "</b>")

    # Running count of placeholders replaced so far. Keeping a counter avoids
    # re-scanning the whole prefix for every match.
    seen = 0

    def _alternate(_match):
        nonlocal seen
        tag = "<b>" if seen % 2 == 0 else "</b>"
        seen += 1
        return tag

    html = re.sub(r'</b>', _alternate, marked)
    html = html.replace('\n', '<br>')
    # Any single asterisk left over is treated as a list bullet.
    html = html.replace('*', '•')
    return html
|
ingest.py
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings import HuggingFaceInstructEmbeddings
|
2 |
+
from langchain.vectorstores import FAISS
|
3 |
+
from langchain.document_loaders.csv_loader import CSVLoader
|
4 |
+
|
5 |
+
# Directory the FAISS index is written to at the end of this script.
vectordb_file_path = "faiss_index"

# Embedding model used to vectorise the loaded documents.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
)

# Load the constitution CSV into LangChain documents.
loader = CSVLoader(
    file_path='Indianconstitution.csv',
    encoding='utf-8-sig',
)
data = loader.load()

# Embed the documents into an in-memory FAISS index, then persist it.
vectordb = FAISS.from_documents(
    documents=data,
    embedding=instructor_embeddings,
)
vectordb.save_local(vectordb_file_path)
|
18 |
+
|
langchain_helper.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.vectorstores import FAISS
|
2 |
+
from langchain.llms import GooglePalm
|
3 |
+
from langchain.document_loaders.csv_loader import CSVLoader
|
4 |
+
from langchain.embeddings import HuggingFaceInstructEmbeddings
|
5 |
+
from langchain.prompts import PromptTemplate
|
6 |
+
from langchain.chains import RetrievalQA
|
7 |
+
import os
|
8 |
+
|
9 |
+
from dotenv import load_dotenv
|
10 |
+
# Read a local .env file (if any) into the process environment before the
# API key is looked up.
load_dotenv()

# A missing GOOGLE_API_KEY raises KeyError here, i.e. at import time.
llm = GooglePalm(
    google_api_key=os.environ["GOOGLE_API_KEY"],
    temperature=0.1,
)

# Embedding model and index location used by get_qa_chain() to load the
# saved FAISS index.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
)
vectordb_file_path = "faiss_index"
|
16 |
+
|
17 |
+
def get_qa_chain():
    """Build the retrieval QA chain on top of the saved FAISS index.

    Loads the index stored at ``vectordb_file_path`` with
    ``instructor_embeddings``, wraps it in a retriever, and combines that
    retriever with the module-level ``llm`` and a custom prompt in a
    "stuff"-type ``RetrievalQA`` chain.

    Returns:
        The configured chain. It takes its input under the ``"query"`` key
        and is set to return the source documents alongside the answer.
    """
    index = FAISS.load_local(vectordb_file_path, instructor_embeddings)

    # NOTE(review): score_threshold is passed as a direct keyword here.
    # LangChain documents threshold filtering via
    # search_type="similarity_score_threshold" with
    # search_kwargs={"score_threshold": ...}; confirm this argument actually
    # takes effect before relying on it.
    doc_retriever = index.as_retriever(score_threshold=0.7)

    instructions = """You are a legal assistant chatbot, your name is "Nyaay Sahaayak", your main aim is to answer the queries related to the Indian laws and legal system,
if the question is not related to Indian laws and legal system kindly say dont know about the given question
Given the following context and a question, generate an answer based on the context or related to the Indian laws and legal system. The context given may not be always right
for the given question, hence cross verify yourself inorder to give an accurate answer that is only related to Indian laws and legal system.
Add some creativity to make the answer look readable and easily understandable. If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer,
if the question is out of the Indian laws and legal system, strictly deny them that you are not trained for it and only here for Indian law and legal system.

CONTEXT: {context}

QUESTION: {question}

"""

    qa_prompt = PromptTemplate(
        template=instructions,
        input_variables=["context", "question"],
    )

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=doc_retriever,
        input_key="query",
        return_source_documents=True,
        chain_type_kwargs={"prompt": qa_prompt},
    )
|
48 |
+
|
49 |
+
# Running this module directly just builds the chain once.
if "__main__" == __name__:
    chain = get_qa_chain()
|
main.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from flask import Flask, render_template, request
|
2 |
+
from translate import translate
|
3 |
+
from classify import classify_query
|
4 |
+
|
5 |
+
# Static assets live in web/static and are served from the site root
# (empty static URL prefix).
app = Flask(
    __name__,
    static_url_path='',
    static_folder='web/static',
)
|
6 |
+
|
7 |
+
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    page = render_template('index.html')
    return page
|
10 |
+
|
11 |
+
@app.route('/NyaaySahaayak')
def NyaaySahaayak():
    """Render the ``NyaaySahaayak.html`` template."""
    page = render_template('NyaaySahaayak.html')
    return page
|
14 |
+
|
15 |
+
@app.route('/chat', methods=['POST'])
def chat():
    """Answer one chat message posted from the web UI.

    Reads the ``user_message`` form field, asks ``classify_query`` whether it
    concerns Indian law, and returns either a fixed refusal or the answer
    produced by ``translate``.

    Returns:
        A dict with a single ``bot_response`` key holding the reply text.
    """
    user_message = request.form['user_message']
    classification = classify_query(user_message)

    # The classification is free-form model output. Compare on a normalised
    # label so that quoting, a trailing period, stray whitespace or different
    # casing does not let an off-topic question through to the QA chain.
    label = classification.strip().strip("'\".").strip().casefold()

    if label == 'non-indian legal':
        response = "Apologies, but I'm here to assist with questions related to Indian laws and legal matters only. If you have any queries within this domain, feel free to ask. Otherwise, I may not have the information you're looking for. Thank you for understanding."
    else:
        response = translate(user_message)
    return {'bot_response': response}
|
24 |
+
|
25 |
+
# Start the Flask server on port 8000, bound to all interfaces, when this
# file is run as a script.
if '__main__' == __name__:
    app.run(
        host='0.0.0.0',
        port=8000,
    )
|
requirements.txt
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain==0.0.284
|
2 |
+
python-dotenv==1.0.0
|
3 |
+
streamlit==1.22.0
|
4 |
+
tiktoken==0.4.0
|
5 |
+
faiss-cpu==1.7.4
|
6 |
+
protobuf~=3.19.0
|
7 |
+
google-generativeai
|
8 |
+
sentence-transformers==2.2.2
|
9 |
+
InstructorEmbedding==1.0.0
|
10 |
+
googletrans==4.0.0rc1
|
11 |
+
flask
|
12 |
+
re
|
translate.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_helper import get_qa_chain
|
2 |
+
from googletrans import Translator
|
3 |
+
from format import format_paragraph
|
4 |
+
|
5 |
+
# Both objects are created once at import time and reused by translate().
translator = Translator()  # language detection and translation client
chain = get_qa_chain()  # retrieval QA chain that produces the answers
|
7 |
+
|
8 |
+
def translate(user_message):
    """Answer ``user_message`` in the language it was written in.

    The language is detected first. A non-English message is translated with
    the translator's default destination before it is passed to the QA
    chain. The chain's ``result`` is run through ``format_paragraph`` and,
    for non-English input, translated back to the detected language.

    Args:
        user_message: The user's question as plain text.

    Returns:
        The formatted answer text.
    """
    detection = translator.detect(user_message)
    source_lang = detection.lang
    is_foreign = source_lang != 'en'

    # The chain is always queried with the (possibly translated) message.
    question = translator.translate(user_message).text if is_foreign else user_message

    answer = format_paragraph(chain(question)['result'])

    if not is_foreign:
        return answer

    # Reply in the language the user wrote in.
    return translator.translate(answer, dest=detection.lang).text
|