Rajeev-Isaac committed on
Commit
2be028b
·
verified ·
1 Parent(s): ae56acc

Upload 8 files

Browse files
Files changed (8) hide show
  1. Indianconstitution.csv +0 -0
  2. classify.py +29 -0
  3. format.py +13 -0
  4. ingest.py +18 -0
  5. langchain_helper.py +50 -0
  6. main.py +26 -0
  7. requirements.txt +12 -0
  8. translate.py +21 -0
Indianconstitution.csv ADDED
The diff for this file is too large to render. See raw diff
 
classify.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+ import os
3
+ import google.generativeai as genai
4
+
5
# Load settings from a local .env file (if any) into the process environment
# before the API key is read below.
load_dotenv()

# Indexing os.environ directly means a missing GOOGLE_API_KEY raises KeyError
# when this module is imported.
_google_api_key = os.environ["GOOGLE_API_KEY"]
genai.configure(api_key=_google_api_key)

# Module-level model handle reused by classify_query.
model = genai.GenerativeModel('gemini-pro')
10
+
11
def classify_query(query):
    """Classify *query* as 'Indian Legal' or 'Non-Indian Legal'.

    The query is embedded in a fixed prompt and sent to the module-level
    ``model``. Callers compare the result against the exact label strings,
    so recognisable answers are normalised to one of the two canonical
    labels; decoration such as quotes, markdown, casing or a trailing
    period is dropped.

    Args:
        query: The user's question, as plain text.

    Returns:
        'Non-Indian Legal' or 'Indian Legal' when the model's answer
        contains one of those labels; otherwise the model's stripped text,
        unchanged.
    """
    classifier_prompt = (
        "## Query Classification\n"
        "Please classify the following query as 'Indian Legal' or 'Non-Indian Legal':\n"
        "Query: '{}'\n"
        "Classification:"
    )
    prompt = classifier_prompt.format(query)

    # NOTE(review): response.text may raise when the API returns no usable
    # candidate (e.g. a blocked prompt) - confirm against the SDK docs and
    # decide how the caller should handle it.
    response = model.generate_content(prompt)
    raw_label = response.text.strip()

    # Fold case and treat '-' like a space so "non indian legal",
    # "'Non-Indian Legal'." and "**Non-Indian Legal**" all match.
    folded = raw_label.casefold().replace("-", " ")

    # Check the negative label first: it contains the positive one.
    if "non indian legal" in folded:
        return "Non-Indian Legal"
    if "indian legal" in folded:
        return "Indian Legal"

    # Unrecognised answer: hand it back untouched, as before.
    return raw_label
29
+
format.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+
3
def format_paragraph(text):
    """Convert a small subset of Markdown in *text* to inline HTML.

    * ``**`` markers become alternating ``<b>`` / ``</b>`` tags, in order of
      appearance (1st opens, 2nd closes, 3rd opens, ...).
    * Newlines become ``<br>``.
    * Any remaining ``*`` becomes the bullet entity ``&#8226;``.

    An odd number of ``**`` markers leaves the last ``<b>`` unclosed, the
    same as the input's unbalanced emphasis. The text is not HTML-escaped.

    Args:
        text: The string to format.

    Returns:
        The formatted string.
    """
    # Splitting on the marker visits each "**" exactly once, left to right.
    # The marker preceding segment i is marker number i - 1, so odd i opens
    # a bold run and even i closes it.
    segments = text.split("**")
    pieces = [segments[0]]
    for position, segment in enumerate(segments[1:], start=1):
        pieces.append("<b>" if position % 2 == 1 else "</b>")
        pieces.append(segment)
    text = "".join(pieces)

    text = text.replace('\n', '<br>')
    # The terminating ';' matters: without it "*1" would render as the
    # character reference "&#82261" instead of a bullet followed by "1".
    text = text.replace('*', '&#8226;')
    return text
ingest.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
2
+ from langchain.vectorstores import FAISS
3
+ from langchain.document_loaders.csv_loader import CSVLoader
4
+
5
# Target passed to FAISS.save_local at the end of this script.
vectordb_file_path = "faiss_index"

# Embedding model used to vectorise every loaded document.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
)

# 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if present.
loader = CSVLoader(
    file_path='Indianconstitution.csv',
    encoding='utf-8-sig',
)
data = loader.load()

# Embed the loaded documents into a FAISS index, then persist it.
vectordb = FAISS.from_documents(
    documents=data,
    embedding=instructor_embeddings,
)
vectordb.save_local(vectordb_file_path)
18
+
langchain_helper.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.vectorstores import FAISS
2
+ from langchain.llms import GooglePalm
3
+ from langchain.document_loaders.csv_loader import CSVLoader
4
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
5
+ from langchain.prompts import PromptTemplate
6
+ from langchain.chains import RetrievalQA
7
+ import os
8
+
9
+ from dotenv import load_dotenv
10
# Load settings from a local .env file (if any) before the key is read.
load_dotenv()

# Indexing os.environ directly means a missing GOOGLE_API_KEY raises KeyError
# when this module is imported.
_google_api_key = os.environ["GOOGLE_API_KEY"]
llm = GooglePalm(google_api_key=_google_api_key, temperature=0.1)

# Embedding model and index location used by get_qa_chain when it loads the
# saved FAISS index.
instructor_embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-large",
)
vectordb_file_path = "faiss_index"
16
+
17
def get_qa_chain(score_threshold=0.7):
    """Build the retrieval-augmented QA chain for the legal assistant.

    Loads the FAISS index saved at ``vectordb_file_path``, wraps it in a
    retriever that only returns documents whose relevance score is at least
    *score_threshold*, and combines it with the module-level ``llm`` and the
    "Nyaay Sahaayak" prompt in a "stuff" RetrievalQA chain.

    Args:
        score_threshold: Minimum relevance score a document needs in order
            to be passed to the LLM as context. Defaults to 0.7.

    Returns:
        A RetrievalQA chain that takes its question under the "query" key
        and also returns the source documents it used.
    """
    vectordb = FAISS.load_local(vectordb_file_path, instructor_embeddings)

    # The threshold has to travel through search_type/search_kwargs. Passing
    # score_threshold straight to as_retriever() is not a documented
    # parameter, so the cut-off would not be applied.
    retriever = vectordb.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": score_threshold},
    )

    # Kept flush-left so the indentation of this function does not leak
    # into the prompt text sent to the model.
    prompt_template = """You are a legal assistant chatbot, your name is "Nyaay Sahaayak", your main aim is to answer the queries related to the Indian laws and legal system,
if the question is not related to Indian laws and legal system kindly say dont know about the given question
Given the following context and a question, generate an answer based on the context or related to the Indian laws and legal system. The context given may not be always right
for the given question, hence cross verify yourself inorder to give an accurate answer that is only related to Indian laws and legal system.
Add some creativity to make the answer look readable and easily understandable. If the answer is not found in the context, kindly state "I don't know." Don't try to make up an answer,
if the question is out of the Indian laws and legal system, strictly deny them that you are not trained for it and only here for Indian law and legal system.

CONTEXT: {context}

QUESTION: {question}

"""

    PROMPT = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )

    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        input_key="query",
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )

    return chain
48
+
49
if __name__ == "__main__":
    # Running this module directly just builds the chain once, which
    # exercises loading the saved index and wiring up the prompt.
    chain = get_qa_chain()
main.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, render_template, request
2
+ from translate import translate
3
+ from classify import classify_query
4
+
5
# Static assets live in web/static; the empty static_url_path means they are
# served without a URL prefix.
app = Flask(
    __name__,
    static_url_path='',
    static_folder='web/static',
)
6
+
7
@app.route('/')
def index():
    """Render index.html for the site root."""
    page = render_template('index.html')
    return page
10
+
11
@app.route('/NyaaySahaayak')
def NyaaySahaayak():
    """Render NyaaySahaayak.html for the /NyaaySahaayak route."""
    page = render_template('NyaaySahaayak.html')
    return page
14
+
15
@app.route('/chat', methods=['POST'])
def chat():
    """Answer one chat message posted as the ``user_message`` form field.

    The message is first classified. Messages classified as non-Indian-legal
    get a fixed apology; everything else is passed to ``translate`` to
    produce the reply.

    Returns:
        A dict with the reply under the 'bot_response' key.
    """
    user_message = request.form['user_message']
    classification = classify_query(user_message)

    # The label comes from an LLM, so compare on a normalised form: an exact
    # match would miss answers such as "'Non-Indian Legal'" or
    # "non-indian legal." and let them through to the QA chain.
    normalized = classification.casefold().replace("-", " ")

    if "non indian legal" in normalized:
        response = "Apologies, but I'm here to assist with questions related to Indian laws and legal matters only. If you have any queries within this domain, feel free to ask. Otherwise, I may not have the information you're looking for. Thank you for understanding."
    else:
        response = translate(user_message)
    return {'bot_response': response}
24
+
25
if __name__ == '__main__':
    # Host 0.0.0.0 makes the server listen on all network interfaces,
    # on port 8000.
    app.run(
        host='0.0.0.0',
        port=8000,
    )
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.0.284
2
+ python-dotenv==1.0.0
3
+ streamlit==1.22.0
4
+ tiktoken==0.4.0
5
+ faiss-cpu==1.7.4
6
+ protobuf~=3.19.0
7
+ google-generativeai
8
+ sentence-transformers==2.2.2
9
+ InstructorEmbedding==1.0.0
10
+ googletrans==4.0.0rc1
11
+ flask
12
+ # re is part of the Python standard library and must not be listed as a pip requirement
translate.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_helper import get_qa_chain
2
+ from googletrans import Translator
3
+ from format import format_paragraph
4
+
5
# Created once at import time and shared by every translate() call.
translator = Translator()

# The QA chain is likewise built once at import time and reused per message.
chain = get_qa_chain()
7
+
8
def translate(user_message):
    """Answer *user_message* in the language it was written in.

    The message's language is detected first. A non-English message is
    translated before it is given to the QA chain, and the formatted answer
    is translated back into the detected language afterwards. English
    messages skip both translation steps.

    Args:
        user_message: The user's question as typed.

    Returns:
        The chain's answer, run through format_paragraph and, for
        non-English input, translated into the detected language.
    """
    detected_lang = translator.detect(user_message).lang
    needs_translation = detected_lang != 'en'

    # translate() is called without a destination here, exactly as before.
    if needs_translation:
        question = translator.translate(user_message).text
    else:
        question = user_message

    answer = format_paragraph(chain(question)['result'])

    if not needs_translation:
        return answer
    return translator.translate(answer, dest=detected_lang).text