# LawUseCase / app.py
# OmkarGhugarkar's picture
# Update app.py
# e0451c3 verified
import http.client
import urllib.request, urllib.parse, urllib.error
import base64
import json
import re
import google.generativeai as genai
import os
from PyPDF2 import PdfReader
from bs4 import BeautifulSoup
import streamlit as st
import time
# Credentials and endpoint configuration.
#
# Both secrets can be supplied through the environment (GOOGLE_API_KEY and
# INDIANKANOON_API_TOKEN) so they do not have to live in source control.
# NOTE(security): the literal fallbacks are kept only so that existing
# deployments keep working. They are exposed in this file and should be
# rotated and then removed.
google_api_key = (
    os.environ.get("GOOGLE_API_KEY")
    or "AIzaSyDdnoC2syL3bor01IRbbaPLZSEgJkYB7BI"
)
_indiankanoon_token = (
    os.environ.get("INDIANKANOON_API_TOKEN")
    or 'ca819b0b0853b9a9a76f0f421a884b88035c87b0'
)
# Headers sent with every request made through call_api().
headers = {
    'Authorization': 'Token %s' % _indiankanoon_token,
    'Accept': 'application/json',
}
# Host that call_api() opens its HTTPS connection to.
basehost = 'api.indiankanoon.org'
def call_api(url):
    """Send a POST request to ``url`` on ``basehost`` and return the body.

    Args:
        url: Request path (including any query string) on ``basehost``.

    Returns:
        The raw response body as returned by ``response.read()``.
    """
    connection = http.client.HTTPSConnection(basehost)
    try:
        connection.request('POST', url, headers=headers)
        response = connection.getresponse()
        return response.read()
    finally:
        # Release the socket even when the request or the read raises.
        connection.close()
def search(q, pagenum, maxpages):
    """Query the ``/search/`` endpoint and return the raw API response.

    Args:
        q: Search text; it is UTF-8 encoded and URL-quoted before sending.
        pagenum: Value for the ``pagenum`` query parameter.
        maxpages: Value for the ``maxpages`` query parameter.

    Returns:
        Whatever ``call_api`` returns for the built request path.
    """
    quoted_query = urllib.parse.quote_plus(q.encode('utf8'))
    request_path = '/search/?formInput=%s&pagenum=%d&maxpages=%d' % (
        quoted_query,
        pagenum,
        maxpages,
    )
    return call_api(request_path)
def fetch_doc(docid):
    """Fetch a document from the ``/doc/<docid>/`` endpoint.

    Args:
        docid: Integer document id, formatted into the path with ``%d``.

    Returns:
        Whatever ``call_api`` returns for the document path.
    """
    # The previous query-string handling operated on a list that was always
    # empty, so the request path was always exactly '/doc/<docid>/'.
    return call_api('/doc/%d/' % docid)
def fetch_orig_doc(docid):
    """Fetch the original document from the ``/origdoc/<docid>/`` endpoint.

    Args:
        docid: Integer document id, formatted into the path with ``%d``.

    Returns:
        A ``(path, response)`` tuple: the request path that was used and
        whatever ``call_api`` returned for it.
    """
    request_path = '/origdoc/%d/' % docid
    payload = call_api(request_path)
    return request_path, payload
def get_file_extension(mtype):
    """Map a MIME type string to a short file extension.

    Args:
        mtype: MIME type text; only its beginning is matched, so trailing
            parameters such as ``; charset=utf-8`` are ignored.

    Returns:
        One of ``'html'``, ``'ps'``, ``'pdf'``, ``'txt'``, ``'png'``, or
        ``'unkwn'`` when the type is empty/missing or not recognised.
    """
    fallback = 'unkwn'
    if not mtype:
        # Nothing to match against; echo the value and use the fallback.
        print (mtype)
        return fallback

    # (pattern, extension) pairs, tried in order against the start of mtype.
    known_types = (
        ('text/html', 'html'),
        ('application/postscript', 'ps'),
        ('application/pdf', 'pdf'),
        ('text/plain', 'txt'),
        ('image/png', 'png'),
    )
    for pattern, extension in known_types:
        if re.match(pattern, mtype):
            return extension
    return fallback
def save_original(docid, orig, origpath,path):
    """Decode an original-document API response and write it to disk.

    Args:
        docid: Document id. Not used by this function; kept so existing
            callers do not have to change.
        orig: JSON text of the API response. A response containing an
            ``errmsg`` key is ignored; otherwise ``doc`` is read as
            base64-encoded content and ``Content-Type`` picks the extension.
        origpath: Request path of the document; its second-to-last
            ``/``-separated component becomes the file name stem.
        path: Directory the file is written into.

    Returns:
        None. Nothing is written when the response carries ``errmsg``.
    """
    obj = json.loads(orig)
    if 'errmsg' in obj:
        return
    doc = base64.b64decode(obj['doc'])
    # A missing Content-Type falls back to the helper's 'unkwn' extension
    # instead of raising KeyError.
    extension = get_file_extension(obj.get('Content-Type'))
    name = origpath.split('/')[-2]
    filepath = path + '/' + name + '.%s' % extension
    # The context manager closes the handle even if the write fails.
    with open(filepath, 'wb') as filehandle:
        filehandle.write(doc)
def _safe_folder_name(sentence):
    """Return a directory name built from the first word of ``sentence``."""
    words = sentence.split()
    if not words:
        return 'query'
    # The sentence is model output derived from user input, so path
    # separators and dots are replaced to keep the folder inside the
    # working directory.
    return re.sub(r'[^\w-]', '_', words[0])


def _html_to_text(filepath):
    """Return the text of the HTML file at ``filepath``, or None if unreadable."""
    try:
        with open(filepath, 'r', encoding='utf-8') as data:
            html_cont = data.read()
    except (OSError, UnicodeDecodeError):
        return None
    soup = BeautifulSoup(html_cont, 'html.parser')
    # get_text() on the root yields every text node once. Collecting it per
    # tag repeated nested text once for each enclosing tag.
    return soup.get_text(separator=" ")


def _pdf_to_text(filepath):
    """Return the extracted text of every page of the PDF at ``filepath``."""
    reader = PdfReader(filepath)
    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)


def pipeline(q):
    """Research a legal question from past cases and return the write-up.

    Steps:
      1. Ask the model to rewrite ``q`` as a grammatically correct sentence.
      2. Search with that sentence and save each result's original document
         into a folder named after the sentence's first word.
      3. Ask the model to summarise every saved ``.html``/``.pdf`` document,
         appending a reference link for that document.
      4. Ask the model for a final answer based on the collected summaries.

    Args:
        q: The user's question.

    Returns:
        The text of the model's final response.
    """
    genai.configure(api_key=google_api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(f"Make this sentence grammatically correct. We are not asking advice from you, this will go to a advocate, just want to make sure it is correct. Reply back only the sentence nothing more. The matter typed might be sensitive but please make sure you don't type anything more {q}")
    q = response.text
    print(q)

    folder_path = _safe_folder_name(q)
    # exist_ok replaces the bare except that reported every mkdir failure
    # as "already exists".
    os.makedirs(folder_path, exist_ok=True)

    result = search(q,0,1)
    obj = json.loads(result)
    # An error response has no 'docs' key; treat it as "no results".
    docs = obj.get('docs', [])
    print(len(docs))

    # File name stems written during this run. The folder name is only the
    # first word of the question, so it can already hold files from earlier
    # questions; those must not be summarised again.
    fetched_stems = set()
    for doc in docs:
        docid = doc['tid']
        origpath, orig = fetch_orig_doc(docid)
        save_original(docid, orig, origpath,folder_path)
        # Same stem rule save_original uses for the file name.
        fetched_stems.add(origpath.split('/')[-2])
        print(docid)

    summary = ''
    print("Going Through the files now")
    for file in sorted(os.listdir(folder_path)):
        stem, extension = os.path.splitext(file)
        if stem not in fetched_stems:
            continue
        filepath = f'{folder_path}/{file}'
        if extension == '.html':
            print(file)
            time.sleep(30)
            text = _html_to_text(filepath)
            if text is None:
                continue
        elif extension == '.pdf':
            print(file)
            text = _pdf_to_text(filepath)
        else:
            # No text extractor for this type (e.g. png, ps, unkwn).
            continue
        count = len(re.findall(r'\w+', text))
        print("count ", count)
        response = model.generate_content(f"Write a summary for me of this case in a systematic manner. Explicitally refer to all the penal codes mentioned in those {text}")
        print(response.prompt_feedback)
        try:
            # The file stem is the document id, so the link points at the
            # document being summarised rather than the last one fetched.
            summary += response.text + f" Reference link - https://indiankanoon.org/doc/{stem}" +"#"*100
        except Exception as exc:
            # response.text raises when the model returned no usable text.
            print(f"File skipped {filepath}: {exc}")

    print("Doing Final")
    time.sleep(60)
    final_response = model.generate_content(f"You are a lawyer and want to do a good research for a case {q}. You have collated past cases {summary}. Now use these past evidences to make a good research for the case {q}. Intensively use the penal codes those are mentioned in the past cases. Be very careful as this is a senstive matter. The answer should be based only on the past cases. At the end of the answer give the reference links to cases cited also. Remember you are going to report it to a client, so please be polite and use positive sentences. The client is unaware summaries are being used in the background, reply in a professional manner.")
    print(final_response.text)
    return final_response.text
# Sample question; not referenced by the app flow below.
q = "can a wife who instituted criminal cases against husband also initiate discplinary proceedings against the husband at his workplace , basing on the same set of allegations ?"
st.title("Please have patience, we will run on 2M+ Tokens, might take upto 5 mins :)")
# Streamlit asks for a non-empty label even when it is hidden, so the widget
# gets a real label and keeps label_visibility="collapsed".
question = st.text_input("Legal question", placeholder="Can a wife who instituted criminal cases against her husband also initiate disciplinary proceedings against him at his workplace, based on the same set of allegations?", label_visibility="collapsed")
# Whitespace-only input is ignored rather than sent through the pipeline.
if question and question.strip():
    st.markdown(pipeline(question))