Spaces:

OmkarGhugarkar
/

LawUseCase

Runtime error

App Files Files Community

LawUseCase / app.py

OmkarGhugarkar

Update app.py

e0451c3 verified 10 months ago

raw

history blame contribute delete

5.76 kB

	import http.client
	import urllib.request, urllib.parse, urllib.error
	import base64
	import json
	import re
	import google.generativeai as genai
	import os
	from PyPDF2 import PdfReader
	from bs4 import BeautifulSoup
	import streamlit as st
	import time

	# SECURITY: both credentials below were committed to the repository in plain
	# text and must be treated as compromised -- rotate them and supply the new
	# values through the environment (e.g. Space secrets). The literals are kept
	# only as a fallback so an unconfigured deployment behaves as before; delete
	# them once the environment variables are in place.
	google_api_key = os.environ.get(
	    "GOOGLE_API_KEY", "AIzaSyDdnoC2syL3bor01IRbbaPLZSEgJkYB7BI"
	)

	_indiankanoon_token = os.environ.get(
	    "INDIANKANOON_API_TOKEN", "ca819b0b0853b9a9a76f0f421a884b88035c87b0"
	)

	# Headers sent with every Indian Kanoon API request (see call_api).
	headers = {
	    'Authorization': 'Token %s' % _indiankanoon_token,
	    'Accept': 'application/json',
	}
	basehost = 'api.indiankanoon.org'

	def call_api(url):
	    """POST to *url* on ``basehost`` and return the raw response body.

	    The request carries the module-level ``headers`` (API token and JSON
	    ``Accept``). The HTTP status is not inspected; callers receive whatever
	    body the server sent and are expected to parse it themselves.

	    Args:
	        url: Request path including any query string, e.g. ``'/doc/123/'``.

	    Returns:
	        The response body as ``bytes``.
	    """
	    # A timeout keeps a stalled server from hanging the app indefinitely.
	    connection = http.client.HTTPSConnection(basehost, timeout=120)
	    try:
	        connection.request('POST', url, headers=headers)
	        return connection.getresponse().read()
	    finally:
	        # Always release the socket, even when the request raises.
	        connection.close()

	def search(q, pagenum, maxpages):
	    """Run a search query against the API and return the raw response.

	    Args:
	        q: Query text; it is UTF-8 encoded and URL-quoted before sending.
	        pagenum: Page number, formatted with ``%d`` (must be an integer).
	        maxpages: Page count limit, formatted with ``%d`` (must be an integer).

	    Returns:
	        Whatever ``call_api`` returns for the constructed search URL.
	    """
	    encoded_query = urllib.parse.quote_plus(q.encode('utf8'))
	    return call_api(
	        '/search/?formInput=%s&pagenum=%d&maxpages=%d'
	        % (encoded_query, pagenum, maxpages)
	    )

	def fetch_doc(docid):
	    """Fetch the document with numeric id *docid* and return the raw response.

	    Args:
	        docid: Document id, formatted with ``%d`` (must be an integer).

	    Returns:
	        Whatever ``call_api`` returns for ``/doc/<docid>/``.
	    """
	    # The original built an always-empty ``args`` list and conditionally
	    # appended it as a query string; that branch could never run, so the
	    # URL is simply the document path.
	    return call_api('/doc/%d/' % docid)

	def fetch_orig_doc(docid):
	    """Fetch the original copy of document *docid*.

	    Args:
	        docid: Document id, formatted with ``%d`` (must be an integer).

	    Returns:
	        A ``(path, response)`` tuple: the ``/origdoc/<docid>/`` path that was
	        requested and whatever ``call_api`` returned for it.
	    """
	    origdoc_path = '/origdoc/%d/' % docid
	    payload = call_api(origdoc_path)
	    return origdoc_path, payload

	def get_file_extension(mtype):
	    """Map a MIME type string to a file extension (without the dot).

	    Matching is by prefix, so parameters such as ``; charset=utf-8`` are
	    ignored, and it is case-insensitive because media type names are not
	    case-sensitive.

	    Args:
	        mtype: Content type string; may be ``None`` or empty.

	    Returns:
	        One of ``'html'``, ``'ps'``, ``'pdf'``, ``'txt'``, ``'png'``, or
	        ``'unkwn'`` when the type is missing or not recognised.
	    """
	    if not mtype:
	        return 'unkwn'

	    known_types = (
	        ('text/html', 'html'),
	        ('application/postscript', 'ps'),
	        ('application/pdf', 'pdf'),
	        ('text/plain', 'txt'),
	        ('image/png', 'png'),
	    )
	    normalized = mtype.strip().lower()
	    for prefix, extension in known_types:
	        if normalized.startswith(prefix):
	            return extension
	    return 'unkwn'

	def save_original(docid, orig, origpath, path):
	    """Decode an original-document API response and write it to disk.

	    The response is JSON; if it contains an ``errmsg`` key nothing is
	    written. Otherwise the base64 payload under ``doc`` is decoded and saved
	    as ``<path>/<name>.<ext>``, where ``<name>`` is the second-to-last
	    ``/``-separated component of *origpath* and ``<ext>`` is derived from the
	    response's ``Content-Type``.

	    Args:
	        docid: Document id (unused here; kept for interface compatibility).
	        orig: Raw JSON response (``str`` or ``bytes``).
	        origpath: Request path the response came from, e.g. ``'/origdoc/1/'``.
	        path: Existing directory to write the file into.

	    Returns:
	        None.
	    """
	    obj = json.loads(orig)
	    if 'errmsg' in obj:
	        return

	    doc = base64.b64decode(obj['doc'])

	    # A missing Content-Type falls through to the 'unkwn' extension instead
	    # of raising KeyError.
	    extension = get_file_extension(obj.get('Content-Type'))
	    name = origpath.split('/')[-2]
	    filepath = os.path.join(path, '%s.%s' % (name, extension))
	    # The context manager closes the handle even if the write fails.
	    with open(filepath, 'wb') as filehandle:
	        filehandle.write(doc)

	def _html_to_text(filepath):
	    """Return the text content of the HTML file at *filepath*, or None if unreadable."""
	    try:
	        with open(filepath, 'r', encoding='utf-8') as data:
	            html_cont = data.read()
	    except (OSError, UnicodeDecodeError):
	        return None
	    soup = BeautifulSoup(html_cont, 'html.parser')
	    # One pass over the document: concatenating get_text() of every tag
	    # would repeat nested text once per enclosing ancestor.
	    return soup.get_text(separator=' ')


	def _pdf_to_text(filepath):
	    """Return the extracted text of every page of the PDF at *filepath*."""
	    reader = PdfReader(filepath)
	    # extract_text() may yield nothing for image-only pages.
	    return "".join((page.extract_text() or "") + "\n" for page in reader.pages)


	def pipeline(q):
	    """Answer a legal question using summaries of related Indian Kanoon cases.

	    Steps:
	      1. Ask the Gemini model to rewrite *q* as a grammatically correct
	         sentence; the rewritten text replaces *q* from then on.
	      2. Search the API with the rewritten query and save each hit's
	         original document into a folder named after the query's first word.
	      3. For each HTML or PDF document saved in this run, extract its text
	         and ask the model for a summary, tagging it with the document link.
	      4. Ask the model for a final research answer based on the summaries.

	    The function sleeps 30 seconds before each HTML document and 60 seconds
	    before the final request, so it is slow by design.

	    Args:
	        q: The user's question.

	    Returns:
	        The text of the model's final response.
	    """
	    genai.configure(api_key=google_api_key)
	    model = genai.GenerativeModel("gemini-1.5-flash")
	    response = model.generate_content(f"Make this sentence grammatically correct. We are not asking advice from you, this will go to a advocate, just want to make sure it is correct. Reply back only the sentence nothing more. The matter typed might be sensitive but please make sure you don't type anything more {q}")
	    q = response.text
	    print(q)

	    # The folder name comes from model output, so strip anything that is not
	    # a word character or hyphen (e.g. '/' or '..') and tolerate empty text.
	    words = q.split()
	    folder_path = re.sub(r'[^\w-]', '_', words[0]) if words else 'query'
	    try:
	        os.mkdir(folder_path)
	    except FileExistsError:
	        print("Folder already exist")

	    result = search(q, 0, 1)
	    obj = json.loads(result)

	    # A response without 'docs' is treated as zero hits.
	    docs = obj.get('docs', [])
	    print(len(docs))

	    # Remember which documents belong to this run: the folder may still hold
	    # files from an earlier query that started with the same word.
	    saved_ids = set()
	    for doc in docs:
	        docid = doc['tid']
	        origpath, orig = fetch_orig_doc(docid)
	        save_original(docid, orig, origpath, folder_path)
	        saved_ids.add(str(docid))
	        print(docid)

	    summaries = []
	    print("Going Through the files now")
	    for file in sorted(os.listdir(folder_path)):
	        # save_original names each file '<docid>.<ext>', so the stem is the
	        # document id to cite for this particular file.
	        file_docid, extension = os.path.splitext(file)
	        if file_docid not in saved_ids:
	            continue

	        filepath = os.path.join(folder_path, file)
	        if extension == '.html':
	            print(file)
	            time.sleep(30)
	            text = _html_to_text(filepath)
	            if text is None:
	                continue
	        elif extension == '.pdf':
	            print(file)
	            text = _pdf_to_text(filepath)
	        else:
	            # No text extractor for this type; skip rather than reuse the
	            # previous file's text.
	            continue

	        print("count ", len(re.findall(r'\w+', text)))

	        response = model.generate_content(f"Write a summary for me of this case in a systematic manner. Explicitally refer to all the penal codes mentioned in those {text}")
	        print(response.prompt_feedback)
	        try:
	            summaries.append(response.text + f" Reference link - https://indiankanoon.org/doc/{file_docid}" + "#"*100)
	        except Exception:
	            # Best effort: reading response.text can fail when the model
	            # returned no usable content; skip this file and carry on.
	            print(f"File skipped {filepath}")

	    summary = ''.join(summaries)

	    print("Doing Final")
	    time.sleep(60)
	    final_response = model.generate_content(f"You are a lawyer and want to do a good research for a case {q}. You have collated past cases {summary}. Now use these past evidences to make a good research for the case {q}. Intensively use the penal codes those are mentioned in the past cases. Be very careful as this is a senstive matter. The answer should be based only on the past cases. At the end of the answer give the reference links to cases cited also. Remember you are going to report it to a client, so please be polite and use positive sentences. The client is unaware summaries are being used in the background, reply in a professional manner.")
	    print(final_response.text)
	    return final_response.text


	# Sample query; not referenced by the statements below.
	q = "can a wife who instituted criminal cases against husband also initiate discplinary proceedings against the husband at his workplace , basing on the same set of allegations ?"

	# Streamlit entry point: show the page title, read one question, and render
	# the pipeline's answer as Markdown.
	st.title("Please have patience, we will run on 2M+ Tokens, might take upto 5 mins :)")
	# The label stays visually hidden (label_visibility="collapsed") but is
	# non-empty so assistive technology can still announce the field.
	question = st.text_input(
	    "Your legal question",
	    placeholder="Can a wife who instituted criminal cases against her husband also initiate disciplinary proceedings against him at his workplace, based on the same set of allegations?",
	    label_visibility="collapsed",
	)
	# Ignore whitespace-only input so it does not start the multi-minute pipeline.
	if question and question.strip():
	    st.markdown(pipeline(question))