"""Streamlit front end for case-law research.

The script searches api.indiankanoon.org for a user question, saves the
original documents it finds, asks a Gemini model to summarise each saved
HTML/PDF file, and finally asks the model for a research answer built on those
summaries.
"""
import http.client
import urllib.request
import urllib.parse
import urllib.error
import base64
import json
import re
import os
import time

import google.generativeai as genai
from PyPDF2 import PdfReader
from bs4 import BeautifulSoup
import streamlit as st

# SECURITY(review): both credentials below are committed in source. Rotate them
# and supply the real values through the environment; the literals are kept
# only as a fallback so existing deployments keep working unchanged.
google_api_key = os.environ.get(
    "GOOGLE_API_KEY", "AIzaSyDdnoC2syL3bor01IRbbaPLZSEgJkYB7BI"
)
_kanoon_token = os.environ.get(
    "INDIANKANOON_API_TOKEN", "ca819b0b0853b9a9a76f0f421a884b88035c87b0"
)

headers = {
    'Authorization': 'Token %s' % _kanoon_token,
    'Accept': 'application/json',
}

basehost = 'api.indiankanoon.org'

# Pauses (seconds) taken before processing each HTML file and before the final
# model call.
_PER_HTML_FILE_DELAY = 30
_FINAL_CALL_DELAY = 60

# Ordered (MIME prefix, file extension) pairs used by get_file_extension.
_EXTENSION_BY_MIME_PREFIX = (
    ('text/html', 'html'),
    ('application/postscript', 'ps'),
    ('application/pdf', 'pdf'),
    ('text/plain', 'txt'),
    ('image/png', 'png'),
)


def call_api(url):
    """POST to ``url`` on ``basehost`` and return the raw response body (bytes).

    The connection is always closed, including when the request fails.
    """
    connection = http.client.HTTPSConnection(basehost)
    try:
        connection.request('POST', url, headers=headers)
        return connection.getresponse().read()
    finally:
        connection.close()


def search(q, pagenum, maxpages):
    """Run a search for ``q`` and return the raw response body.

    ``q`` is URL-encoded before being placed in the ``formInput`` parameter.
    """
    q = urllib.parse.quote_plus(q.encode('utf8'))
    url = '/search/?formInput=%s&pagenum=%d&maxpages=%d' % (q, pagenum, maxpages)
    return call_api(url)


def fetch_doc(docid):
    """Return the raw response body for ``/doc/<docid>/``."""
    return call_api('/doc/%d/' % docid)


def fetch_orig_doc(docid):
    """Return ``(url, body)`` for ``/origdoc/<docid>/``."""
    url = '/origdoc/%d/' % docid
    return url, call_api(url)


def get_file_extension(mtype):
    """Map a MIME type string to a file extension.

    Returns ``'unkwn'`` when ``mtype`` is empty/``None`` or matches none of the
    known prefixes.
    """
    if not mtype:
        return 'unkwn'
    for prefix, extension in _EXTENSION_BY_MIME_PREFIX:
        if mtype.startswith(prefix):
            return extension
    return 'unkwn'


def save_original(docid, orig, origpath, path):
    """Decode an ``origdoc`` JSON payload and write the document into ``path``.

    Args:
        docid: Unused; kept so existing callers keep working.
        orig: JSON text/bytes with a base64 ``doc`` field and a
            ``Content-Type`` field.
        origpath: URL path of the form ``/origdoc/<id>/``; its ``<id>``
            segment becomes the file name.
        path: Directory that receives the file.

    Nothing is written when the payload contains ``errmsg``.
    """
    obj = json.loads(orig)
    if 'errmsg' in obj:
        return

    doc = base64.b64decode(obj['doc'])
    extension = get_file_extension(obj.get('Content-Type'))
    name = origpath.split('/')[-2]
    filepath = os.path.join(path, '%s.%s' % (name, extension))
    with open(filepath, 'wb') as filehandle:
        filehandle.write(doc)


def _safe_folder_name(query):
    """Return a single safe path component derived from the first word of ``query``."""
    words = query.split()
    # The query is user/model supplied, so strip anything (path separators,
    # dots) that could make os.mkdir escape the working directory.
    cleaned = re.sub(r'[^\w-]', '_', words[0]) if words else ''
    return cleaned or 'query'


def _download_documents(query, folder_path):
    """Search for ``query`` and save each hit's original document in ``folder_path``."""
    result = search(query, 0, 1)
    # An error payload has no 'docs' key; treat it as "no results".
    docs = json.loads(result).get('docs', [])
    print(len(docs))
    for doc in docs:
        docid = doc['tid']
        origpath, orig = fetch_orig_doc(docid)
        save_original(docid, orig, origpath, folder_path)
        print(docid)


def _html_to_text(filepath):
    """Return the visible text of the HTML file at ``filepath``."""
    with open(filepath, 'r', encoding='utf-8') as data:
        html_cont = data.read()
    # One get_text() on the root yields every text node exactly once; calling
    # it per tag would repeat nested text once for each ancestor.
    return BeautifulSoup(html_cont, 'html.parser').get_text(separator=' ')


def _pdf_to_text(filepath):
    """Return the extracted text of every page of the PDF at ``filepath``."""
    reader = PdfReader(filepath)
    # extract_text() can yield None for pages without a text layer.
    return ''.join((page.extract_text() or '') + "\n" for page in reader.pages)


def pipeline(q):
    """Answer the legal question ``q`` using summaries of retrieved past cases.

    Steps: ask the model to correct the grammar of ``q``; search with the
    corrected question and save the original documents into a folder named
    after its first word; summarise every ``.html``/``.pdf`` file in that
    folder; ask the model for a final answer based on those summaries.

    Returns:
        The text of the model's final answer.
    """
    genai.configure(api_key=google_api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")

    response = model.generate_content(
        f"Make this sentence grammatically correct. We are not asking advice from you, this will go to a advocate, just want to make sure it is correct. \n"
        f"Reply back only the sentence nothing more. The matter typed might be sensitive but please make sure you don't type anything more {q}"
    )
    try:
        corrected = response.text
    except (ValueError, IndexError) as err:
        # No usable text came back (e.g. a blocked response); keep the
        # question as typed.
        print(f"Grammar correction unavailable: {err}")
        corrected = q
    q = corrected.strip() or q
    print(q)

    folder_path = _safe_folder_name(q)
    try:
        os.mkdir(folder_path)
    except FileExistsError:
        print("Folder already exist")

    _download_documents(q, folder_path)

    summaries = []
    print("Going Through the files now")
    for file in sorted(os.listdir(folder_path)):
        filepath = os.path.join(folder_path, file)
        # save_original names each file after its document id, so the stem is
        # the id to cite for this particular file.
        file_docid = os.path.splitext(file)[0]

        if file.endswith('.html'):
            print(file)
            time.sleep(_PER_HTML_FILE_DELAY)
            try:
                text = _html_to_text(filepath)
            except (OSError, UnicodeDecodeError) as err:
                print(f"File skipped {file}: {err}")
                continue
        elif file.endswith('.pdf'):
            print(file)
            text = _pdf_to_text(filepath)
        else:
            # No text extractor for this type; do not summarise stale text.
            continue

        count = len(re.findall(r'\w+', text))
        print("count ", count)

        response = model.generate_content(
            f"Write a summary for me of this case in a systematic manner. Explicitally refer to all the penal codes mentioned in those {text}"
        )
        print(response.prompt_feedback)
        try:
            summaries.append(
                response.text
                + f" Reference link - https://indiankanoon.org/doc/{file_docid}"
                + "#" * 100
            )
        except (ValueError, IndexError) as err:
            # The model returned no usable text for this file.
            print(f"File skipped {file}: {err}")

    summary = ''.join(summaries)

    print("Doing Final")
    time.sleep(_FINAL_CALL_DELAY)
    final_response = model.generate_content(
        f"You are a lawyer and want to do a good research for a case {q}. You have collated past cases {summary}. Now use these past evidences to make a good research for the case {q}. \n"
        f"Intensively use the penal codes those are mentioned in the past cases. Be very careful as this is a senstive matter. The answer should be based only on the past cases. At the end of the answer give the reference links to cases cited also. Remember you are going to report it to a client, so please be polite and use positive sentences. The client is unaware summaries are being used in the background, reply in a professional manner."
    )
    print(final_response.text)
    return final_response.text


# Sample question; not referenced by the UI below.
q = "can a wife who instituted criminal cases against husband also initiate discplinary proceedings against the husband at his workplace , basing on the same set of allegations ?"

st.title("Please have patience, we will run on 2M+ Tokens, might take upto 5 mins :)")
question = st.text_input(
    "",
    placeholder="Can a wife who instituted criminal cases against her husband also initiate disciplinary proceedings against him at his workplace, based on the same set of allegations?",
    label_visibility="collapsed",
)
if question:
    st.markdown(pipeline(question))