OmkarGhugarkar commited on
Commit
e59d7a4
·
verified ·
1 Parent(s): 7b8ff52

First Commit of Files

Browse files
Files changed (2) hide show
  1. app.py +155 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import http.client
2
+ import urllib.request, urllib.parse, urllib.error
3
+ import base64
4
+ import json
5
+ import re
6
+ import google.generativeai as genai
7
+ import os
8
+ from PyPDF2 import PdfReader
9
+ from bs4 import BeautifulSoup
10
+ import streamlit as st
11
+ import time
12
+
13
# SECURITY: credentials were previously hard-coded in this file (and committed).
# They are now read from the environment; the literal fallbacks are kept only
# for backward compatibility and should be rotated and removed.
google_api_key = os.environ.get(
    "GOOGLE_API_KEY", "AIzaSyDdnoC2syL3bor01IRbbaPLZSEgJkYB7BI")

# HTTP headers for every Indian Kanoon API call: token auth + JSON responses.
_ik_token = os.environ.get(
    "INDIANKANOON_TOKEN", "ca819b0b0853b9a9a76f0f421a884b88035c87b0")
headers = {'Authorization': 'Token %s' % _ik_token,
           'Accept': 'application/json'}

# Host used by call_api() for all requests.
basehost = 'api.indiankanoon.org'
19
def call_api(url):
    """POST *url* to the Indian Kanoon API and return the raw body as bytes.

    url -- path (and query string) relative to ``basehost``.
    """
    connection = http.client.HTTPSConnection(basehost)
    try:
        connection.request('POST', url, headers=headers)
        response = connection.getresponse()
        return response.read()
    finally:
        # The original leaked the connection; always release the socket,
        # even when the request or read raises.
        connection.close()
25
+
26
def search(q, pagenum, maxpages):
    """Run a full-text search on the API; returns the raw JSON response bytes.

    q        -- free-text query (UTF-8 encoded and URL-escaped here)
    pagenum  -- zero-based results page to fetch
    maxpages -- number of pages to request
    """
    encoded_query = urllib.parse.quote_plus(q.encode('utf8'))
    endpoint = '/search/?formInput=%s&pagenum=%d&maxpages=%d' % (
        encoded_query, pagenum, maxpages)
    return call_api(endpoint)
30
+
31
def fetch_doc(docid):
    """Fetch the parsed document for a numeric *docid*; returns response bytes.

    The original built an always-empty ``args`` list followed by a dead
    ``if args:`` branch; both removed — the URL never carried a query string.
    """
    url = '/doc/%d/' % docid
    return call_api(url)
38
+
39
def fetch_orig_doc(docid):
    """Fetch the original (as-filed) document for *docid*.

    Returns a ``(url, response_bytes)`` pair; callers reuse the URL to
    derive a filename for the saved copy.
    """
    endpoint = '/origdoc/%d/' % docid
    payload = call_api(endpoint)
    return endpoint, payload
42
+
43
def get_file_extension(mtype):
    """Map a Content-Type value to a file extension string.

    mtype -- MIME type (may carry parameters, e.g. ``text/html; charset=...``).
    Returns 'unkwn' for empty/unknown types (spelling kept for backward
    compatibility with previously saved files).
    """
    # startswith() on literal prefixes is equivalent to the anchored
    # re.match() calls used before, but clearer and faster.
    mime_extensions = (
        ('text/html', 'html'),
        ('application/postscript', 'ps'),
        ('application/pdf', 'pdf'),
        ('text/plain', 'txt'),
        ('image/png', 'png'),
    )
    if not mtype:
        print(mtype)  # preserve the original diagnostic for missing types
        return 'unkwn'
    for prefix, extension in mime_extensions:
        if mtype.startswith(prefix):
            return extension
    return 'unkwn'
58
+
59
def save_original(docid, orig, origpath, path):
    """Decode a /origdoc/ API response and write the document to disk.

    docid    -- document id (unused here; kept for interface compatibility)
    origpath -- the request URL; its second-to-last path segment (the docid)
                becomes the file stem
    path     -- destination directory
    orig     -- raw JSON from fetch_orig_doc(); silently returns when the API
                reported an error for this document
    """
    obj = json.loads(orig)
    if 'errmsg' in obj:
        return

    doc = base64.b64decode(obj['doc'])

    extension = get_file_extension(obj['Content-Type'])
    name = origpath.split('/')[-2]
    filepath = path + '/' + name + '.%s' % extension
    # with-statement guarantees the handle is closed even if the write
    # raises (the original leaked it on error).
    with open(filepath, 'wb') as filehandle:
        filehandle.write(doc)
72
+
73
def pipeline(q):
    """Research a legal question end-to-end and return the final write-up.

    Steps: grammar-correct the query with Gemini, search Indian Kanoon,
    download the original documents, summarise each one, then ask the model
    to synthesise a final report from the summaries.

    q -- the user's free-text legal question.
    Returns the model's final answer text. Network/LLM heavy; sleeps are
    crude rate limiting between model calls.
    """
    genai.configure(api_key=google_api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(
        f"Make this sentence grammatically correct. Reply back only the sentence nothing more. {q}")
    q = response.text
    print(q)

    # Download directory is named after the first word of the corrected query.
    folder_path = q.split()[0]
    try:
        os.mkdir(folder_path)
    except FileExistsError:
        # Narrowed from a bare except: only "already exists" is expected here.
        print("Folder already exist")

    result = search(q, 0, 1)
    obj = json.loads(result)

    docs = obj['docs']
    print(len(docs))
    for doc in docs:
        docid = doc['tid']
        origpath, orig = fetch_orig_doc(docid)
        save_original(docid, orig, origpath, folder_path)
        print(docid)

    files = os.listdir(folder_path)
    summary = ''
    files.sort()
    print("Going Through the files now")
    for file in files:
        # save_original names each file after the /origdoc/ URL segment, so
        # the file stem IS the docid. The original cited the LAST downloaded
        # docid for every summary; derive the correct id per file instead.
        file_docid = os.path.splitext(file)[0]
        text = None

        if file.endswith('.html'):
            print(file)
            time.sleep(30)
            try:
                with open(f'{folder_path}/{file}', 'r', encoding='utf-8') as data:
                    html_cont = data.read()
            except (OSError, UnicodeDecodeError):
                # Narrowed from a bare except; unreadable files are skipped.
                continue

            # Extract visible text from every tag.
            soup = BeautifulSoup(html_cont, 'html.parser')
            text = ''.join(tag.get_text() + " " for tag in soup.find_all())
            print("count ", len(re.findall(r'\w+', text)))

        elif file.endswith('.pdf'):
            print(file)
            reader = PdfReader(f'{folder_path}/{file}')
            text = ""
            for page in reader.pages:
                text += page.extract_text() + "\n"
            print("count ", len(re.findall(r'\w+', text)))

        if text is None:
            # Neither HTML nor PDF: the original summarised stale text from
            # the previous file (or crashed on the first); skip instead.
            continue

        response = model.generate_content(
            f"Write a summary for me of this case in a systematic manner. Explicitally refer to all the penal codes mentioned in those {text}")
        print(response.prompt_feedback)
        try:
            summary += response.text + f" Reference link - https://indiankanoon.org/doc/{file_docid}" + "#" * 100
        except ValueError:
            # genai raises ValueError when the response was blocked/empty.
            print(f"File skipped {file}")

    print("Doing Final")
    time.sleep(60)
    final_response = model.generate_content(f"You are a lawyer and want to do a good research for a case {q}. You have collated past cases {summary}. Now use these past evidences to make a good research for the case {q}. Intensively use the penal codes those are mentioned in the past cases. Be very careful as this is a senstive matter. The answer should be based only on the past cases. At the end of the answer give the reference links to cases cited also. Remember you are going to report it to a client, so please be polite and use positive sentences. The client is unaware summaries are being used in the background, reply in a professional manner.")
    print(final_response.text)
    return final_response.text
148
+
149
+
150
# --- Streamlit UI ---------------------------------------------------------
# The unused module-level sample-question variable `q` (dead code that also
# shadowed pipeline()'s parameter name) has been removed; the sample text
# lives on as the input placeholder below.
st.title("Please have patience, we will run on 2M+ Tokens, might take upto 5 mins :)")
question = st.text_input(
    "",
    placeholder="Can a wife who instituted criminal cases against her husband also initiate disciplinary proceedings against him at his workplace, based on the same set of allegations?",
    label_visibility="collapsed",
)
if question:
    # Render the model's final write-up as Markdown.
    st.markdown(pipeline(question))
155
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ beautifulsoup4
2
+ pypdf2
3
+ google-generativeai
4
+ streamlit